diff --git a/Core/MIPS/ARM64/Arm64IRJit.cpp b/Core/MIPS/ARM64/Arm64IRJit.cpp
index 26d9913bdc74..420f7443ca45 100644
--- a/Core/MIPS/ARM64/Arm64IRJit.cpp
+++ b/Core/MIPS/ARM64/Arm64IRJit.cpp
@@ -65,10 +65,11 @@ static void NoBlockExits() {
 	_assert_msg_(false, "Never exited block, invalid IR?");
 }

-bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
+bool Arm64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
 	if (GetSpaceLeft() < 0x800)
 		return false;

+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumInstructions() * 32));

 	u32 startPC = block->GetOriginalStart();
@@ -92,12 +93,13 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
 	compilingBlockNum_ = block_num;
 	lastConstPC_ = 0;

-	regs_.Start(block);
+	regs_.Start(irBlockCache, block_num);

 	std::vector<const u8 *> addresses;
 	addresses.reserve(block->GetNumInstructions());
+	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 	for (int i = 0; i < block->GetNumInstructions(); ++i) {
-		const IRInst &inst = block->GetInstructions()[i];
+		const IRInst &inst = instructions[i];
 		regs_.SetIRIndex(i);
 		addresses.push_back(GetCodePtr());
@@ -156,10 +158,11 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
 			addressesLookup[addresses[i]] = i;

 		INFO_LOG(JIT, "=============== ARM64 (%08x, %d bytes) ===============", startPC, len);
+		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
 			auto it = addressesLookup.find(p);
 			if (it != addressesLookup.end()) {
-				const IRInst &inst = block->GetInstructions()[it->second];
+				const IRInst &inst = instructions[it->second];

 				char temp[512];
 				DisassembleIR(temp, sizeof(temp), inst);
@@ -319,7 +322,8 @@ void Arm64JitBackend::ClearAllBlocks() {
 	EraseAllLinks(-1);
 }

-void Arm64JitBackend::InvalidateBlock(IRBlock *block, int block_num) {
+void Arm64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	int offset = block->GetTargetOffset();

 	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;
diff --git a/Core/MIPS/ARM64/Arm64IRJit.h b/Core/MIPS/ARM64/Arm64IRJit.h
index 055e525565f8..c2992f3520b7 100644
--- a/Core/MIPS/ARM64/Arm64IRJit.h
+++ b/Core/MIPS/ARM64/Arm64IRJit.h
@@ -40,9 +40,9 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend
 	bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;

 	void GenerateFixedCode(MIPSState *mipsState) override;
-	bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
+	bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
 	void ClearAllBlocks() override;
-	void InvalidateBlock(IRBlock *block, int block_num) override;
+	void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) override;

 	void UpdateFCR31(MIPSState *mipsState) override;
diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp
index 191ab8b2f7f8..7a1453f2f5cb 100644
--- a/Core/MIPS/IR/IRInst.cpp
+++ b/Core/MIPS/IR/IRInst.cpp
@@ -184,6 +184,8 @@ static const IRMeta irMeta[] = {
 const IRMeta *metaIndex[256];

 void InitIR() {
+	if (metaIndex[0])
+		return;
 	for (size_t i = 0; i < ARRAY_SIZE(irMeta); i++) {
 		metaIndex[(int)irMeta[i].op] = &irMeta[i];
 	}
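Note on the backend interface change above, which the RISC-V and x86 backends further down mirror: IR instructions now live in one growable arena owned by `IRBlockCache` instead of per-block heap allocations, so backends receive the cache plus a block number and resolve pointers at the point of use. A minimal sketch of the pattern, using only accessors introduced in this patch (the backend class name is a placeholder):

```cpp
// Sketch only: resolve block data through the cache each time it is needed.
// Arena growth can reallocate the underlying std::vector<IRInst>, so a
// pointer fetched here must not be cached across allocations.
bool SomeJitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
	IRBlock *block = irBlockCache->GetBlock(block_num);
	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
	for (int i = 0; i < block->GetNumInstructions(); ++i) {
		const IRInst &inst = instructions[i];
		// ... emit native code for inst ...
	}
	return true;
}
```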
diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp
index 6815c941ed25..f962e9518020 100644
--- a/Core/MIPS/IR/IRJit.cpp
+++ b/Core/MIPS/IR/IRJit.cpp
@@ -46,7 +46,6 @@ namespace MIPSComp {

 IRJit::IRJit(MIPSState *mipsState) : frontend_(mipsState->HasDefaultPrefix()), mips_(mipsState) {
 	// u32 size = 128 * 1024;
-	// blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline");
 	InitIR();

 	jo.optimizeForInterpreter = true;
@@ -91,7 +90,7 @@ void IRJit::InvalidateCacheAt(u32 em_address, int length) {
 	std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
 	for (int block_num : numbers) {
 		auto block = blocks_.GetBlock(block_num);
-		int cookie = block->GetTargetOffset() < 0 ? block_num : block->GetTargetOffset();
+		int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset() : block->GetTargetOffset();
 		block->Destroy(cookie);
 	}
 }
@@ -103,13 +102,13 @@ void IRJit::Compile(u32 em_address) {
 	// Look to see if we've preloaded this block.
 	int block_num = blocks_.FindPreloadBlock(em_address);
 	if (block_num != -1) {
-		IRBlock *b = blocks_.GetBlock(block_num);
+		IRBlock *block = blocks_.GetBlock(block_num);
 		// Okay, let's link and finalize the block now.
-		int cookie = b->GetTargetOffset() < 0 ? block_num : b->GetTargetOffset();
-		b->Finalize(cookie);
-		if (b->IsValid()) {
+		int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset() : block->GetTargetOffset();
+		block->Finalize(cookie);
+		if (block->IsValid()) {
 			// Success, we're done.
-			FinalizeTargetBlock(b, block_num);
+			FinalizeTargetBlock(&blocks_, block_num);
 			return;
 		}
 	}
@@ -139,27 +138,25 @@ bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32
 		return preload;
 	}

-	int block_num = blocks_.AllocateBlock(em_address);
+	int block_num = blocks_.AllocateBlock(em_address, mipsBytes, instructions);
 	if ((block_num & ~MIPS_EMUHACK_VALUE_MASK) != 0) {
+		WARN_LOG(JIT, "Failed to allocate block for %08x (%d instructions)", em_address, (int)instructions.size());
 		// Out of block numbers. Caller will handle.
 		return false;
 	}

 	IRBlock *b = blocks_.GetBlock(block_num);
-	b->SetInstructions(instructions);
-	b->SetOriginalAddrSize(em_address, mipsBytes);
 	if (preload) {
 		// Hash, then only update page stats, don't link yet.
 		// TODO: Should we always hash? Then we can reuse blocks.
 		b->UpdateHash();
 	}

-	if (!CompileTargetBlock(b, block_num, preload))
+	if (!CompileTargetBlock(&blocks_, block_num, preload))
 		return false;

 	// Overwrites the first instruction, and also updates stats.
 	blocks_.FinalizeBlock(block_num, preload);
 	if (!preload)
-		FinalizeTargetBlock(b, block_num);
-
+		FinalizeTargetBlock(&blocks_, block_num);
 	return true;
 }
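The cookie rewrite above deserves a note: the cookie is the 24-bit payload embedded in the replacement opcode when a block is finalized, and it must round-trip back to the block. Previously, pure IR mode (negative target offset) stored the block number; it now stores the block's instruction offset into the arena, which is what lets the interpreter dispatch without a block lookup. A sketch of the selection logic repeated throughout this patch, built only from names the patch introduces:

```cpp
// Cookie selection (sketch). A negative target offset means no native code
// was generated (pure IR interpretation), so the arena instruction offset is
// used; otherwise the native JIT code offset is.
int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset()
                                          : block->GetTargetOffset();
block->Finalize(cookie);  // overwrites the block's first MIPS op, per the comment above
```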
@@ -257,20 +254,21 @@ void IRJit::RunLoopUntil(u64 globalticks) {
 			u32 inst = Memory::ReadUnchecked_U32(mips->pc);
 			u32 opcode = inst & 0xFF000000;
 			if (opcode == MIPS_EMUHACK_OPCODE) {
-				IRBlock *block = blocks_.GetBlockUnchecked(inst & 0xFFFFFF);
-
+				u32 offset = inst & 0x00FFFFFF;  // Alternatively, inst - opcode
 #ifdef IR_PROFILING
-				{
-					TimeSpan span;
-					mips->pc = IRInterpret(mips, block->GetInstructions());
-					block->profileStats_.executions += 1;
-					block->profileStats_.totalNanos += span.ElapsedNanos();
-				}
+				IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromOffset(offset));
+				TimeSpan span;
+				mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
+				int64_t elapsedNanos = span.ElapsedNanos();
+				block->profileStats_.executions += 1;
+				block->profileStats_.totalNanos += elapsedNanos;
 #else
-				mips->pc = IRInterpret(mips, block->GetInstructions());
+				mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
 #endif
 				// Note: this will "jump to zero" on a badly constructed block missing exits.
 				if (!Memory::IsValid4AlignedAddress(mips->pc)) {
+					int blockNum = blocks_.GetBlockNumFromOffset(offset);
+					IRBlock *block = blocks_.GetBlockUnchecked(blockNum);
 					Core_ExecException(mips->pc, block->GetOriginalStart(), ExecExceptionType::JUMP);
 					break;
 				}
@@ -300,11 +298,68 @@ void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {

 void IRBlockCache::Clear() {
 	for (int i = 0; i < (int)blocks_.size(); ++i) {
-		int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
+		int cookie = blocks_[i].GetTargetOffset() < 0 ? blocks_[i].GetInstructionOffset() : blocks_[i].GetTargetOffset();
 		blocks_[i].Destroy(cookie);
 	}
 	blocks_.clear();
 	byPage_.clear();
+	arena_.clear();
+	arena_.shrink_to_fit();
+}
+
+IRBlockCache::IRBlockCache() {
+	// For whatever reason, this makes things go slower?? Probably just a CPU cache alignment fluke.
+	// arena_.reserve(1024 * 1024 * 2);
+}
+
+int IRBlockCache::AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &inst) {
+	// We have 24 bits to represent offsets with.
+	const int MAX_ARENA_SIZE = 0x1000000 - 1;
+	int offset = (int)arena_.size();
+	if (offset >= MAX_ARENA_SIZE) {
+		WARN_LOG(JIT, "Filled JIT arena, restarting");
+		return -1;
+	}
+	for (int i = 0; i < (int)inst.size(); i++) {
+		arena_.push_back(inst[i]);
+	}
+	blocks_.push_back(IRBlock(emAddr, origSize, offset, (u16)inst.size()));
+	return (int)blocks_.size() - 1;
+}
+
+int IRBlockCache::GetBlockNumFromOffset(int offset) const {
+	// Block offsets are always in rising order (we don't go back and replace them when invalidated), so we can binary search.
+	int low = 0;
+	int high = (int)blocks_.size() - 1;
+	int found = -1;
+	while (low <= high) {
+		int mid = low + (high - low) / 2;
+		const int blockOffset = blocks_[mid].GetInstructionOffset();
+		if (blockOffset == offset) {
+			found = mid;
+			break;
+		}
+		if (blockOffset < offset) {
+			low = mid + 1;
+		} else {
+			high = mid - 1;
+		}
+	}
+
+#ifndef _DEBUG
+	return found;
+#else
+	// In debug builds, cross-check the binary search result against a linear scan.
+	// TODO: Optimize if we need to call this often.
+	for (int i = 0; i < (int)blocks_.size(); i++) {
+		if (blocks_[i].GetInstructionOffset() == offset) {
+			_dbg_assert_(i == found);
+			return i;
+		}
+	}
+#endif
+	_dbg_assert_(found == -1);
+	return -1;
 }
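With `AllocateBlock` and `GetBlockNumFromOffset` in place, the interpreter hot path above no longer touches `IRBlock` at all: the low 24 bits of the emuhack opcode are the arena offset, and `IRInterpret` is handed a pointer straight into the arena. That 24-bit payload is also why the arena is capped at 0x1000000 entries, and a failed allocation's -1 has high bits set, so it trips the `block_num & ~MIPS_EMUHACK_VALUE_MASK` check in `IRJit::CompileBlock` (assuming that mask is 0x00FFFFFF). A sketch of the round trip; the encode-side write is actually done by `Finalize()`, so that half is illustrative:

```cpp
// Encode (illustrative): the low 24 bits of the emuhack op carry the cookie,
// which in pure IR mode is an arena offset.
u32 emuhack = MIPS_EMUHACK_OPCODE | (cookie & 0x00FFFFFF);

// Decode, as in RunLoopUntil() above: mask the offset back out and dispatch
// directly into the arena, no IRBlock lookup needed.
u32 inst = Memory::ReadUnchecked_U32(mips->pc);
if ((inst & 0xFF000000) == MIPS_EMUHACK_OPCODE) {
	u32 offset = inst & 0x00FFFFFF;
	mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
}
```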

 std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 length) {
@@ -331,7 +386,7 @@ std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 leng

 void IRBlockCache::FinalizeBlock(int i, bool preload) {
 	if (!preload) {
-		int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
+		int cookie = blocks_[i].GetTargetOffset() < 0 ? blocks_[i].GetInstructionOffset() : blocks_[i].GetTargetOffset();
 		blocks_[i].Finalize(cookie);
 	}
@@ -372,16 +427,18 @@ int IRBlockCache::FindPreloadBlock(u32 em_address) {
 int IRBlockCache::FindByCookie(int cookie) {
 	if (blocks_.empty())
 		return -1;
+	// TODO: Maybe a flag to determine target offset mode?
 	if (blocks_[0].GetTargetOffset() < 0)
-		return cookie;
+		return GetBlockNumFromOffset(cookie);

+	// TODO: Now that we are using offsets in pure IR mode too, we can probably unify
+	// the two paradigms. Or actually no, we still need two offsets.
 	for (int i = 0; i < GetNumBlocks(); ++i) {
 		int offset = blocks_[i].GetTargetOffset();
 		if (offset == cookie)
 			return i;
 	}
-
 	return -1;
 }
@@ -391,7 +448,7 @@ std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {

 	for (int number = 0; number < (int)blocks_.size(); ++number) {
 		IRBlock &b = blocks_[number];
-		int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
+		int cookie = b.GetTargetOffset() < 0 ? b.GetInstructionOffset() : b.GetTargetOffset();
 		if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
 			result[number] = number;
 		} else {
@@ -412,7 +469,7 @@ void IRBlockCache::RestoreSavedEmuHackOps(const std::vector<u32> &saved) {
 		IRBlock &b = blocks_[number];
 		// Only if we restored it, write it back.
 		if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
-			int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
+			int cookie = b.GetTargetOffset() < 0 ? b.GetInstructionOffset() : b.GetTargetOffset();
 			b.Finalize(cookie);
 		}
 	}
@@ -434,8 +491,9 @@ JitBlockDebugInfo IRBlockCache::GetBlockDebugInfo(int blockNum) const {
 	}

 	debugInfo.irDisasm.reserve(ir.GetNumInstructions());
+	const IRInst *instructions = GetBlockInstructionPtr(ir);
 	for (int i = 0; i < ir.GetNumInstructions(); i++) {
-		IRInst inst = ir.GetInstructions()[i];
+		IRInst inst = instructions[i];
 		char buffer[256];
 		DisassembleIR(buffer, sizeof(buffer), inst);
 		debugInfo.irDisasm.push_back(buffer);
@@ -448,10 +506,9 @@ void IRBlockCache::ComputeStats(BlockCacheStats &bcStats) const {
 	double maxBloat = 0.0;
 	double minBloat = 1000000000.0;
 	for (const auto &b : blocks_) {
-		double codeSize = (double)b.GetNumInstructions() * sizeof(IRInst);
+		double codeSize = (double)b.GetNumInstructions() * 4;  // Count bloat in instructions rather than bytes (4 bytes per original MIPS op).
 		if (codeSize == 0)
 			continue;
-
 		u32 origAddr, mipsBytes;
 		b.GetRange(origAddr, mipsBytes);
 		double origSize = (double)mipsBytes;
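`FindByCookie` above now has two regimes: in pure IR mode the cookie is an arena offset, so the binary search in `GetBlockNumFromOffset` applies; in native mode it still scans target offsets linearly. The binary search is only valid because `AllocateBlock` appends blocks with strictly rising instruction offsets, as the comment there notes. A hypothetical sanity check, not part of the patch, that states the invariant explicitly:

```cpp
// Hypothetical helper for tests/asserts, not in the patch: verifies the
// monotonic-offset invariant that GetBlockNumFromOffset() relies on.
static bool OffsetsAreMonotonic(const IRBlockCache &cache) {
	for (int i = 1; i < cache.GetNumBlocks(); ++i) {
		// Offsets must rise strictly, since every block holds at least one instruction.
		if (cache.GetBlockInstructionPtr(i) <= cache.GetBlockInstructionPtr(i - 1))
			return false;
	}
	return true;
}
```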
diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h
index 96c6a4d9a63c..81d97925c7ae 100644
--- a/Core/MIPS/IR/IRJit.h
+++ b/Core/MIPS/IR/IRJit.h
@@ -33,48 +33,43 @@
 #include "stddef.h"
 #endif

+// Very expensive, time-profiles every block.
+// Not to be released with this enabled.
+//
 // #define IR_PROFILING

+// Try to catch obvious misses of the above rule.
+#if defined(IR_PROFILING) && defined(GOLD)
+#error IR_PROFILING must not be enabled in GOLD builds.
+#endif
+
 namespace MIPSComp {

 // TODO : Use arena allocators. For now let's just malloc.

 class IRBlock {
 public:
 	IRBlock() {}
-	IRBlock(u32 emAddr) : origAddr_(emAddr) {}
+	IRBlock(u32 emAddr, u32 origSize, int instOffset, u16 numInstructions)
+		: origAddr_(emAddr), origSize_(origSize), instOffset_(instOffset), numInstructions_(numInstructions) {}
 	IRBlock(IRBlock &&b) {
-		instr_ = b.instr_;
+		instOffset_ = b.instOffset_;
 		hash_ = b.hash_;
 		origAddr_ = b.origAddr_;
 		origSize_ = b.origSize_;
 		origFirstOpcode_ = b.origFirstOpcode_;
 		targetOffset_ = b.targetOffset_;
 		numInstructions_ = b.numInstructions_;
-		b.instr_ = nullptr;
-	}
-
-	~IRBlock() {
-		delete[] instr_;
+		b.instOffset_ = 0xFFFFFFFF;
 	}

-	void SetInstructions(const std::vector<IRInst> &inst) {
-		instr_ = new IRInst[inst.size()];
-		numInstructions_ = (u16)inst.size();
-		if (!inst.empty()) {
-			memcpy(instr_, &inst[0], sizeof(IRInst) * inst.size());
-		}
-	}
+	~IRBlock() {}

-	const IRInst *GetInstructions() const { return instr_; }
+	u32 GetInstructionOffset() const { return instOffset_; }
 	int GetNumInstructions() const { return numInstructions_; }
 	MIPSOpcode GetOriginalFirstOp() const { return origFirstOpcode_; }
 	bool HasOriginalFirstOp() const;
 	bool RestoreOriginalFirstOp(int number);
 	bool IsValid() const { return origAddr_ != 0 && origFirstOpcode_.encoding != 0x68FFFFFF; }
-	void SetOriginalAddrSize(u32 address, u32 size) {
-		origAddr_ = address;
-		origSize_ = size;
-	}
 	void SetTargetOffset(int offset) {
 		targetOffset_ = offset;
 	}
@@ -107,7 +102,9 @@ class IRBlock {
 private:
 	u64 CalculateHash() const;

-	IRInst *instr_ = nullptr;
+	// Offset into the block cache's arena.
+	// TODO: These should maybe be stored in a separate array.
+	u32 instOffset_ = 0;
 	u64 hash_ = 0;
 	u32 origAddr_ = 0;
 	u32 origSize_ = 0;
@@ -118,15 +115,12 @@ class IRBlock {

 class IRBlockCache : public JitBlockCacheDebugInterface {
 public:
-	IRBlockCache() {}
+	IRBlockCache();
 	void Clear();
 	std::vector<int> FindInvalidatedBlockNumbers(u32 address, u32 length);
 	void FinalizeBlock(int blockNum, bool preload = false);
 	int GetNumBlocks() const override { return (int)blocks_.size(); }
-	int AllocateBlock(int emAddr) {
-		blocks_.push_back(IRBlock(emAddr));
-		return (int)blocks_.size() - 1;
-	}
+	int AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &inst);
 	IRBlock *GetBlock(int blockNum) {
 		if (blockNum >= 0 && blockNum < (int)blocks_.size()) {
 			return &blocks_[blockNum];
@@ -134,6 +128,16 @@ class IRBlockCache : public JitBlockCacheDebugInterface {
 			return nullptr;
 		}
 	}
+	int GetBlockNumFromOffset(int offset) const;
+	const IRInst *GetBlockInstructionPtr(const IRBlock &block) const {
+		return arena_.data() + block.GetInstructionOffset();
+	}
+	const IRInst *GetBlockInstructionPtr(int blockNum) const {
+		return arena_.data() + blocks_[blockNum].GetInstructionOffset();
+	}
+	const IRInst *GetArenaPtr() const {
+		return arena_.data();
+	}
 	bool IsValidBlock(int blockNum) const override {
 		return blockNum >= 0 && blockNum < (int)blocks_.size() && blocks_[blockNum].IsValid();
 	}
@@ -185,6 +189,7 @@ class IRBlockCache : public JitBlockCacheDebugInterface {
 	u32 AddressToPage(u32 addr) const;

 	std::vector<IRBlock> blocks_;
+	std::vector<IRInst> arena_;
 	std::unordered_map<u32, std::vector<int>> byPage_;
 };
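One property of the new layout worth spelling out: `arena_` is a `std::vector<IRInst>`, so `AllocateBlock()` may reallocate it and move every instruction. That is presumably why `IRBlock` stores an offset rather than a pointer, and why callers re-resolve `GetBlockInstructionPtr()` instead of caching its result. A hedged usage sketch (variable names are illustrative):

```cpp
// Never hold an IRInst pointer across a call that can grow the arena.
const IRInst *insts = cache.GetBlockInstructionPtr(blockNum);
// ... safe to read through insts here ...
int newBlock = cache.AllocateBlock(emAddr, mipsBytes, newInsts);  // may reallocate arena_
insts = cache.GetBlockInstructionPtr(blockNum);  // re-resolve before reading again
```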
@@ -227,8 +232,8 @@ class IRJit : public JitInterface {
 protected:
 	bool CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload);

-	virtual bool CompileTargetBlock(IRBlock *block, int block_num, bool preload) { return true; }
-	virtual void FinalizeTargetBlock(IRBlock *block, int block_num) {}
+	virtual bool CompileTargetBlock(IRBlockCache *irBlockCache, int block_num, bool preload) { return true; }
+	virtual void FinalizeTargetBlock(IRBlockCache *irBlockCache, int block_num) {}

 	JitOptions jo;
diff --git a/Core/MIPS/IR/IRNativeCommon.cpp b/Core/MIPS/IR/IRNativeCommon.cpp
index c3253230bce9..319495f3437e 100644
--- a/Core/MIPS/IR/IRNativeCommon.cpp
+++ b/Core/MIPS/IR/IRNativeCommon.cpp
@@ -506,12 +506,12 @@ void IRNativeJit::Init(IRNativeBackend &backend) {
 	}
 }

-bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
-	return backend_->CompileBlock(block, block_num, preload);
+bool IRNativeJit::CompileTargetBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
+	return backend_->CompileBlock(irBlockCache, block_num, preload);
 }

-void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
-	backend_->FinalizeBlock(block, block_num, jo);
+void IRNativeJit::FinalizeTargetBlock(IRBlockCache *irBlockCache, int block_num) {
+	backend_->FinalizeBlock(irBlockCache, block_num, jo);
 }

 void IRNativeJit::RunLoopUntil(u64 globalticks) {
@@ -532,7 +532,7 @@ void IRNativeJit::InvalidateCacheAt(u32 em_address, int length) {
 	std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
 	for (int block_num : numbers) {
 		auto block = blocks_.GetBlock(block_num);
-		backend_->InvalidateBlock(block, block_num);
+		backend_->InvalidateBlock(&blocks_, block_num);
 		block->Destroy(block->GetTargetOffset());
 	}
 }
@@ -645,7 +645,8 @@ int IRNativeBackend::OffsetFromCodePtr(const u8 *ptr) {
 	return (int)codeBlock.GetOffset(ptr);
 }

-void IRNativeBackend::FinalizeBlock(IRBlock *block, int block_num, const JitOptions &jo) {
+void IRNativeBackend::FinalizeBlock(IRBlockCache *irBlockCache, int block_num, const JitOptions &jo) {
+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	if (jo.enableBlocklink) {
 		uint32_t pc = block->GetOriginalStart();
diff --git a/Core/MIPS/IR/IRNativeCommon.h b/Core/MIPS/IR/IRNativeCommon.h
index a5ccea28f46f..53e17178c15d 100644
--- a/Core/MIPS/IR/IRNativeCommon.h
+++ b/Core/MIPS/IR/IRNativeCommon.h
@@ -71,10 +71,10 @@ class IRNativeBackend {
 	int OffsetFromCodePtr(const u8 *ptr);

 	virtual void GenerateFixedCode(MIPSState *mipsState) = 0;
-	virtual bool CompileBlock(IRBlock *block, int block_num, bool preload) = 0;
+	virtual bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) = 0;
 	virtual void ClearAllBlocks() = 0;
-	virtual void InvalidateBlock(IRBlock *block, int block_num) = 0;
-	void FinalizeBlock(IRBlock *block, int block_num, const JitOptions &jo);
+	virtual void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) = 0;
+	void FinalizeBlock(IRBlockCache *irBlockCache, int block_num, const JitOptions &jo);

 	virtual void UpdateFCR31(MIPSState *mipsState) {}
@@ -199,8 +199,8 @@ class IRNativeJit : public IRJit {
 protected:
 	void Init(IRNativeBackend &backend);

-	bool CompileTargetBlock(IRBlock *block, int block_num, bool preload) override;
-	void FinalizeTargetBlock(IRBlock *block, int block_num) override;
+	bool CompileTargetBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
+	void FinalizeTargetBlock(IRBlockCache *irBlockCache, int block_num) override;

 	IRNativeBackend *backend_ = nullptr;
 	IRNativeHooks hooks_;
diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp
index 01eed63fae0c..1af6dbfed068 100644
--- a/Core/MIPS/IR/IRRegCache.cpp
+++ b/Core/MIPS/IR/IRRegCache.cpp
@@ -105,7 +105,7 @@ void IRImmRegCache::MapDirtyInIn(IRReg rd, IRReg rs, IRReg rt) {

 IRNativeRegCacheBase::IRNativeRegCacheBase(MIPSComp::JitOptions *jo) : jo_(jo) {}

-void IRNativeRegCacheBase::Start(MIPSComp::IRBlock *irBlock) {
+void IRNativeRegCacheBase::Start(MIPSComp::IRBlockCache *irBlockCache, int blockNum) {
 	if (!initialReady_) {
 		SetupInitialRegs();
 		initialReady_ = true;
@@ -114,6 +114,8 @@ void IRNativeRegCacheBase::Start(MIPSComp::IRBlock *irBlock) {
 	memcpy(nr, nrInitial_, sizeof(nr[0]) * config_.totalNativeRegs);
 	memcpy(mr, mrInitial_, sizeof(mr));

+	irBlock_ = irBlockCache->GetBlock(blockNum);
+
 	int numStatics;
 	const StaticAllocation *statics = GetStaticAllocations(numStatics);
 	for (int i = 0; i < numStatics; i++) {
@@ -124,10 +126,11 @@ void IRNativeRegCacheBase::Start(MIPSComp::IRBlock *irBlock) {
 		mr[statics[i].mr].nReg = statics[i].nr;
 		mr[statics[i].mr].isStatic = true;
 		// Lock it until the very end.
-		mr[statics[i].mr].spillLockIRIndex = irBlock->GetNumInstructions();
+		mr[statics[i].mr].spillLockIRIndex = irBlock_->GetNumInstructions();
 	}

-	irBlock_ = irBlock;
+	irBlockNum_ = blockNum;
+	irBlockCache_ = irBlockCache;
 	irIndex_ = 0;
 }
@@ -430,7 +433,7 @@ bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, IRReg r) const {
 	info.lookaheadCount = UNUSED_LOOKAHEAD_OPS;
 	// We look starting one ahead, unlike spilling. We want to know if it clobbers later.
 	info.currentIndex = irIndex_ + 1;
-	info.instructions = irBlock_->GetInstructions();
+	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
 	info.numInstructions = irBlock_->GetNumInstructions();

 	// Make sure we're on the first one if this is multi-lane.
@@ -457,7 +460,7 @@ bool IRNativeRegCacheBase::IsRegRead(MIPSLoc type, IRReg first) const {
 	info.lookaheadCount = UNUSED_LOOKAHEAD_OPS;
 	// We look starting one ahead, unlike spilling.
 	info.currentIndex = irIndex_ + 1;
-	info.instructions = irBlock_->GetInstructions();
+	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
 	info.numInstructions = irBlock_->GetNumInstructions();

 	// Note: this intentionally doesn't look at the full reg, only the lane.
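The lookahead helpers above, and FindBestToSpill/MapNativeReg below, all build the same `IRSituation`, differing only in the start index and window. If this pattern spreads further, a small helper could centralize the cache lookup; the following is a hypothetical refactor sketch, not code from the patch:

```cpp
// Hypothetical: shared IRSituation setup for the register cache's lookahead queries.
IRSituation IRNativeRegCacheBase::MakeSituation(int startIndex, int lookahead) const {
	IRSituation info;
	info.lookaheadCount = lookahead;
	info.currentIndex = startIndex;
	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
	info.numInstructions = irBlock_->GetNumInstructions();
	return info;
}
```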
@@ -474,7 +477,7 @@ IRNativeReg IRNativeRegCacheBase::FindBestToSpill(MIPSLoc type, MIPSMap flags, b
 	IRSituation info;
 	info.lookaheadCount = UNUSED_LOOKAHEAD_OPS;
 	info.currentIndex = irIndex_;
-	info.instructions = irBlock_->GetInstructions();
+	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
 	info.numInstructions = irBlock_->GetNumInstructions();

 	*clobbered = false;
@@ -1026,7 +1029,7 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi
 	IRSituation info;
 	info.lookaheadCount = 16;
 	info.currentIndex = irIndex_;
-	info.instructions = irBlock_->GetInstructions();
+	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
 	info.numInstructions = irBlock_->GetNumInstructions();

 	IRReg basefpr = first - oldlane - 32;
diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h
index 4301886b4414..742d1cae3385 100644
--- a/Core/MIPS/IR/IRRegCache.h
+++ b/Core/MIPS/IR/IRRegCache.h
@@ -40,6 +40,7 @@ class MIPSState;

 namespace MIPSComp {
 class IRBlock;
+class IRBlockCache;
 struct JitOptions;
 }

@@ -153,7 +154,7 @@ class IRNativeRegCacheBase {
 	IRNativeRegCacheBase(MIPSComp::JitOptions *jo);
 	virtual ~IRNativeRegCacheBase() {}

-	virtual void Start(MIPSComp::IRBlock *irBlock);
+	virtual void Start(MIPSComp::IRBlockCache *irBlockCache, int blockNum);
 	void SetIRIndex(int index) {
 		irIndex_ = index;
 	}
@@ -248,7 +249,9 @@ class IRNativeRegCacheBase {
 	bool IsValidFPR(IRReg r) const;

 	MIPSComp::JitOptions *jo_;
+	int irBlockNum_ = 0;
 	const MIPSComp::IRBlock *irBlock_ = nullptr;
+	const MIPSComp::IRBlockCache *irBlockCache_ = nullptr;
 	int irIndex_ = 0;

 	struct {
diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp
index 6388d1652afb..d2391a2e7424 100644
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@@ -2128,7 +2128,7 @@ namespace MIPSInt
 		ApplySwizzleT(&t[n - 1], V_Single, -INFINITY);
 	}

-	for (int i = 0; i < n; i++) {
+	for (int i = 0; i < (int)n; i++) {
 		switch (optype) {
 		case 0: d.f[i] = s[i] + t[i]; break; //vadd
 		case 1: d.f[i] = s[i] - t[i]; break; //vsub
diff --git a/Core/MIPS/RiscV/RiscVJit.cpp b/Core/MIPS/RiscV/RiscVJit.cpp
index 65ebffc7b991..6c3544969c52 100644
--- a/Core/MIPS/RiscV/RiscVJit.cpp
+++ b/Core/MIPS/RiscV/RiscVJit.cpp
@@ -56,10 +56,11 @@ static void NoBlockExits() {
 	_assert_msg_(false, "Never exited block, invalid IR?");
 }

-bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
+bool RiscVJitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
 	if (GetSpaceLeft() < 0x800)
 		return false;

+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumInstructions() * 32));

 	u32 startPC = block->GetOriginalStart();
@@ -81,11 +82,12 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
 	block->SetTargetOffset((int)GetOffset(blockStart));
 	compilingBlockNum_ = block_num;

-	regs_.Start(block);
+	regs_.Start(irBlockCache, block_num);

 	std::vector<const u8 *> addresses;
+	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 	for (int i = 0; i < block->GetNumInstructions(); ++i) {
-		const IRInst &inst = block->GetInstructions()[i];
+		const IRInst &inst = instructions[i];
 		regs_.SetIRIndex(i);
 		addresses.push_back(GetCodePtr());
@@ -142,10 +144,11 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
 			addressesLookup[addresses[i]] = i;

 		INFO_LOG(JIT, "=============== RISCV (%08x, %d bytes) ===============", startPC, len);
+		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
 			auto it = addressesLookup.find(p);
 			if (it != addressesLookup.end()) {
-				const IRInst &inst = block->GetInstructions()[it->second];
+				const IRInst &inst = instructions[it->second];

 				char temp[512];
 				DisassembleIR(temp, sizeof(temp), inst);
@@ -295,7 +298,8 @@ void RiscVJitBackend::ClearAllBlocks() {
 	EraseAllLinks(-1);
 }

-void RiscVJitBackend::InvalidateBlock(IRBlock *block, int block_num) {
+void RiscVJitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	int offset = block->GetTargetOffset();

 	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;
diff --git a/Core/MIPS/RiscV/RiscVJit.h b/Core/MIPS/RiscV/RiscVJit.h
index 7ccbcce90b36..2bc0b1ec5127 100644
--- a/Core/MIPS/RiscV/RiscVJit.h
+++ b/Core/MIPS/RiscV/RiscVJit.h
@@ -36,9 +36,9 @@ class RiscVJitBackend : public RiscVGen::RiscVCodeBlock, public IRNativeBackend
 	bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;

 	void GenerateFixedCode(MIPSState *mipsState) override;
-	bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
+	bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
 	void ClearAllBlocks() override;
-	void InvalidateBlock(IRBlock *block, int block_num) override;
+	void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) override;

 protected:
 	const CodeBlockCommon &CodeBlock() const override {
diff --git a/Core/MIPS/x86/X64IRJit.cpp b/Core/MIPS/x86/X64IRJit.cpp
index be6a37469713..4bbc92430668 100644
--- a/Core/MIPS/x86/X64IRJit.cpp
+++ b/Core/MIPS/x86/X64IRJit.cpp
@@ -55,10 +55,11 @@ static void NoBlockExits() {
 	_assert_msg_(false, "Never exited block, invalid IR?");
 }

-bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
+bool X64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
 	if (GetSpaceLeft() < 0x800)
 		return false;

+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	u32 startPC = block->GetOriginalStart();
 	bool wroteCheckedOffset = false;
 	if (jo.enableBlocklink && !jo.useBackJump) {
@@ -85,12 +86,13 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
 	compilingBlockNum_ = block_num;
 	lastConstPC_ = 0;

-	regs_.Start(block);
+	regs_.Start(irBlockCache, block_num);

 	std::vector<const u8 *> addresses;
 	addresses.reserve(block->GetNumInstructions());
+	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 	for (int i = 0; i < block->GetNumInstructions(); ++i) {
-		const IRInst &inst = block->GetInstructions()[i];
+		const IRInst &inst = instructions[i];
 		regs_.SetIRIndex(i);
 		addresses.push_back(GetCodePtr());
@@ -146,10 +148,11 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
 			addressesLookup[addresses[i]] = i;

 		INFO_LOG(JIT, "=============== x86 (%08x, %d bytes) ===============", startPC, len);
+		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
 			auto it = addressesLookup.find(p);
 			if (it != addressesLookup.end()) {
-				const IRInst &inst = block->GetInstructions()[it->second];
+				const IRInst &inst = instructions[it->second];

 				char temp[512];
 				DisassembleIR(temp, sizeof(temp), inst);
@@ -316,7 +319,8 @@ void X64JitBackend::ClearAllBlocks() {
 	EraseAllLinks(-1);
 }

-void X64JitBackend::InvalidateBlock(IRBlock *block, int block_num) {
+void X64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	int offset = block->GetTargetOffset();

 	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;
diff --git a/Core/MIPS/x86/X64IRJit.h b/Core/MIPS/x86/X64IRJit.h
index 15a2fb9b449c..e80a8544d234 100644
--- a/Core/MIPS/x86/X64IRJit.h
+++ b/Core/MIPS/x86/X64IRJit.h
@@ -52,9 +52,9 @@ class X64JitBackend : public Gen::XCodeBlock, public IRNativeBackend {
 	bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;

 	void GenerateFixedCode(MIPSState *mipsState) override;
-	bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
+	bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
 	void ClearAllBlocks() override;
-	void InvalidateBlock(IRBlock *block, int block_num) override;
+	void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) override;

 protected:
 	const CodeBlockCommon &CodeBlock() const override {
diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
index 7f141d634d23..4c7d3227801c 100644
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -592,7 +592,7 @@ void VertexDecoder::Step_Color565Morph() const
 		c[i] = clamp_u8((int)col[i]);
 	}
 	c[3] = 255;
-	// Always full alpha.
+	// Always full alpha. (Is this true??)
 }

 void VertexDecoder::Step_Color5551Morph() const