diff --git a/Core/MIPS/ARM64/Arm64IRJit.cpp b/Core/MIPS/ARM64/Arm64IRJit.cpp
index 26d9913bdc74..420f7443ca45 100644
--- a/Core/MIPS/ARM64/Arm64IRJit.cpp
+++ b/Core/MIPS/ARM64/Arm64IRJit.cpp
@@ -65,10 +65,11 @@ static void NoBlockExits() {
 	_assert_msg_(false, "Never exited block, invalid IR?");
 }

-bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
+bool Arm64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
 	if (GetSpaceLeft() < 0x800)
 		return false;

+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumInstructions() * 32));

 	u32 startPC = block->GetOriginalStart();
@@ -92,12 +93,13 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
 	compilingBlockNum_ = block_num;
 	lastConstPC_ = 0;

-	regs_.Start(block);
+	regs_.Start(irBlockCache, block_num);

 	std::vector<const u8 *> addresses;
 	addresses.reserve(block->GetNumInstructions());
+	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 	for (int i = 0; i < block->GetNumInstructions(); ++i) {
-		const IRInst &inst = block->GetInstructions()[i];
+		const IRInst &inst = instructions[i];
 		regs_.SetIRIndex(i);
 		addresses.push_back(GetCodePtr());
@@ -156,10 +158,11 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
 			addressesLookup[addresses[i]] = i;

 		INFO_LOG(JIT, "=============== ARM64 (%08x, %d bytes) ===============", startPC, len);
+		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
 			auto it = addressesLookup.find(p);
 			if (it != addressesLookup.end()) {
-				const IRInst &inst = block->GetInstructions()[it->second];
+				const IRInst &inst = instructions[it->second];

 				char temp[512];
 				DisassembleIR(temp, sizeof(temp), inst);
@@ -319,7 +322,8 @@ void Arm64JitBackend::ClearAllBlocks() {
 	EraseAllLinks(-1);
 }

-void Arm64JitBackend::InvalidateBlock(IRBlock *block, int block_num) {
+void Arm64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	int offset = block->GetTargetOffset();

 	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;
diff --git a/Core/MIPS/ARM64/Arm64IRJit.h b/Core/MIPS/ARM64/Arm64IRJit.h
index 055e525565f8..c2992f3520b7 100644
--- a/Core/MIPS/ARM64/Arm64IRJit.h
+++ b/Core/MIPS/ARM64/Arm64IRJit.h
@@ -40,9 +40,9 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend
 	bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;

 	void GenerateFixedCode(MIPSState *mipsState) override;
-	bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
+	bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
 	void ClearAllBlocks() override;
-	void InvalidateBlock(IRBlock *block, int block_num) override;
+	void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) override;

 	void UpdateFCR31(MIPSState *mipsState) override;
diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp
index 191ab8b2f7f8..7a1453f2f5cb 100644
--- a/Core/MIPS/IR/IRInst.cpp
+++ b/Core/MIPS/IR/IRInst.cpp
@@ -184,6 +184,8 @@ static const IRMeta irMeta[] = {
 const IRMeta *metaIndex[256];

 void InitIR() {
+	if (metaIndex[0])
+		return;
 	for (size_t i = 0; i < ARRAY_SIZE(irMeta); i++) {
 		metaIndex[(int)irMeta[i].op] = &irMeta[i];
 	}
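Note on the backend interface change above, which the RISC-V and x86 backends further down mirror: IR instructions now live in one growable arena owned by `IRBlockCache` instead of per-block heap allocations, so backends receive the cache plus a block number and resolve pointers at the point of use. A minimal sketch of the pattern, using only accessors introduced in this patch (the backend class name is a placeholder):

```cpp
// Sketch only: resolve block data through the cache each time it is needed.
// Arena growth can reallocate the underlying std::vector<IRInst>, so a
// pointer fetched here must not be cached across allocations.
bool SomeJitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
	IRBlock *block = irBlockCache->GetBlock(block_num);
	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
	for (int i = 0; i < block->GetNumInstructions(); ++i) {
		const IRInst &inst = instructions[i];
		// ... emit native code for inst ...
	}
	return true;
}
```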
diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp
index 6815c941ed25..f962e9518020 100644
--- a/Core/MIPS/IR/IRJit.cpp
+++ b/Core/MIPS/IR/IRJit.cpp
@@ -46,7 +46,6 @@ namespace MIPSComp {

 IRJit::IRJit(MIPSState *mipsState) : frontend_(mipsState->HasDefaultPrefix()), mips_(mipsState) {
 	// u32 size = 128 * 1024;
-	// blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline");
 	InitIR();

 	jo.optimizeForInterpreter = true;
@@ -91,7 +90,7 @@ void IRJit::InvalidateCacheAt(u32 em_address, int length) {
 	std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
 	for (int block_num : numbers) {
 		auto block = blocks_.GetBlock(block_num);
-		int cookie = block->GetTargetOffset() < 0 ? block_num : block->GetTargetOffset();
+		int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset() : block->GetTargetOffset();
 		block->Destroy(cookie);
 	}
 }
@@ -103,13 +102,13 @@ void IRJit::Compile(u32 em_address) {
 	// Look to see if we've preloaded this block.
 	int block_num = blocks_.FindPreloadBlock(em_address);
 	if (block_num != -1) {
-		IRBlock *b = blocks_.GetBlock(block_num);
+		IRBlock *block = blocks_.GetBlock(block_num);
 		// Okay, let's link and finalize the block now.
-		int cookie = b->GetTargetOffset() < 0 ? block_num : b->GetTargetOffset();
-		b->Finalize(cookie);
-		if (b->IsValid()) {
+		int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset() : block->GetTargetOffset();
+		block->Finalize(cookie);
+		if (block->IsValid()) {
 			// Success, we're done.
-			FinalizeTargetBlock(b, block_num);
+			FinalizeTargetBlock(&blocks_, block_num);
 			return;
 		}
 	}
@@ -139,27 +138,25 @@ bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32
 		return preload;
 	}

-	int block_num = blocks_.AllocateBlock(em_address);
+	int block_num = blocks_.AllocateBlock(em_address, mipsBytes, instructions);
 	if ((block_num & ~MIPS_EMUHACK_VALUE_MASK) != 0) {
+		WARN_LOG(JIT, "Failed to allocate block for %08x (%d instructions)", em_address, (int)instructions.size());
 		// Out of block numbers. Caller will handle.
 		return false;
 	}

 	IRBlock *b = blocks_.GetBlock(block_num);
-	b->SetInstructions(instructions);
-	b->SetOriginalAddrSize(em_address, mipsBytes);
 	if (preload) {
 		// Hash, then only update page stats, don't link yet.
 		// TODO: Should we always hash? Then we can reuse blocks.
 		b->UpdateHash();
 	}

-	if (!CompileTargetBlock(b, block_num, preload))
+	if (!CompileTargetBlock(&blocks_, block_num, preload))
 		return false;

 	// Overwrites the first instruction, and also updates stats.
 	blocks_.FinalizeBlock(block_num, preload);
 	if (!preload)
-		FinalizeTargetBlock(b, block_num);
-
+		FinalizeTargetBlock(&blocks_, block_num);
 	return true;
 }
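The cookie rewrite above deserves a note: the cookie is the 24-bit payload embedded in the replacement opcode when a block is finalized, and it must round-trip back to the block. Previously, pure IR mode (negative target offset) stored the block number; it now stores the block's instruction offset into the arena, which is what lets the interpreter dispatch without a block lookup. A sketch of the selection logic repeated throughout this patch, built only from names the patch introduces:

```cpp
// Cookie selection (sketch). A negative target offset means no native code
// was generated (pure IR interpretation), so the arena instruction offset is
// used; otherwise the native JIT code offset is.
int cookie = block->GetTargetOffset() < 0 ? block->GetInstructionOffset()
                                          : block->GetTargetOffset();
block->Finalize(cookie);  // overwrites the block's first MIPS op, per the comment above
```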
@@ -257,20 +254,21 @@ void IRJit::RunLoopUntil(u64 globalticks) {
 			u32 inst = Memory::ReadUnchecked_U32(mips->pc);
 			u32 opcode = inst & 0xFF000000;
 			if (opcode == MIPS_EMUHACK_OPCODE) {
-				IRBlock *block = blocks_.GetBlockUnchecked(inst & 0xFFFFFF);
-
+				u32 offset = inst & 0x00FFFFFF;  // Alternatively, inst - opcode
 #ifdef IR_PROFILING
-				{
-					TimeSpan span;
-					mips->pc = IRInterpret(mips, block->GetInstructions());
-					block->profileStats_.executions += 1;
-					block->profileStats_.totalNanos += span.ElapsedNanos();
-				}
+				IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromOffset(offset));
+				TimeSpan span;
+				mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
+				int64_t elapsedNanos = span.ElapsedNanos();
+				block->profileStats_.executions += 1;
+				block->profileStats_.totalNanos += elapsedNanos;
 #else
-				mips->pc = IRInterpret(mips, block->GetInstructions());
+				mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
 #endif
 				// Note: this will "jump to zero" on a badly constructed block missing exits.
 				if (!Memory::IsValid4AlignedAddress(mips->pc)) {
+					int blockNum = blocks_.GetBlockNumFromOffset(offset);
+					IRBlock *block = blocks_.GetBlockUnchecked(blockNum);
 					Core_ExecException(mips->pc, block->GetOriginalStart(), ExecExceptionType::JUMP);
 					break;
 				}
@@ -300,11 +298,68 @@ void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {

 void IRBlockCache::Clear() {
 	for (int i = 0; i < (int)blocks_.size(); ++i) {
-		int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
+		int cookie = blocks_[i].GetTargetOffset() < 0 ? blocks_[i].GetInstructionOffset() : blocks_[i].GetTargetOffset();
 		blocks_[i].Destroy(cookie);
 	}
 	blocks_.clear();
 	byPage_.clear();
+	arena_.clear();
+	arena_.shrink_to_fit();
+}
+
+IRBlockCache::IRBlockCache() {
+	// For whatever reason, this makes things go slower?? Probably just a CPU cache alignment fluke.
+	// arena_.reserve(1024 * 1024 * 2);
+}
+
+int IRBlockCache::AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &inst) {
+	// We have 24 bits to represent offsets with.
+	const int MAX_ARENA_SIZE = 0x1000000 - 1;
+	int offset = (int)arena_.size();
+	if (offset >= MAX_ARENA_SIZE) {
+		WARN_LOG(JIT, "Filled JIT arena, restarting");
+		return -1;
+	}
+	for (int i = 0; i < (int)inst.size(); i++) {
+		arena_.push_back(inst[i]);
+	}
+	blocks_.push_back(IRBlock(emAddr, origSize, offset, (u16)inst.size()));
+	return (int)blocks_.size() - 1;
+}
+
+int IRBlockCache::GetBlockNumFromOffset(int offset) const {
+	// Block offsets are always in rising order (we don't go back and replace them when invalidated), so we can binary search.
+	int low = 0;
+	int high = (int)blocks_.size() - 1;
+	int found = -1;
+	while (low <= high) {
+		int mid = low + (high - low) / 2;
+		const int blockOffset = blocks_[mid].GetInstructionOffset();
+		if (blockOffset == offset) {
+			found = mid;
+			break;
+		}
+		if (blockOffset < offset) {
+			low = mid + 1;
+		} else {
+			high = mid - 1;
+		}
+	}
+
+#ifndef _DEBUG
+	return found;
+#else
+	// In debug builds, cross-check the binary search result against a linear scan.
+	// TODO: Optimize if we need to call this often.
+	for (int i = 0; i < (int)blocks_.size(); i++) {
+		if (blocks_[i].GetInstructionOffset() == offset) {
+			_dbg_assert_(i == found);
+			return i;
+		}
+	}
+#endif
+	_dbg_assert_(found == -1);
+	return -1;
 }
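With `AllocateBlock` and `GetBlockNumFromOffset` in place, the interpreter hot path above no longer touches `IRBlock` at all: the low 24 bits of the emuhack opcode are the arena offset, and `IRInterpret` is handed a pointer straight into the arena. That 24-bit payload is also why the arena is capped at 0x1000000 entries, and a failed allocation's -1 has high bits set, so it trips the `block_num & ~MIPS_EMUHACK_VALUE_MASK` check in `IRJit::CompileBlock` (assuming that mask is 0x00FFFFFF). A sketch of the round trip; the encode-side write is actually done by `Finalize()`, so that half is illustrative:

```cpp
// Encode (illustrative): the low 24 bits of the emuhack op carry the cookie,
// which in pure IR mode is an arena offset.
u32 emuhack = MIPS_EMUHACK_OPCODE | (cookie & 0x00FFFFFF);

// Decode, as in RunLoopUntil() above: mask the offset back out and dispatch
// directly into the arena, no IRBlock lookup needed.
u32 inst = Memory::ReadUnchecked_U32(mips->pc);
if ((inst & 0xFF000000) == MIPS_EMUHACK_OPCODE) {
	u32 offset = inst & 0x00FFFFFF;
	mips->pc = IRInterpret(mips, blocks_.GetArenaPtr() + offset);
}
```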

 std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 length) {
@@ -331,7 +386,7 @@ std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 leng

 void IRBlockCache::FinalizeBlock(int i, bool preload) {
 	if (!preload) {
-		int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
+		int cookie = blocks_[i].GetTargetOffset() < 0 ? blocks_[i].GetInstructionOffset() : blocks_[i].GetTargetOffset();
 		blocks_[i].Finalize(cookie);
 	}
@@ -372,16 +427,18 @@ int IRBlockCache::FindPreloadBlock(u32 em_address) {
 int IRBlockCache::FindByCookie(int cookie) {
 	if (blocks_.empty())
 		return -1;
+	// TODO: Maybe a flag to determine target offset mode?
 	if (blocks_[0].GetTargetOffset() < 0)
-		return cookie;
+		return GetBlockNumFromOffset(cookie);

+	// TODO: Now that we are using offsets in pure IR mode too, we can probably unify
+	// the two paradigms. Or actually no, we still need two offsets.
 	for (int i = 0; i < GetNumBlocks(); ++i) {
 		int offset = blocks_[i].GetTargetOffset();
 		if (offset == cookie)
 			return i;
 	}
-
 	return -1;
 }
@@ -391,7 +448,7 @@ std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {

 	for (int number = 0; number < (int)blocks_.size(); ++number) {
 		IRBlock &b = blocks_[number];
-		int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
+		int cookie = b.GetTargetOffset() < 0 ? b.GetInstructionOffset() : b.GetTargetOffset();
 		if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
 			result[number] = number;
 		} else {
@@ -412,7 +469,7 @@ void IRBlockCache::RestoreSavedEmuHackOps(const std::vector<u32> &saved) {
 		IRBlock &b = blocks_[number];
 		// Only if we restored it, write it back.
 		if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
-			int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
+			int cookie = b.GetTargetOffset() < 0 ? b.GetInstructionOffset() : b.GetTargetOffset();
 			b.Finalize(cookie);
 		}
 	}
@@ -434,8 +491,9 @@ JitBlockDebugInfo IRBlockCache::GetBlockDebugInfo(int blockNum) const {
 	}

 	debugInfo.irDisasm.reserve(ir.GetNumInstructions());
+	const IRInst *instructions = GetBlockInstructionPtr(ir);
 	for (int i = 0; i < ir.GetNumInstructions(); i++) {
-		IRInst inst = ir.GetInstructions()[i];
+		IRInst inst = instructions[i];
 		char buffer[256];
 		DisassembleIR(buffer, sizeof(buffer), inst);
 		debugInfo.irDisasm.push_back(buffer);
@@ -448,10 +506,9 @@ void IRBlockCache::ComputeStats(BlockCacheStats &bcStats) const {
 	double maxBloat = 0.0;
 	double minBloat = 1000000000.0;
 	for (const auto &b : blocks_) {
-		double codeSize = (double)b.GetNumInstructions() * sizeof(IRInst);
+		double codeSize = (double)b.GetNumInstructions() * 4;  // Count bloat in instructions rather than bytes (4 bytes per original MIPS op).
 		if (codeSize == 0)
 			continue;
-
 		u32 origAddr, mipsBytes;
 		b.GetRange(origAddr, mipsBytes);
 		double origSize = (double)mipsBytes;
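`FindByCookie` above now has two regimes: in pure IR mode the cookie is an arena offset, so the binary search in `GetBlockNumFromOffset` applies; in native mode it still scans target offsets linearly. The binary search is only valid because `AllocateBlock` appends blocks with strictly rising instruction offsets, as the comment there notes. A hypothetical sanity check, not part of the patch, that states the invariant explicitly:

```cpp
// Hypothetical helper for tests/asserts, not in the patch: verifies the
// monotonic-offset invariant that GetBlockNumFromOffset() relies on.
static bool OffsetsAreMonotonic(const IRBlockCache &cache) {
	for (int i = 1; i < cache.GetNumBlocks(); ++i) {
		// Offsets must rise strictly, since every block holds at least one instruction.
		if (cache.GetBlockInstructionPtr(i) <= cache.GetBlockInstructionPtr(i - 1))
			return false;
	}
	return true;
}
```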
diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h
index 96c6a4d9a63c..81d97925c7ae 100644
--- a/Core/MIPS/IR/IRJit.h
+++ b/Core/MIPS/IR/IRJit.h
@@ -33,48 +33,43 @@
 #include "stddef.h"
 #endif

+// Very expensive, time-profiles every block.
+// Not to be released with this enabled.
+//
 // #define IR_PROFILING

+// Try to catch obvious misses of the above rule.
+#if defined(IR_PROFILING) && defined(GOLD)
+#error IR_PROFILING must not be enabled in GOLD builds.
+#endif
+
 namespace MIPSComp {

 // TODO : Use arena allocators. For now let's just malloc.

 class IRBlock {
 public:
 	IRBlock() {}
-	IRBlock(u32 emAddr) : origAddr_(emAddr) {}
+	IRBlock(u32 emAddr, u32 origSize, int instOffset, u16 numInstructions)
+		: origAddr_(emAddr), origSize_(origSize), instOffset_(instOffset), numInstructions_(numInstructions) {}
 	IRBlock(IRBlock &&b) {
-		instr_ = b.instr_;
+		instOffset_ = b.instOffset_;
 		hash_ = b.hash_;
 		origAddr_ = b.origAddr_;
 		origSize_ = b.origSize_;
 		origFirstOpcode_ = b.origFirstOpcode_;
 		targetOffset_ = b.targetOffset_;
 		numInstructions_ = b.numInstructions_;
-		b.instr_ = nullptr;
-	}
-
-	~IRBlock() {
-		delete[] instr_;
+		b.instOffset_ = 0xFFFFFFFF;
 	}

-	void SetInstructions(const std::vector<IRInst> &inst) {
-		instr_ = new IRInst[inst.size()];
-		numInstructions_ = (u16)inst.size();
-		if (!inst.empty()) {
-			memcpy(instr_, &inst[0], sizeof(IRInst) * inst.size());
-		}
-	}
+	~IRBlock() {}

-	const IRInst *GetInstructions() const { return instr_; }
+	u32 GetInstructionOffset() const { return instOffset_; }
 	int GetNumInstructions() const { return numInstructions_; }
 	MIPSOpcode GetOriginalFirstOp() const { return origFirstOpcode_; }
 	bool HasOriginalFirstOp() const;
 	bool RestoreOriginalFirstOp(int number);
 	bool IsValid() const { return origAddr_ != 0 && origFirstOpcode_.encoding != 0x68FFFFFF; }
-	void SetOriginalAddrSize(u32 address, u32 size) {
-		origAddr_ = address;
-		origSize_ = size;
-	}
 	void SetTargetOffset(int offset) {
 		targetOffset_ = offset;
 	}
@@ -107,7 +102,9 @@ class IRBlock {
 private:
 	u64 CalculateHash() const;

-	IRInst *instr_ = nullptr;
+	// Offset into the block cache's arena.
+	// TODO: These should maybe be stored in a separate array.
+	u32 instOffset_ = 0;
 	u64 hash_ = 0;
 	u32 origAddr_ = 0;
 	u32 origSize_ = 0;
@@ -118,15 +115,12 @@ class IRBlock {

 class IRBlockCache : public JitBlockCacheDebugInterface {
 public:
-	IRBlockCache() {}
+	IRBlockCache();
 	void Clear();
 	std::vector<int> FindInvalidatedBlockNumbers(u32 address, u32 length);
 	void FinalizeBlock(int blockNum, bool preload = false);
 	int GetNumBlocks() const override { return (int)blocks_.size(); }
-	int AllocateBlock(int emAddr) {
-		blocks_.push_back(IRBlock(emAddr));
-		return (int)blocks_.size() - 1;
-	}
+	int AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &inst);
 	IRBlock *GetBlock(int blockNum) {
 		if (blockNum >= 0 && blockNum < (int)blocks_.size()) {
 			return &blocks_[blockNum];
@@ -134,6 +128,16 @@ class IRBlockCache : public JitBlockCacheDebugInterface {
 			return nullptr;
 		}
 	}
+	int GetBlockNumFromOffset(int offset) const;
+	const IRInst *GetBlockInstructionPtr(const IRBlock &block) const {
+		return arena_.data() + block.GetInstructionOffset();
+	}
+	const IRInst *GetBlockInstructionPtr(int blockNum) const {
+		return arena_.data() + blocks_[blockNum].GetInstructionOffset();
+	}
+	const IRInst *GetArenaPtr() const {
+		return arena_.data();
+	}
 	bool IsValidBlock(int blockNum) const override {
 		return blockNum >= 0 && blockNum < (int)blocks_.size() && blocks_[blockNum].IsValid();
 	}
@@ -185,6 +189,7 @@ class IRBlockCache : public JitBlockCacheDebugInterface {
 	u32 AddressToPage(u32 addr) const;

 	std::vector<IRBlock> blocks_;
+	std::vector<IRInst> arena_;
 	std::unordered_map<u32, std::vector<int>> byPage_;
 };
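One property of the new layout worth spelling out: `arena_` is a `std::vector<IRInst>`, so `AllocateBlock()` may reallocate it and move every instruction. That is presumably why `IRBlock` stores an offset rather than a pointer, and why callers re-resolve `GetBlockInstructionPtr()` instead of caching its result. A hedged usage sketch (variable names are illustrative):

```cpp
// Never hold an IRInst pointer across a call that can grow the arena.
const IRInst *insts = cache.GetBlockInstructionPtr(blockNum);
// ... safe to read through insts here ...
int newBlock = cache.AllocateBlock(emAddr, mipsBytes, newInsts);  // may reallocate arena_
insts = cache.GetBlockInstructionPtr(blockNum);  // re-resolve before reading again
```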
@@ -227,8 +232,8 @@ class IRJit : public JitInterface {
 protected:
 	bool CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload);

-	virtual bool CompileTargetBlock(IRBlock *block, int block_num, bool preload) { return true; }
-	virtual void FinalizeTargetBlock(IRBlock *block, int block_num) {}
+	virtual bool CompileTargetBlock(IRBlockCache *irBlockCache, int block_num, bool preload) { return true; }
+	virtual void FinalizeTargetBlock(IRBlockCache *irBlockCache, int block_num) {}

 	JitOptions jo;
diff --git a/Core/MIPS/IR/IRNativeCommon.cpp b/Core/MIPS/IR/IRNativeCommon.cpp
index c3253230bce9..319495f3437e 100644
--- a/Core/MIPS/IR/IRNativeCommon.cpp
+++ b/Core/MIPS/IR/IRNativeCommon.cpp
@@ -506,12 +506,12 @@ void IRNativeJit::Init(IRNativeBackend &backend) {
 	}
 }

-bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
-	return backend_->CompileBlock(block, block_num, preload);
+bool IRNativeJit::CompileTargetBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
+	return backend_->CompileBlock(irBlockCache, block_num, preload);
 }

-void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
-	backend_->FinalizeBlock(block, block_num, jo);
+void IRNativeJit::FinalizeTargetBlock(IRBlockCache *irBlockCache, int block_num) {
+	backend_->FinalizeBlock(irBlockCache, block_num, jo);
 }

 void IRNativeJit::RunLoopUntil(u64 globalticks) {
@@ -532,7 +532,7 @@ void IRNativeJit::InvalidateCacheAt(u32 em_address, int length) {
 	std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
 	for (int block_num : numbers) {
 		auto block = blocks_.GetBlock(block_num);
-		backend_->InvalidateBlock(block, block_num);
+		backend_->InvalidateBlock(&blocks_, block_num);
 		block->Destroy(block->GetTargetOffset());
 	}
 }
@@ -645,7 +645,8 @@ int IRNativeBackend::OffsetFromCodePtr(const u8 *ptr) {
 	return (int)codeBlock.GetOffset(ptr);
 }

-void IRNativeBackend::FinalizeBlock(IRBlock *block, int block_num, const JitOptions &jo) {
+void IRNativeBackend::FinalizeBlock(IRBlockCache *irBlockCache, int block_num, const JitOptions &jo) {
+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	if (jo.enableBlocklink) {
 		uint32_t pc = block->GetOriginalStart();
diff --git a/Core/MIPS/IR/IRNativeCommon.h b/Core/MIPS/IR/IRNativeCommon.h
index a5ccea28f46f..53e17178c15d 100644
--- a/Core/MIPS/IR/IRNativeCommon.h
+++ b/Core/MIPS/IR/IRNativeCommon.h
@@ -71,10 +71,10 @@ class IRNativeBackend {
 	int OffsetFromCodePtr(const u8 *ptr);

 	virtual void GenerateFixedCode(MIPSState *mipsState) = 0;
-	virtual bool CompileBlock(IRBlock *block, int block_num, bool preload) = 0;
+	virtual bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) = 0;
 	virtual void ClearAllBlocks() = 0;
-	virtual void InvalidateBlock(IRBlock *block, int block_num) = 0;
-	void FinalizeBlock(IRBlock *block, int block_num, const JitOptions &jo);
+	virtual void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) = 0;
+	void FinalizeBlock(IRBlockCache *irBlockCache, int block_num, const JitOptions &jo);

 	virtual void UpdateFCR31(MIPSState *mipsState) {}
@@ -199,8 +199,8 @@ class IRNativeJit : public IRJit {
 protected:
 	void Init(IRNativeBackend &backend);

-	bool CompileTargetBlock(IRBlock *block, int block_num, bool preload) override;
-	void FinalizeTargetBlock(IRBlock *block, int block_num) override;
+	bool CompileTargetBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
+	void FinalizeTargetBlock(IRBlockCache *irBlockCache, int block_num) override;

 	IRNativeBackend *backend_ = nullptr;
 	IRNativeHooks hooks_;
diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp
index 01eed63fae0c..1af6dbfed068 100644
--- a/Core/MIPS/IR/IRRegCache.cpp
+++ b/Core/MIPS/IR/IRRegCache.cpp
@@ -105,7 +105,7 @@ void IRImmRegCache::MapDirtyInIn(IRReg rd, IRReg rs, IRReg rt) {

 IRNativeRegCacheBase::IRNativeRegCacheBase(MIPSComp::JitOptions *jo) : jo_(jo) {}

-void IRNativeRegCacheBase::Start(MIPSComp::IRBlock *irBlock) {
+void IRNativeRegCacheBase::Start(MIPSComp::IRBlockCache *irBlockCache, int blockNum) {
 	if (!initialReady_) {
 		SetupInitialRegs();
 		initialReady_ = true;
@@ -114,6 +114,8 @@ void IRNativeRegCacheBase::Start(MIPSComp::IRBlock *irBlock) {
 	memcpy(nr, nrInitial_, sizeof(nr[0]) * config_.totalNativeRegs);
 	memcpy(mr, mrInitial_, sizeof(mr));

+	irBlock_ = irBlockCache->GetBlock(blockNum);
+
 	int numStatics;
 	const StaticAllocation *statics = GetStaticAllocations(numStatics);
 	for (int i = 0; i < numStatics; i++) {
@@ -124,10 +126,11 @@ void IRNativeRegCacheBase::Start(MIPSComp::IRBlock *irBlock) {
 		mr[statics[i].mr].nReg = statics[i].nr;
 		mr[statics[i].mr].isStatic = true;
 		// Lock it until the very end.
-		mr[statics[i].mr].spillLockIRIndex = irBlock->GetNumInstructions();
+		mr[statics[i].mr].spillLockIRIndex = irBlock_->GetNumInstructions();
 	}

-	irBlock_ = irBlock;
+	irBlockNum_ = blockNum;
+	irBlockCache_ = irBlockCache;
 	irIndex_ = 0;
 }
@@ -430,7 +433,7 @@ bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, IRReg r) const {
 	info.lookaheadCount = UNUSED_LOOKAHEAD_OPS;
 	// We look starting one ahead, unlike spilling. We want to know if it clobbers later.
 	info.currentIndex = irIndex_ + 1;
-	info.instructions = irBlock_->GetInstructions();
+	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
 	info.numInstructions = irBlock_->GetNumInstructions();

 	// Make sure we're on the first one if this is multi-lane.
@@ -457,7 +460,7 @@ bool IRNativeRegCacheBase::IsRegRead(MIPSLoc type, IRReg first) const {
 	info.lookaheadCount = UNUSED_LOOKAHEAD_OPS;
 	// We look starting one ahead, unlike spilling.
 	info.currentIndex = irIndex_ + 1;
-	info.instructions = irBlock_->GetInstructions();
+	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
 	info.numInstructions = irBlock_->GetNumInstructions();

 	// Note: this intentionally doesn't look at the full reg, only the lane.
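The lookahead helpers above, and FindBestToSpill/MapNativeReg below, all build the same `IRSituation`, differing only in the start index and window. If this pattern spreads further, a small helper could centralize the cache lookup; the following is a hypothetical refactor sketch, not code from the patch:

```cpp
// Hypothetical: shared IRSituation setup for the register cache's lookahead queries.
IRSituation IRNativeRegCacheBase::MakeSituation(int startIndex, int lookahead) const {
	IRSituation info;
	info.lookaheadCount = lookahead;
	info.currentIndex = startIndex;
	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
	info.numInstructions = irBlock_->GetNumInstructions();
	return info;
}
```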
@@ -474,7 +477,7 @@ IRNativeReg IRNativeRegCacheBase::FindBestToSpill(MIPSLoc type, MIPSMap flags, b
 	IRSituation info;
 	info.lookaheadCount = UNUSED_LOOKAHEAD_OPS;
 	info.currentIndex = irIndex_;
-	info.instructions = irBlock_->GetInstructions();
+	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
 	info.numInstructions = irBlock_->GetNumInstructions();

 	*clobbered = false;
@@ -1026,7 +1029,7 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi
 	IRSituation info;
 	info.lookaheadCount = 16;
 	info.currentIndex = irIndex_;
-	info.instructions = irBlock_->GetInstructions();
+	info.instructions = irBlockCache_->GetBlockInstructionPtr(irBlockNum_);
 	info.numInstructions = irBlock_->GetNumInstructions();

 	IRReg basefpr = first - oldlane - 32;
diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h
index 4301886b4414..742d1cae3385 100644
--- a/Core/MIPS/IR/IRRegCache.h
+++ b/Core/MIPS/IR/IRRegCache.h
@@ -40,6 +40,7 @@ class MIPSState;

 namespace MIPSComp {
 class IRBlock;
+class IRBlockCache;
 struct JitOptions;
 }

@@ -153,7 +154,7 @@ class IRNativeRegCacheBase {
 	IRNativeRegCacheBase(MIPSComp::JitOptions *jo);
 	virtual ~IRNativeRegCacheBase() {}

-	virtual void Start(MIPSComp::IRBlock *irBlock);
+	virtual void Start(MIPSComp::IRBlockCache *irBlockCache, int blockNum);
 	void SetIRIndex(int index) {
 		irIndex_ = index;
 	}
@@ -248,7 +249,9 @@ class IRNativeRegCacheBase {
 	bool IsValidFPR(IRReg r) const;

 	MIPSComp::JitOptions *jo_;
+	int irBlockNum_ = 0;
 	const MIPSComp::IRBlock *irBlock_ = nullptr;
+	const MIPSComp::IRBlockCache *irBlockCache_ = nullptr;
 	int irIndex_ = 0;

 	struct {
diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp
index 6388d1652afb..d2391a2e7424 100644
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@@ -2128,7 +2128,7 @@ namespace MIPSInt
 		ApplySwizzleT(&t[n - 1], V_Single, -INFINITY);
 	}

-	for (int i = 0; i < n; i++) {
+	for (int i = 0; i < (int)n; i++) {
 		switch (optype) {
 		case 0: d.f[i] = s[i] + t[i]; break; //vadd
 		case 1: d.f[i] = s[i] - t[i]; break; //vsub
diff --git a/Core/MIPS/RiscV/RiscVJit.cpp b/Core/MIPS/RiscV/RiscVJit.cpp
index 65ebffc7b991..6c3544969c52 100644
--- a/Core/MIPS/RiscV/RiscVJit.cpp
+++ b/Core/MIPS/RiscV/RiscVJit.cpp
@@ -56,10 +56,11 @@ static void NoBlockExits() {
 	_assert_msg_(false, "Never exited block, invalid IR?");
 }

-bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
+bool RiscVJitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
 	if (GetSpaceLeft() < 0x800)
 		return false;

+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumInstructions() * 32));

 	u32 startPC = block->GetOriginalStart();
@@ -81,11 +82,12 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
 	block->SetTargetOffset((int)GetOffset(blockStart));
 	compilingBlockNum_ = block_num;

-	regs_.Start(block);
+	regs_.Start(irBlockCache, block_num);

 	std::vector<const u8 *> addresses;
+	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 	for (int i = 0; i < block->GetNumInstructions(); ++i) {
-		const IRInst &inst = block->GetInstructions()[i];
+		const IRInst &inst = instructions[i];
 		regs_.SetIRIndex(i);
 		addresses.push_back(GetCodePtr());
@@ -142,10 +144,11 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
 			addressesLookup[addresses[i]] = i;

 		INFO_LOG(JIT, "=============== RISCV (%08x, %d bytes) ===============", startPC, len);
+		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
 			auto it = addressesLookup.find(p);
 			if (it != addressesLookup.end()) {
-				const IRInst &inst = block->GetInstructions()[it->second];
+				const IRInst &inst = instructions[it->second];

 				char temp[512];
 				DisassembleIR(temp, sizeof(temp), inst);
@@ -295,7 +298,8 @@ void RiscVJitBackend::ClearAllBlocks() {
 	EraseAllLinks(-1);
 }

-void RiscVJitBackend::InvalidateBlock(IRBlock *block, int block_num) {
+void RiscVJitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	int offset = block->GetTargetOffset();

 	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;
diff --git a/Core/MIPS/RiscV/RiscVJit.h b/Core/MIPS/RiscV/RiscVJit.h
index 7ccbcce90b36..2bc0b1ec5127 100644
--- a/Core/MIPS/RiscV/RiscVJit.h
+++ b/Core/MIPS/RiscV/RiscVJit.h
@@ -36,9 +36,9 @@ class RiscVJitBackend : public RiscVGen::RiscVCodeBlock, public IRNativeBackend
 	bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;

 	void GenerateFixedCode(MIPSState *mipsState) override;
-	bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
+	bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
 	void ClearAllBlocks() override;
-	void InvalidateBlock(IRBlock *block, int block_num) override;
+	void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) override;

 protected:
 	const CodeBlockCommon &CodeBlock() const override {
diff --git a/Core/MIPS/x86/X64IRJit.cpp b/Core/MIPS/x86/X64IRJit.cpp
index be6a37469713..4bbc92430668 100644
--- a/Core/MIPS/x86/X64IRJit.cpp
+++ b/Core/MIPS/x86/X64IRJit.cpp
@@ -55,10 +55,11 @@ static void NoBlockExits() {
 	_assert_msg_(false, "Never exited block, invalid IR?");
 }

-bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
+bool X64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
 	if (GetSpaceLeft() < 0x800)
 		return false;

+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	u32 startPC = block->GetOriginalStart();
 	bool wroteCheckedOffset = false;
 	if (jo.enableBlocklink && !jo.useBackJump) {
@@ -85,12 +86,13 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
 	compilingBlockNum_ = block_num;
 	lastConstPC_ = 0;

-	regs_.Start(block);
+	regs_.Start(irBlockCache, block_num);

 	std::vector<const u8 *> addresses;
 	addresses.reserve(block->GetNumInstructions());
+	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 	for (int i = 0; i < block->GetNumInstructions(); ++i) {
-		const IRInst &inst = block->GetInstructions()[i];
+		const IRInst &inst = instructions[i];
 		regs_.SetIRIndex(i);
 		addresses.push_back(GetCodePtr());
@@ -146,10 +148,11 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
 			addressesLookup[addresses[i]] = i;

 		INFO_LOG(JIT, "=============== x86 (%08x, %d bytes) ===============", startPC, len);
+		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
 		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
 			auto it = addressesLookup.find(p);
 			if (it != addressesLookup.end()) {
-				const IRInst &inst = block->GetInstructions()[it->second];
+				const IRInst &inst = instructions[it->second];

 				char temp[512];
 				DisassembleIR(temp, sizeof(temp), inst);
@@ -316,7 +319,8 @@ void X64JitBackend::ClearAllBlocks() {
 	EraseAllLinks(-1);
 }

-void X64JitBackend::InvalidateBlock(IRBlock *block, int block_num) {
+void X64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
+	IRBlock *block = irBlockCache->GetBlock(block_num);
 	int offset = block->GetTargetOffset();

 	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;
diff --git a/Core/MIPS/x86/X64IRJit.h b/Core/MIPS/x86/X64IRJit.h
index 15a2fb9b449c..e80a8544d234 100644
--- a/Core/MIPS/x86/X64IRJit.h
+++ b/Core/MIPS/x86/X64IRJit.h
@@ -52,9 +52,9 @@ class X64JitBackend : public Gen::XCodeBlock, public IRNativeBackend {
 	bool DescribeCodePtr(const u8 *ptr, std::string &name) const override;

 	void GenerateFixedCode(MIPSState *mipsState) override;
-	bool CompileBlock(IRBlock *block, int block_num, bool preload) override;
+	bool CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) override;
 	void ClearAllBlocks() override;
-	void InvalidateBlock(IRBlock *block, int block_num) override;
+	void InvalidateBlock(IRBlockCache *irBlockCache, int block_num) override;

 protected:
 	const CodeBlockCommon &CodeBlock() const override {
diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
index 7f141d634d23..4c7d3227801c 100644
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -592,7 +592,7 @@ void VertexDecoder::Step_Color565Morph() const
 		c[i] = clamp_u8((int)col[i]);
 	}
 	c[3] = 255;
-	// Always full alpha.
+	// Always full alpha. (Is this true??)
 }

 void VertexDecoder::Step_Color5551Morph() const