Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,71 @@ bool DrawEngineCommon::TestBoundingBoxFast(const void *vdata, int vertexCount, u
return true;
}

// 2D bounding box test against scissor. No indexing yet.
// Only supports non-indexed draws with float positions.
bool DrawEngineCommon::TestBoundingBoxThrough(const void *vdata, int vertexCount, u32 vertType) {
// Grab temp buffer space from large offsets in decoded_. Not exactly safe for large draws.
if (vertexCount > 16) {
return true;
}

float *verts = (float *)(decoded_ + 65536 * 18);

// Although this may lead to drawing that shouldn't happen, the viewport is more complex on VR.
// Let's always say objects are within bounds.
if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY))
return true;

// Try to skip NormalizeVertices if it's pure positions. No need to bother with a vertex decoder
// and a large vertex format.
u8 *temp_buffer = decoded_ + 65536 * 24;
// Simple, most common case.
VertexDecoder *dec = GetVertexDecoder(vertType);
int stride = dec->VertexSize();
int offset = dec->posoff;
switch (vertType & GE_VTYPE_POS_MASK) {
case GE_VTYPE_POS_FLOAT:
{
for (int i = 0; i < vertexCount; i++) {
memcpy(&verts[i * 3], (const u8 *)vdata + stride * i + offset, sizeof(float) * 3);
}
break;
}
default:
_dbg_assert_(false);
}

bool allOutsideLeft = true;
bool allOutsideTop = true;
bool allOutsideRight = true;
bool allOutsideBottom = true;
const float left = gstate.getScissorX1();
const float top = gstate.getScissorY1();
const float right = gstate.getScissorX2();
const float bottom = gstate.getScissorY2();
for (int i = 0; i < vertexCount; i++) {
const float *pos = verts + i * 3;
float x = pos[0];
float y = pos[1];
if (x >= left) {
allOutsideLeft = false;
}
if (x <= right) {
allOutsideRight = false;
}
if (y >= top) {
allOutsideTop = false;
}
if (y <= bottom) {
allOutsideBottom = false;
}
}
if (allOutsideLeft || allOutsideTop || allOutsideRight || allOutsideBottom) {
return false;
}
return true;
}

// TODO: This probably is not the best interface.
bool DrawEngineCommon::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
// This is always for the current vertices.
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class DrawEngineCommon {
// This is a less accurate version of TestBoundingBox, but faster. Can have more false positives.
// Doesn't support indexing.
bool TestBoundingBoxFast(const void *control_points, int vertexCount, u32 vertType);
// 2D test of the vertex positions against the scissor rectangle. Returns false if all vertices
// are outside the same scissor edge, true otherwise.
// Only supports non-indexed draws with float positions.
bool TestBoundingBoxThrough(const void *vdata, int vertexCount, u32 vertType);

void FlushSkin() {
bool applySkin = (lastVType_ & GE_VTYPE_WEIGHT_MASK) && decOptions_.applySkinInDecode;
Expand Down
8 changes: 5 additions & 3 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3159,12 +3159,14 @@ void TextureCacheCommon::DrawImGuiDebug(uint64_t &selectedTextureId) const {

if (ImGui::CollapsingHeader("Texture Cache State"), ImGuiTreeNodeFlags_DefaultOpen) {
ImGui::Text("Cache: %d textures, size est %d", (int)cache_.size(), cacheSizeEstimate_);
ImGui::Text("Second: %d textures, size est %d", (int)secondCache_.size(), secondCacheSizeEstimate_);
ImGui::Text("Low memory mode: %d", (int)lowMemoryMode_);
if (!secondCache_.empty()) {
ImGui::Text("Second: %d textures, size est %d", (int)secondCache_.size(), secondCacheSizeEstimate_);
}
ImGui::Text("Standard/shader scale factor: %d/%d", standardScaleFactor_, shaderScaleFactor_);
ImGui::Text("Texels scaled this frame: %d", texelsScaledThisFrame_);
ImGui::Text("Low memory mode: %d", (int)lowMemoryMode_);
if (ImGui::CollapsingHeader("Texture Replacement", ImGuiTreeNodeFlags_DefaultOpen)) {
ImGui::Text("Frame time/budget: %0.3f/%0.3f ms", replacementTimeThisFrame_, replacementFrameBudget_);
ImGui::Text("Frame time/budget: %0.3f/%0.3f ms", replacementTimeThisFrame_ * 1000.0f, replacementFrameBudget_ * 1000.0f);
ImGui::Text("UNLOADED: %d PENDING: %d NOT_FOUND: %d ACTIVE: %d CANCEL_INIT: %d",
replacementStateCounts[(int)ReplacementState::UNLOADED],
replacementStateCounts[(int)ReplacementState::PENDING],
Expand Down
12 changes: 12 additions & 0 deletions GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,17 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {

uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);

// Through mode early-out for simple float 2D draws, like in Fate Extra CCC (very beneficial there due to avoiding texture loads)
if ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_POS_MASK | GE_VTYPE_IDX_MASK)) == (GE_VTYPE_THROUGH_MASK | GE_VTYPE_POS_FLOAT | GE_VTYPE_IDX_NONE)) {
if (!drawEngineCommon_->TestBoundingBoxThrough(verts, count, vertexType)) {
gpuStats.numCulledDraws++;
int cycles = vertexCost_ * count;
gpuStats.vertexGPUCycles += cycles;
cyclesExecuted += cycles;
return;
}
}

#define MAX_CULL_CHECK_COUNT 6

// For now, turn off culling on platforms where we don't have SIMD bounding box tests, like RISC-V.
Expand Down Expand Up @@ -1039,6 +1050,7 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
// Some games rely on this, they don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.
AdvanceVerts(vertexType, count, bytesRead);

int totalVertCount = count;

// PRIMs are often followed by more PRIMs. Save some work and submit them immediately.
Expand Down
Loading