Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Common/Math/lin/matrix4x4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ void Matrix4x4::setViewFrame(const Vec3 &pos, const Vec3 &vRight, const Vec3 &vV
yx = vRight.y; yy = vUp.y; yz=vView.y; yw = 0.0f;
zx = vRight.z; zy = vUp.z; zz=vView.z; zw = 0.0f;

wx = -pos * vRight;
wy = -pos * vUp;
wz = -pos * vView;
wx = dot(-pos, vRight);
wy = dot(-pos, vUp);
wz = dot(-pos, vView);
ww = 1.0f;
}

Expand Down
1 change: 0 additions & 1 deletion Common/Math/lin/vec3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ Vec3 Vec3::operator *(const Matrix4x4 &m) const {
x*m.xy + y*m.yy + z*m.zy + m.wy,
x*m.xz + y*m.yz + z*m.zz + m.wz);
}

Vec3 Vec3::rotatedBy(const Matrix4x4 &m) const {
return Vec3(x*m.xx + y*m.yx + z*m.zx,
x*m.xy + y*m.yy + z*m.zy,
Expand Down
21 changes: 8 additions & 13 deletions Common/Math/lin/vec3.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class Vec3 {
x+=other.x; y+=other.y; z+=other.z;
}
Vec3 operator -(const Vec3 &v) const {
return Vec3(x-v.x,y-v.y,z-v.z);
return Vec3(x-v.x, y-v.y, z-v.z);
}
void operator -= (const Vec3 &other)
{
Expand All @@ -48,9 +48,8 @@ class Vec3 {
Vec3 operator -() const {
return Vec3(-x,-y,-z);
}

Vec3 operator * (const float f) const {
return Vec3(x*f,y*f,z*f);
Vec3 operator *(const float f) const {
return Vec3(x * f, y * f, z * f);
}
Vec3 operator / (const float f) const {
float invf = (1.0f/f);
Expand All @@ -60,9 +59,6 @@ class Vec3 {
{
*this = *this / f;
}
float operator * (const Vec3 &other) const {
return x*other.x + y*other.y + z*other.z;
}
void operator *= (const float f) {
*this = *this * f;
}
Expand All @@ -72,9 +68,6 @@ class Vec3 {
Vec3 scaledBy(const Vec3 &other) const {
return Vec3(x*other.x, y*other.y, z*other.z);
}
Vec3 scaledByInv(const Vec3 &other) const {
return Vec3(x/other.x, y/other.y, z/other.z);
}
Vec3 operator *(const Matrix4x4 &m) const;
void operator *=(const Matrix4x4 &m) {
*this = *this * m;
Expand All @@ -90,7 +83,7 @@ class Vec3 {
return sqrtf(length2());
}
void setLength(const float l) {
(*this) *= l/length();
(*this) *= l / length();
}
Vec3 withLength(const float l) const {
return (*this) * l / length();
Expand All @@ -116,11 +109,13 @@ class Vec3 {
return (*this)*(1-t) + other*t;
}
void setZero() {
memset((void *)this,0,sizeof(float)*3);
x = 0.0f;
y = 0.0f;
z = 0.0f;
}
};

inline Vec3 operator * (const float f, const Vec3 &v) {return v * f;}
inline Vec3 operator * (const float f, const Vec3 &v) { return v * f; }

// In new code, prefer these to the operators.

Expand Down
190 changes: 23 additions & 167 deletions GPU/Common/GPUStateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,12 +611,6 @@ void ConvertViewportAndScissor(const DisplayLayoutConfig &config, bool useBuffer
renderHeightFactor = renderHeight / 272.0f;
}

// We take care of negative offsets in the projection matrix.
// These come from split framebuffers (Killzone).
// TODO: Might be safe to get rid of this here and do the same for positive offsets?
renderX = std::max(gstate_c.curRTOffsetX, 0);
renderY = std::max(gstate_c.curRTOffsetY, 0);

// Scissor
int scissorX1 = gstate.getScissorX1();
int scissorY1 = gstate.getScissorY1();
Expand All @@ -629,8 +623,8 @@ void ConvertViewportAndScissor(const DisplayLayoutConfig &config, bool useBuffer
out.scissorW = 0;
out.scissorH = 0;
} else {
out.scissorX = (renderX * renderWidthFactor) + displayOffsetX + scissorX1 * renderWidthFactor;
out.scissorY = (renderY * renderHeightFactor) + displayOffsetY + scissorY1 * renderHeightFactor;
out.scissorX = displayOffsetX + scissorX1 * renderWidthFactor;
out.scissorY = displayOffsetY + scissorY1 * renderHeightFactor;
out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor;
out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor;
}
Expand All @@ -643,165 +637,27 @@ void ConvertViewportAndScissor(const DisplayLayoutConfig &config, bool useBuffer

DepthScaleFactors depthScale = GetDepthScaleFactors(gstate_c.UseFlags());

if (out.throughMode) {
// If renderX/renderY are offset to compensate for a split framebuffer,
// applying the offset to the viewport isn't enough, since the viewport clips.
// We need to apply either directly to the vertices, or to the "through" projection matrix.
out.viewportX = renderX * renderWidthFactor + displayOffsetX;
out.viewportY = renderY * renderHeightFactor + displayOffsetY;
out.viewportW = curRTWidth * renderWidthFactor;
out.viewportH = curRTHeight * renderHeightFactor;
out.depthRangeMin = depthScale.EncodeFromU16(0.0f);
out.depthRangeMax = depthScale.EncodeFromU16(65536.0f);
} else {
// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
float vpXScale = gstate.getViewportXScale();
float vpXCenter = gstate.getViewportXCenter();
float vpYScale = gstate.getViewportYScale();
float vpYCenter = gstate.getViewportYCenter();

// The viewport transform appears to go like this:
// Xscreen = -offsetX + vpXCenter + vpXScale * Xview
// Yscreen = -offsetY + vpYCenter + vpYScale * Yview
// Zscreen = vpZCenter + vpZScale * Zview

// The viewport is normally centered at 2048,2048 but can also be centered at other locations.
// Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover
// the desired screen area ([0-480)x[0-272)), so 1808,1912.

// This means that to get the analogue glViewport we must:
float vpX0 = vpXCenter - offsetX - fabsf(vpXScale);
float vpY0 = vpYCenter - offsetY - fabsf(vpYScale);
gstate_c.vpWidth = vpXScale * 2.0f;
gstate_c.vpHeight = vpYScale * 2.0f;

float vpWidth = fabsf(gstate_c.vpWidth);
float vpHeight = fabsf(gstate_c.vpHeight);

float left = renderX + vpX0;
float top = renderY + vpY0;
float right = left + vpWidth;
float bottom = top + vpHeight;

out.widthScale = 1.0f;
out.xOffset = 0.0f;
out.heightScale = 1.0f;
out.yOffset = 0.0f;

// If we're within the bounds, we want clipping done the viewport way, so leave it be.
{
float overageLeft = std::max(-left, 0.0f);
float overageRight = std::max(right - bufferWidth, 0.0f);

// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
if (right < scissorX2) {
overageRight -= scissorX2 - right;
}
if (left > scissorX1) {
overageLeft += scissorX1 - left;
}

// Our center drifted by the difference in overages.
float drift = overageRight - overageLeft;

if (overageLeft != 0.0f || overageRight != 0.0f) {
left += overageLeft;
right -= overageRight;

// Protect against the viewport being entirely outside the scissor.
// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
if (right <= left) {
right = left + 1.0f;
}

out.widthScale = vpWidth / (right - left);
out.xOffset = drift / (right - left);
}
}

{
float overageTop = std::max(-top, 0.0f);
float overageBottom = std::max(bottom - bufferHeight, 0.0f);

// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
if (bottom < scissorY2) {
overageBottom -= scissorY2 - bottom;
}
if (top > scissorY1) {
overageTop += scissorY1 - top;
}
// Our center drifted by the difference in overages.
float drift = overageBottom - overageTop;

if (overageTop != 0.0f || overageBottom != 0.0f) {
top += overageTop;
bottom -= overageBottom;

// Protect against the viewport being entirely outside the scissor.
// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
if (bottom <= top) {
bottom = top + 1.0f;
}

out.heightScale = vpHeight / (bottom - top);
out.yOffset = drift / (bottom - top);
}
}

out.viewportX = left * renderWidthFactor + displayOffsetX;
out.viewportY = top * renderHeightFactor + displayOffsetY;
out.viewportW = (right - left) * renderWidthFactor;
out.viewportH = (bottom - top) * renderHeightFactor;

// The depth viewport parameters are the same, but we handle it a bit differently.
// When clipping is enabled, depth is clamped to [0, 65535]. And minz/maxz discard.
// So, we apply the depth range as minz/maxz, and transform for the viewport.
float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter();
// TODO: This clip the entire draw if minz > maxz.
float minz = gstate.getDepthRangeMin();
float maxz = gstate.getDepthRangeMax();

if (gstate.isDepthClampEnabled() && (minz == 0 || maxz == 65535)) {
// Here, we should "clamp." But clamping per fragment would be slow.
// So, instead, we just increase the available range and hope.
// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.
float fullDepthRange = 65535.0f * (depthScale.Scale() - 1.0f) * (1.0f / 2.0f);
if (minz == 0) {
minz -= fullDepthRange;
}
if (maxz == 65535) {
maxz += fullDepthRange;
}
} else if (maxz == 65535) {
// This means clamp isn't enabled, but we still want to allow values up to 65535.99.
// If DepthSliceFactor() is 1.0, though, this would make out.depthRangeMax exceed 1.
// Since that would clamp, it would make Z=1234 not match between draws when maxz changes.
if (depthScale.Scale() > 1.0f)
maxz = 65535.99f;
}

// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.
float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
out.depthScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
// This adjusts the center from halfActualZRange to vpZCenter.
out.zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;

if (!gstate_c.Use(GPU_USE_ACCURATE_DEPTH)) {
out.depthScale = 1.0f;
out.zOffset = 0.0f;
out.depthRangeMin = depthScale.EncodeFromU16(vpZCenter - vpZScale);
out.depthRangeMax = depthScale.EncodeFromU16(vpZCenter + vpZScale);
} else {
out.depthRangeMin = depthScale.EncodeFromU16(minz);
out.depthRangeMax = depthScale.EncodeFromU16(maxz);
}

// OpenGL will clamp these for us anyway, and Direct3D will error if not clamped.
// Of course, if this happens we've skewed out.depthScale/out.zOffset and may get z-fighting.
out.depthRangeMin = std::max(out.depthRangeMin, 0.0f);
out.depthRangeMax = std::min(out.depthRangeMax, 1.0f);
}
// If renderX/renderY are offset to compensate for a split framebuffer,
// applying the offset to the viewport isn't enough, since the viewport clips.
// We need to apply either directly to the vertices, or to the "through" projection matrix.
out.viewportX = displayOffsetX;
out.viewportY = displayOffsetY;
out.viewportW = curRTWidth * renderWidthFactor;
out.viewportH = curRTHeight * renderHeightFactor;
// TODO: This clip the entire draw if minz > maxz.
float minz = gstate.getDepthRangeMin();
float maxz = gstate.getDepthRangeMax();

out.depthRangeMin = depthScale.EncodeFromU16(minz);
out.depthRangeMax = depthScale.EncodeFromU16(maxz);

// Not really using these now.
out.widthScale = 1.0f;
out.heightScale = 1.0f;
out.depthScale = 1.0f;
out.zOffset = 0.0f;
out.xOffset = 0.0f;
out.yOffset = 0.0f;
}

void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor) {
Expand Down
9 changes: 7 additions & 2 deletions GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ enum : uint64_t {
DIRTY_UVSCALEOFFSET = 1ULL << 18,
DIRTY_DEPTHRANGE = 1ULL << 19,

DIRTY_VIEWPORT_UNIFORMS = 1ULL << 20,
// Bit 20 is no longer free: it is used by DIRTY_VIEWPORT_UNIFORMS.

DIRTY_WORLDMATRIX = 1ULL << 21,
DIRTY_VIEWMATRIX = 1ULL << 22,
DIRTY_TEXMATRIX = 1ULL << 23,
Expand All @@ -93,12 +96,14 @@ enum : uint64_t {
DIRTY_LIGHT_CONTROL = 1ULL << 38,
DIRTY_TEX_ALPHA_MUL = 1ULL << 39,

// Bits 40-42 are free for new uniforms. Then we're really out and need to start merging.
DIRTY_RASTER_OFFSET = 1ULL << 40,

// Bits 41-42 are free for new uniforms (although the mask below needs updating). Then we're really out and need to start merging.
// Don't forget to update DIRTY_ALL_UNIFORMS when you start using them.

DIRTY_BONE_UNIFORMS = 0xFF000000ULL,

DIRTY_ALL_UNIFORMS = 0x0FFFFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0x1FFFFFFFFFFULL,

// Other dirty elements that aren't uniforms
DIRTY_CULL_PLANES = 1ULL << 43,
Expand Down
Loading
Loading