diff --git a/com/danitheskunk/skunkworks/backends/gl/FilterFXAA.java b/com/danitheskunk/skunkworks/backends/gl/FilterFXAA.java
new file mode 100644
index 0000000..b56453e
--- /dev/null
+++ b/com/danitheskunk/skunkworks/backends/gl/FilterFXAA.java
@@ -0,0 +1,338 @@
+package com.danitheskunk.skunkworks.backends.gl;
+
+import com.danitheskunk.skunkworks.backends.gl.Filter;
+import com.danitheskunk.skunkworks.backends.gl.Framebuffer;
+import com.danitheskunk.skunkworks.backends.gl.Program;
+import com.danitheskunk.skunkworks.backends.gl.ProgramCopy;
+
+import static org.lwjgl.opengl.GL11.*;
+import static org.lwjgl.opengl.GL20.glUniform2f;
+
+/**
+ * Post-processing {@link Filter} that runs an FXAA (fast approximate anti-aliasing) shader
+ * over a full-screen quad.
+ *
+ * <p>The fragment shader samples texture unit 0 and this class never binds a texture itself;
+ * presumably {@link Filter} binds the input framebuffer's texture before {@link #process()}
+ * runs (not visible in this file).
+ */
+public class FilterFXAA extends Filter {
+    /** Vertex shader: forwards the position and maps it from [-1, 1] to [0, 1] texture coordinates. */
+    private static final String VERTEX_SOURCE = """
+        layout(location = 0) in vec2 pos;
+        layout(location = 1) out vec2 out_texCoord;
+        void main() {
+            gl_Position = vec4(pos, 0.0f, 1.0f);
+            out_texCoord = vec2(pos.x * 0.5 + 0.5, pos.y * 0.5 + 0.5);
+        }
+        """;
+    /** FXAA fragment shader; it derives the one-texel step from the TextureSize uniform. */
+    private static final String FRAGMENT_SOURCE = """
+        #if __VERSION__ >= 130
+        #define COMPAT_VARYING in
+        #define COMPAT_TEXTURE texture
+        out vec4 FragColor;
+        #else
+        #define COMPAT_VARYING varying
+        #define COMPAT_TEXTURE texture2D
+        #define FragColor gl_FragColor
+        #endif
+
+        #ifdef GL_ES
+        #ifdef GL_FRAGMENT_PRECISION_HIGH
+        precision highp float;
+        #else
+        precision mediump float;
+        #endif
+        #define COMPAT_PRECISION mediump
+        #else
+        #define COMPAT_PRECISION
+        #endif
+
+        uniform COMPAT_PRECISION int FrameDirection;
+        uniform COMPAT_PRECISION int FrameCount;
+        uniform COMPAT_PRECISION vec2 OutputSize;
+        uniform COMPAT_PRECISION vec2 TextureSize;
+        uniform COMPAT_PRECISION vec2 InputSize;
+        layout(binding = 0) uniform sampler2D Texture;
+        layout(location = 1) COMPAT_VARYING vec2 TEX0;
+
+        // compatibility #defines
+        #define Source Texture
+        #define vTexCoord TEX0
+        #define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize
+        #define outsize vec4(OutputSize, 1.0 / OutputSize)
+
+
+
+        /*
+        FXAA_PRESET - Choose compile-in knob preset 0-5.
+        ------------------------------------------------------------------------------
+        FXAA_EDGE_THRESHOLD - The minimum amount of local contrast required
+                              to apply algorithm.
+                              1.0/3.0 - too little
+                              1.0/4.0 - good start
+                              1.0/8.0 - applies to more edges
+                              1.0/16.0 - overkill
+        ------------------------------------------------------------------------------
+        FXAA_EDGE_THRESHOLD_MIN - Trims the algorithm from processing darks.
+                                  Perf optimization.
+                                  1.0/32.0 - visible limit (smaller isn't visible)
+                                  1.0/16.0 - good compromise
+                                  1.0/12.0 - upper limit (seeing artifacts)
+        ------------------------------------------------------------------------------
+        FXAA_SEARCH_STEPS - Maximum number of search steps for end of span.
+        ------------------------------------------------------------------------------
+        FXAA_SEARCH_THRESHOLD - Controls when to stop searching.
+                                1.0/4.0 - seems to be the best quality wise
+        ------------------------------------------------------------------------------
+        FXAA_SUBPIX_TRIM - Controls sub-pixel aliasing removal.
+                           1.0/2.0 - low removal
+                           1.0/3.0 - medium removal
+                           1.0/4.0 - default removal
+                           1.0/8.0 - high removal
+                           0.0 - complete removal
+        ------------------------------------------------------------------------------
+        FXAA_SUBPIX_CAP - Insures fine detail is not completely removed.
+                          This is important for the transition of sub-pixel detail,
+                          like fences and wires.
+                          3.0/4.0 - default (medium amount of filtering)
+                          7.0/8.0 - high amount of filtering
+                          1.0 - no capping of sub-pixel aliasing removal
+        */
+
+        #ifndef FXAA_PRESET
+        #define FXAA_PRESET 5
+        #endif
+        #if (FXAA_PRESET == 3)
+        #define FXAA_EDGE_THRESHOLD (1.0/8.0)
+        #define FXAA_EDGE_THRESHOLD_MIN (1.0/16.0)
+        #define FXAA_SEARCH_STEPS 16
+        #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
+        #define FXAA_SUBPIX_CAP (3.0/4.0)
+        #define FXAA_SUBPIX_TRIM (1.0/4.0)
+        #endif
+        #if (FXAA_PRESET == 4)
+        #define FXAA_EDGE_THRESHOLD (1.0/8.0)
+        #define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
+        #define FXAA_SEARCH_STEPS 24
+        #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
+        #define FXAA_SUBPIX_CAP (3.0/4.0)
+        #define FXAA_SUBPIX_TRIM (1.0/4.0)
+        #endif
+        #if (FXAA_PRESET == 5)
+        #define FXAA_EDGE_THRESHOLD (1.0/8.0)
+        #define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
+        #define FXAA_SEARCH_STEPS 32
+        #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
+        #define FXAA_SUBPIX_CAP (3.0/4.0)
+        #define FXAA_SUBPIX_TRIM (1.0/4.0)
+        #endif
+
+        #define FXAA_SUBPIX_TRIM_SCALE (1.0/(1.0 - FXAA_SUBPIX_TRIM))
+
+        // Return the luma, the estimation of luminance from rgb inputs.
+        // This approximates luma using one FMA instruction,
+        // skipping normalization and tossing out blue.
+        // FxaaLuma() will range 0.0 to 2.963210702.
+        COMPAT_PRECISION float FxaaLuma(vec3 rgb) {
+            return rgb.y * (0.587/0.299) + rgb.x;
+        }
+
+        vec3 FxaaLerp3(vec3 a, vec3 b, float amountOfA) {
+            return (vec3(-amountOfA) * b) + ((a * vec3(amountOfA)) + b);
+        }
+
+        vec4 FxaaTexOff(sampler2D tex, vec2 pos, ivec2 off, vec2 rcpFrame) {
+            float x = pos.x + float(off.x) * rcpFrame.x;
+            float y = pos.y + float(off.y) * rcpFrame.y;
+            return COMPAT_TEXTURE(tex, vec2(x, y));
+        }
+
+        // pos is the output of FxaaVertexShader interpolated across screen.
+        // xy -> actual texture position {0.0 to 1.0}
+        // rcpFrame should be a uniform equal to {1.0/frameWidth, 1.0/frameHeight}
+        vec3 FxaaPixelShader(vec2 pos, sampler2D tex, vec2 rcpFrame)
+        {
+            vec3 rgbN = FxaaTexOff(tex, pos.xy, ivec2( 0,-1), rcpFrame).xyz;
+            vec3 rgbW = FxaaTexOff(tex, pos.xy, ivec2(-1, 0), rcpFrame).xyz;
+            vec3 rgbM = FxaaTexOff(tex, pos.xy, ivec2( 0, 0), rcpFrame).xyz;
+            vec3 rgbE = FxaaTexOff(tex, pos.xy, ivec2( 1, 0), rcpFrame).xyz;
+            vec3 rgbS = FxaaTexOff(tex, pos.xy, ivec2( 0, 1), rcpFrame).xyz;
+
+            float lumaN = FxaaLuma(rgbN);
+            float lumaW = FxaaLuma(rgbW);
+            float lumaM = FxaaLuma(rgbM);
+            float lumaE = FxaaLuma(rgbE);
+            float lumaS = FxaaLuma(rgbS);
+            float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
+            float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
+
+            float range = rangeMax - rangeMin;
+            if(range < max(FXAA_EDGE_THRESHOLD_MIN, rangeMax * FXAA_EDGE_THRESHOLD))
+            {
+                return rgbM;
+            }
+
+            vec3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
+
+            float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
+            float rangeL = abs(lumaL - lumaM);
+            float blendL = max(0.0, (rangeL / range) - FXAA_SUBPIX_TRIM) * FXAA_SUBPIX_TRIM_SCALE;
+            blendL = min(FXAA_SUBPIX_CAP, blendL);
+
+            vec3 rgbNW = FxaaTexOff(tex, pos.xy, ivec2(-1,-1), rcpFrame).xyz;
+            vec3 rgbNE = FxaaTexOff(tex, pos.xy, ivec2( 1,-1), rcpFrame).xyz;
+            vec3 rgbSW = FxaaTexOff(tex, pos.xy, ivec2(-1, 1), rcpFrame).xyz;
+            vec3 rgbSE = FxaaTexOff(tex, pos.xy, ivec2( 1, 1), rcpFrame).xyz;
+            rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
+            rgbL *= vec3(1.0/9.0);
+
+            float lumaNW = FxaaLuma(rgbNW);
+            float lumaNE = FxaaLuma(rgbNE);
+            float lumaSW = FxaaLuma(rgbSW);
+            float lumaSE = FxaaLuma(rgbSE);
+
+            float edgeVert =
+                abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
+                abs((0.50 * lumaW ) + (-1.0 * lumaM) + (0.50 * lumaE )) +
+                abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
+            float edgeHorz =
+                abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
+                abs((0.50 * lumaN ) + (-1.0 * lumaM) + (0.50 * lumaS )) +
+                abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
+
+            bool horzSpan = edgeHorz >= edgeVert;
+            float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
+
+            if(!horzSpan)
+            {
+                lumaN = lumaW;
+                lumaS = lumaE;
+            }
+
+            float gradientN = abs(lumaN - lumaM);
+            float gradientS = abs(lumaS - lumaM);
+            lumaN = (lumaN + lumaM) * 0.5;
+            lumaS = (lumaS + lumaM) * 0.5;
+
+            if (gradientN < gradientS)
+            {
+                lumaN = lumaS;
+                lumaN = lumaS;
+                gradientN = gradientS;
+                lengthSign *= -1.0;
+            }
+
+            vec2 posN;
+            posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
+            posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);
+
+            gradientN *= FXAA_SEARCH_THRESHOLD;
+
+            vec2 posP = posN;
+            vec2 offNP = horzSpan ? vec2(rcpFrame.x, 0.0) : vec2(0.0, rcpFrame.y);
+            float lumaEndN = lumaN;
+            float lumaEndP = lumaN;
+            bool doneN = false;
+            bool doneP = false;
+            posN += offNP * vec2(-1.0, -1.0);
+            posP += offNP * vec2( 1.0, 1.0);
+
+            for(int i = 0; i < FXAA_SEARCH_STEPS; i++) {
+                if(!doneN)
+                {
+                    lumaEndN = FxaaLuma(COMPAT_TEXTURE(tex, posN.xy).xyz);
+                }
+                if(!doneP)
+                {
+                    lumaEndP = FxaaLuma(COMPAT_TEXTURE(tex, posP.xy).xyz);
+                }
+
+                doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
+                doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
+
+                if(doneN && doneP)
+                {
+                    break;
+                }
+                if(!doneN)
+                {
+                    posN -= offNP;
+                }
+                if(!doneP)
+                {
+                    posP += offNP;
+                }
+            }
+
+            float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
+            float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
+            bool directionN = dstN < dstP;
+            lumaEndN = directionN ? lumaEndN : lumaEndP;
+
+            if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0))
+            {
+                lengthSign = 0.0;
+            }
+
+
+            float spanLength = (dstP + dstN);
+            dstN = directionN ? dstN : dstP;
+            float subPixelOffset = (0.5 + (dstN * (-1.0/spanLength))) * lengthSign;
+            vec3 rgbF = COMPAT_TEXTURE(tex, vec2(
+                pos.x + (horzSpan ? 0.0 : subPixelOffset),
+                pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;
+            return FxaaLerp3(rgbL, rgbF, blendL);
+        }
+
+        void main()
+        {
+            FragColor = vec4(FxaaPixelShader(vTexCoord, Source, vec2(SourceSize.z, SourceSize.w)), 1.0) * 1.0;
+        }
+        """;
+
+    /** Shader program built from the two sources above. */
+    private final Program program;
+
+    /**
+     * Creates an FXAA pass over {@code input}.
+     *
+     * @param input framebuffer passed to the {@link Filter} base class as this filter's input
+     */
+    public FilterFXAA(Framebuffer input) {
+        // NOTE(review): what the second Filter argument controls is not visible in this file.
+        super(input, false);
+        program = new Program(VERTEX_SOURCE, FRAGMENT_SOURCE);
+    }
+
+    /**
+     * Uploads the input size to the shader's size uniforms, clears the color and depth buffers
+     * and draws one full-screen quad with the FXAA program.
+     */
+    @Override
+    protected void process() {
+        program.use();
+
+        // All three size uniforms receive the input framebuffer's size; the shader itself only
+        // reads TextureSize, from which it derives the one-texel step.
+        var size = input.getSize();
+        glUniform2f(program.getUniformLocation("OutputSize"), size.getX(), size.getY());
+        glUniform2f(program.getUniformLocation("TextureSize"), size.getX(), size.getY());
+        glUniform2f(program.getUniformLocation("InputSize"), size.getX(), size.getY());
+
+        glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+        // Two counterclockwise triangles covering clip space from (-1, -1) to (1, 1).
+        glBegin(GL_TRIANGLES);
+        glVertex2i(-1, 1);
+        glVertex2i( 1, -1);
+        glVertex2i(-1, -1);
+
+        glVertex2i( 1, -1);
+        glVertex2i(-1, 1);
+        glVertex2i( 1, 1);
+        glEnd();
+    }
+}
diff --git a/com/danitheskunk/skunkworks/backends/gl/Window.java b/com/danitheskunk/skunkworks/backends/gl/Window.java
index fc300a7..d6dfe14 100644
--- a/com/danitheskunk/skunkworks/backends/gl/Window.java
+++ b/com/danitheskunk/skunkworks/backends/gl/Window.java
@@ -2,10 +2,8 @@ package com.danitheskunk.skunkworks.backends.gl;
 
 import com.danitheskunk.skunkworks.*;
 import com.danitheskunk.skunkworks.gfx.IPipeline;
-import com.danitheskunk.skunkworks.gfx.IRenderContext;
 import com.danitheskunk.skunkworks.gfx.ITexture;
 import com.danitheskunk.skunkworks.gfx.Image;
-import com.danitheskunk.skunkworks.gfx.threedee.IRenderContext3D;
 import org.lwjgl.glfw.GLFWErrorCallback;
 import org.lwjgl.opengl.GL;
 import org.lwjgl.opengl.GL11;
@@ -31,6 +29,7 @@ public class Window extends BaseWindow {
     private final Pipeline2D pipeline2D;
     private final Pipeline3D pipeline3D;
     private final Scaler scaler;
+    private final FilterFXAA fxaa;
 
     public Window(Vec2i size, String title, Engine engine) {
         super(engine);
@@ -70,7 +69,8 @@ public class Window extends BaseWindow {
 
         pipeline2D = new Pipeline2D(framebuffer, textureAtlas);
         pipeline3D = new Pipeline3D(framebuffer, textureAtlas);
-        scaler = new Scaler(framebuffer);
+        fxaa = new FilterFXAA(framebuffer);
+        scaler = new Scaler(fxaa.getOutput());
 
         shouldClose = false;
 
@@ -200,6 +200,7 @@ public class Window extends BaseWindow {
 
     @Override
     public void runScaler() {
+        fxaa.apply();
         scaler.setStretchMode(stretchMode);
         scaler.apply();
         scaler.getOutput().copyToScreen();