Thanks to @Crazyblox for the camera angle, the doubled color vibrancy, and the preprocessing.
so peak ![]()
(running on notebook)
shader
--!native
--!optimize 2
-- Black-hole shader script: type aliases, library aliases and shared constants.
type f64 = number
type vec = vector
-- Settings object returned by calling the required module; this script reads
-- its Display_Res, Actor_Res and Actor_RenderNum fields.
local S = require(`../../../../`)()
local vec3 = vector.create
-- Reusable all-zero and all-one vectors.
local vec_0, vec_1 = vec3(0,0,0), vec3(1,1,1)
-- Local aliases for the math and vector library functions used by the per-pixel code.
local sin, cos, abs, floor, min, max, clamp, sqrt, exp, pow, atan2, log =
math.sin, math.cos, math.abs, math.floor, math.min, math.max, math.clamp, math.sqrt, math.exp, math.pow, math.atan2, math.log
local dot, cross, normalize, magnitude =
vector.dot, vector.cross, vector.normalize, vector.magnitude
local PI: f64 = math.pi
-- Default camera values.
-- NOTE(review): main() overwrites its copy of `ro` before use and declares its
-- own fwd/right/up locals, so these look unused in the visible script.
local ro: vec = vec3(0.0, 2.0, 14.0)
local lookAt: vec = vec3(0.0, 0.0, 0.0)
local fwd: vec = vec3(0,0,-1)
local right: vec = vec3(1,0,0)
local up: vec = vec3(0,1,0)
local vec_up: vec = vec3(0,1,0)
-- Display resolution packed as (width, height, 0).
local iRes: vec = vec3(S.Display_Res.x, S.Display_Res.y, 0)
-- Bytes per output pixel: vec3_buffer writes one u32 per pixel.
local u32B: number = 4
--- Tone-maps a colour and stores it as one packed 32-bit pixel.
-- Applies a 0.6 exposure scale, then the per-channel rational curve
-- (x * (a * x + b)) / (x * (c * x + d) + e), clamps to [0, 1], scales to
-- 0..255 and writes x + (y << 8) + (z << 16) + 0xFF000000.
-- @param b  destination buffer
-- @param i  byte offset of the pixel inside `b`
-- @param v  colour to encode
local function vec3_buffer(b: buffer, i: number, v: vector)
    local exposed = v * 0.6 -- exposure

    local slopeB = vec3(0.0762, 0.0762, 0.0762) -- 2.54 * 0.03
    local slopeD = vec3(0.59, 0.59, 0.59)
    local offsetE = vec3(0.14, 0.14, 0.14)

    local numerator = exposed * (exposed * 2.51 + slopeB)
    local denominator = exposed * (exposed * 2.43 + slopeD) + offsetE
    local rgb = vector.clamp(numerator / denominator, vec_0, vec_1) * 255

    local packed = rgb.x + bit32.lshift(rgb.y, 8) + bit32.lshift(rgb.z, 16) + 0xFF000000
    buffer.writeu32(b, i, packed)
end
--- GLSL-style `mix` for vectors; delegates to vector.lerp.
-- @param a  value returned at t = 0
-- @param b  value returned at t = 1
-- @param t  blend factor
local function mix(a: vec, b: vec, t: f64): vec
    local blended = vector.lerp(a, b, t)
    return blended
end
--- Fractional part of `x`, measured from floor(x).
local function fract(x: f64): f64
    local whole = floor(x)
    return x - whole
end
--- Cubic Hermite step: 0 at `edge0`, 1 at `edge1`, smooth in between.
-- Callers in this script also pass edge0 > edge1 to get a falling ramp.
local function smoothstep(edge0: f64, edge1: f64, x: f64): f64
    local span = edge1 - edge0
    local s = clamp((x - edge0) / span, 0.0, 1.0)
    return s * s * (3.0 - 2.0 * s)
end
--- Hashes a pair of numbers to a pseudo-random value in [0, 1).
-- Built only from fract() and arithmetic, so the result is deterministic.
local function hash12(x: f64, y: f64): f64
    local hx = fract(x * 0.1031)
    local hy = fract(y * 0.1031)
    local mixed = hx * (hy + 33.33) + hy * (hx + 33.33)
    local sum = hx + hy
    return fract(sum * mixed)
end
--- Linear interpolation: returns `a` at t = 0 and `b` at t = 1.
local function lerp(a, b, t)
    local delta = b - a
    return a + delta * t
end
--- 2D value noise: hashes the four corners of the integer cell containing
-- (x, y) and blends them with smoothstep-shaped weights.
local function noise(x: f64, y: f64): f64
    local cellX, cellY = floor(x), floor(y)
    local tx, ty = fract(x), fract(y)

    -- Smooth the in-cell position so the blend has zero slope at cell borders.
    local sx = tx * tx * (3.0 - 2.0 * tx)
    local sy = ty * ty * (3.0 - 2.0 * ty)

    local h00 = hash12(cellX, cellY)
    local h10 = hash12(cellX + 1.0, cellY)
    local h01 = hash12(cellX, cellY + 1.0)
    local h11 = hash12(cellX + 1.0, cellY + 1.0)

    local lower = lerp(h00, h10, sx)
    local upper = lerp(h01, h11, sx)
    return lerp(lower, upper, sy)
end
--- Rotates `v` about the y axis by `a` radians.
local function rotateY(v: vec, a: f64): vec
    local cosA, sinA = cos(a), sin(a)
    local x, y, z = v.x, v.y, v.z
    return vec3(x * cosA + z * sinA, y, -x * sinA + z * cosA)
end
--- Rotates `v` about the x axis by `a` radians.
local function rotateX(v: vec, a: f64): vec
    local cosA, sinA = cos(a), sin(a)
    local x, y, z = v.x, v.y, v.z
    return vec3(x, y * cosA - z * sinA, y * sinA + z * cosA)
end
--- Maps a normalised temperature to an emission colour.
-- Builds a red -> green -> blue ramp from `temp * 3`, squares it, then pulls
-- the result towards orange by up to 50% depending on |temp - 0.4|.
local function blackbody(temp: f64): vec
    local t = temp * 3.0

    local redBump = 0.3 * exp(-pow(t - 3.5, 2.0))
    local red = clamp(t * 1.5, 0.0, 1.0) + redBump
    local green = clamp(t - 0.8, 0.0, 1.0)
    local blue = clamp(t - 1.8, 0.0, 1.0)

    local ramp = vec3(red, green, blue)
    local squared = ramp * ramp

    local towardsOrange = smoothstep(0.0, 0.5, abs(temp - 0.4))
    return mix(squared, vec3(1.0, 0.6, 0.1), towardsOrange * 0.5)
end
--- Background colour for a ray that escaped: near-black sky plus sparse stars.
-- The direction is quantised into cells of size 1/350; a cell holds a star
-- only when its hash exceeds 0.992.
local function skyColor(rd: vec): vec
    local cell = rd * 350.0
    local cx, cy, cz = floor(cell.x), floor(cell.y), floor(cell.z)
    local h = hash12(cx + cy * 113.0, cz * 57.0)

    local brightness = 0.0
    if h > 0.992 then
        brightness = smoothstep(0.992, 1.0, h) * 3.0
    end

    return vec3(0.005, 0.005, 0.01) + vec3(brightness, brightness, brightness)
end
--- Marches one camera ray through the black-hole scene and returns its colour.
-- The ray is integrated in small steps. At each step its direction is bent
-- towards the origin by the `-1.5 * RS * h^2 / r^5` term, emission from a
-- tilted, noise-modulated disk is accumulated front to back, and the starfield
-- is added if the ray leaves the scene.
-- @param ro0    ray origin (camera position)
-- @param rd0    ray direction
-- @param iTime  animation time; drives disk rotation and noise scrolling
-- @return accumulated colour (tone mapping happens later in vec3_buffer)
local function traceBlackHole(ro0: vec, rd0: vec, iTime: f64): vec
    local BH_MASS = 1.0
    local RS = 2.0 * BH_MASS
    local PHOTON_SPHERE = 1.5 * RS
    local ACCRETION_IN = 3.0 * RS
    local ACCRETION_OUT = 14.0 * RS
    local diskTilt = 0.3 -- radians
    local cosT, sinT = cos(diskTilt), sin(diskTilt)

    local p = ro0
    local v = rd0
    local col = vec_0
    local trans = 1.0 -- transmittance
    local min_dist = 1e9 -- closest approach to the origin seen so far
    local iter = 0
    local max_iter = 300 -- steps
    local step_size = 0.15
    local escape_limit = 40.0 * RS

    -- Rotates a world-space point about the x axis by diskTilt into disk space.
    local function toDiskSpace(pt: vec): vec
        return vec3(pt.x, pt.y * cosT - pt.z * sinT, pt.y * sinT + pt.z * cosT)
    end

    while iter < max_iter do
        iter += 1
        local r2 = dot(p, p)
        local r = sqrt(r2)
        min_dist = min(min_dist, r)

        -- Captured: the ray got within 1% of RS, nothing behind it is visible.
        if r < RS * 1.01 then
            trans = 0.0
            break
        end
        -- Escaped: add whatever sky light is still transmitted.
        if r > escape_limit then
            col += skyColor(v) * trans
            break
        end

        --a = -1.5 * Rs * (h^2 / r^5) * r_vec
        --where h = |r x v| (angular momentum per unit mass)
        local crossP = cross(p, v)
        local h2 = dot(crossP, crossP) -- magnitude squared of angular momentum
        --divide by r^5. Since we have r2 (r^2), r^5 = r2 * r2 * r
        local r5 = r2 * r2 * r
        local geodesicTerm = -1.5 * RS * h2 / max(r5, 1e-4)
        local acc = p * geodesicTerm

        -- Step length grows with distance from the origin, with a 0.005 floor.
        local dt = step_size * r * 0.12
        dt = max(dt, 0.005)
        v = normalize(v + acc * dt)
        p += v * dt

        -- Disk sampling in the tilted frame; the slab thickens with radius.
        local pd = toDiskSpace(p)
        local diskH = abs(pd.y)
        local diskR = sqrt(pd.x*pd.x + pd.z*pd.z)
        local thickness = 0.25 + (diskR * 0.06)
        if diskH < thickness and diskR > ACCRETION_IN and diskR < ACCRETION_OUT then
            local phi = atan2(pd.z, pd.x)
            local angularSpeed = 1.5 / sqrt(diskR)
            local rotPhi = phi + iTime * angularSpeed
            local n1 = noise(diskR * 0.5 - iTime * 0.5, rotPhi * 3.0 + diskR * 0.2)
            local n2 = noise(diskR * 1.5, rotPhi * 12.0 - diskR * 0.5)
            local plasma = n1 * 0.7 + n2 * 0.3
            local fadeInner = smoothstep(ACCRETION_IN, ACCRETION_IN * 1.1, diskR)
            -- Reversed edges: 1 at half the outer radius, falling to 0 at the outer edge.
            local fadeOuter = smoothstep(ACCRETION_OUT, ACCRETION_OUT * 0.5, diskR)
            local verticalFade = smoothstep(thickness, 0.0, diskH)
            local density = plasma * fadeInner * fadeOuter * verticalFade * 0.8
            if density > 0.01 then
                local temp = pow(ACCRETION_IN / diskR, 1.3)
                -- `r` is the radius measured before this step's position update.
                local redshift = sqrt((r - RS) / r)
                redshift = clamp(redshift, 0.05, 1.0)
                temp *= redshift
                local baseColor = blackbody(temp)

                -- Tangential direction in disk space, rotated back to world
                -- space (inverse of toDiskSpace).
                local velDirDisk = vec3(-sin(phi), 0, cos(phi))
                local velDirWorld = vec3(velDirDisk.x, velDirDisk.y * cosT + velDirDisk.z * sinT, -velDirDisk.y * sinT + velDirDisk.z * cosT)
                local viewDotVel = dot(v, velDirWorld)
                -- dt = 1 / (g * (1 - b * cos(t)))
                local doppler = pow(1.0 / (1.0 - 0.45 * viewDotVel), 3.5)
                local emission = baseColor * density * doppler * 1.5 * redshift

                -- Front-to-back compositing: this step absorbs a fraction `w`.
                local stepAbsorb = density * 0.5 * dt
                local w = min(1.0, stepAbsorb)
                col += emission * (trans * w)
                trans *= (1.0 - w)
                if trans < 0.01 then break end
            end
        end
    end

    -- Faint ring for rays whose closest approach was just outside PHOTON_SPHERE.
    local photonSphere = PHOTON_SPHERE
    local ringWidth = 0.2
    if trans > 0.0 and min_dist > photonSphere and min_dist < photonSphere + ringWidth then
        local intensity = smoothstep(photonSphere + ringWidth, photonSphere, min_dist)
        col += vec3(1.0, 0.95, 0.85) * intensity * 0.4 * trans
    end
    return col
end
--- Per-pixel entry point: builds an orbiting look-at camera and traces one ray.
-- @param _          unused
-- @param fragCoord  pixel coordinate (x, y, 0)
-- @param iTime      animation time
-- @return colour produced by traceBlackHole
local function main(_: vec, fragCoord: vec, iTime: f64): vec
    -- Map the pixel to [-1, 1] on both axes, then widen x by the aspect ratio.
    local ndc = vec3(-1, -1, 0) + (vec3(2,2,0) * fragCoord) / vec3(iRes.x, iRes.y, 1)
    local screen = vec3(ndc.x * (iRes.x / iRes.y), ndc.y, 0)

    -- The camera circles the origin at a fixed radius while bobbing in height.
    local orbitRadius = 16.0
    local orbitSpeed = 0.08
    local eye = vec3(
        sin(iTime * orbitSpeed) * orbitRadius,
        3.5 + cos(iTime * 0.15) * 1.5,
        cos(iTime * orbitSpeed) * orbitRadius
    )

    -- Orthonormal look-at basis aimed at the origin.
    local forward = normalize(vec3(0,0,0) - eye)
    local side = normalize(cross(forward, vec3(0,1,0)))
    local upward = normalize(cross(side, forward))

    local rayDir = normalize(forward + side * screen.x + upward * screen.y)
    return traceBlackHole(eye, rayDir, iTime)
end
-- Largest pixel index on each display axis (resolution minus one).
local iRes0:vec = iRes-vec3(1,1,0)
-- First pixel of this actor's tile: x starts at 0, y is offset by Actor_Res.y
-- rows per preceding actor (presumably Actor_RenderNum is 1-based; verify).
local tStart0:vec = vec3(0,S.Actor_Res.y*(S.Actor_RenderNum-1),0)
-- Tile size minus one, so tEnd0 is the inclusive last pixel of the tile.
local tRes0:vec = S.Actor_Res-vec3(1,1,0)
local tEnd0:vec = tStart0+tRes0
-- Number of InterlaceStep() warm-up calls made right after its definition;
-- equals the tile's first row modulo 2.
local IL_Start:f64 = tStart0.y % 2
-- Interlace phase per axis: ext = {current offset, modulus},
-- int = {sub-counter, modulus}. InterlaceStep advances ext[1] whenever int[1]
-- wraps to 0.
local IL = { x = { ext = {tStart0.y % 2, 2}, int = {0, 2} }, y = { ext = {0, 2}, int = {0, 1} } }
--- Advances the interlace phase by one call and reports the new pattern.
-- On each axis the sub-counter int[1] is incremented modulo int[2]; whenever
-- it wraps to 0 the offset ext[1] is incremented, and ext[1] is then reduced
-- modulo ext[2].
-- @return x offset, y offset, x modulus, y modulus
local function InterlaceStep(): (f64, f64, f64, f64)
    local xAxis, yAxis = IL.x, IL.y

    xAxis.int[1] = (xAxis.int[1] + 1) % xAxis.int[2]
    if xAxis.int[1] == 0 then
        xAxis.ext[1] += 1
    end
    xAxis.ext[1] %= xAxis.ext[2]

    yAxis.int[1] = (yAxis.int[1] + 1) % yAxis.int[2]
    if yAxis.int[1] == 0 then
        yAxis.ext[1] += 1
    end
    yAxis.ext[1] %= yAxis.ext[2]

    return xAxis.ext[1], yAxis.ext[1], xAxis.ext[2], yAxis.ext[2]
end

-- Pre-advance the phase IL_Start times before the first frame.
for _ = 1, IL_Start do
    InterlaceStep()
end
--- Shades one pixel and writes it to `Out` at byte offset `c`.
-- `In` is accepted for signature compatibility but is not read here.
local function ShaderCall(T:f64, In:buffer, Out:buffer, c:f64, X_i:f64, Y_i:f64)
    local fragCoord = vec3(X_i, Y_i, 0)
    local colour = main(vec_1, fragCoord, T)
    vec3_buffer(Out, c, colour)
end
--- Module entry point: renders this actor's tile for time `T` into `Out`.
-- Rows are visited from y = iRes0.y - tStart0.y downwards, while the byte
-- cursor into `Out` moves forwards. With `InterlaceOn`, only the pixels
-- selected by the current InterlaceStep() offsets and strides are shaded.
-- @param T            animation time forwarded to the shader
-- @param In           forwarded to ShaderCall, which does not read it
-- @param Out          destination pixel buffer (4 bytes per pixel)
-- @param InterlaceOn  when truthy, shade an interlaced subset of the pixels
return function(T:f64, In:buffer, Out:buffer, InterlaceOn: boolean?)
    local X_Off, Y_Off, X_Jump, Y_Jump = 0, 0, 1, 1
    if InterlaceOn then
        X_Off, Y_Off, X_Jump, Y_Jump = InterlaceStep()
    end

    local pixelBytes = u32B
    local rowBytes = iRes.x * u32B

    -- Start at the first selected pixel, then derive the per-pixel stride and
    -- the number of extra bytes to skip after each finished row.
    local cursor = (pixelBytes * X_Off) + (rowBytes * Y_Off)
    local strideX = pixelBytes + pixelBytes * (X_Jump - 1)
    local rowSkip = rowBytes * (Y_Jump - 1)

    local yFirst, yLast = iRes0.y - tStart0.y - Y_Off, iRes0.y - tEnd0.y
    local xFirst, xLast = tStart0.x + X_Off, tEnd0.x
    for Y_i = yFirst, yLast, -Y_Jump do
        for X_i = xFirst, xLast, X_Jump do
            ShaderCall(T, In, Out, cursor, X_i, Y_i)
            cursor += strideX
        end
        cursor += rowSkip
    end
end
replicating https://www.shadertoy.com/view/lty3Rt
shader
--!native
--!optimize 2
-- Galaxy shader script: type aliases, precomputed constant vectors and library aliases.
type f64 = number
type vec = vector
-- Settings object returned by calling the required module; this script reads
-- its Display_Res, Actor_Res and Actor_RenderNum fields.
local S = require(`../../../../`)()
local vec3 = vector.create
-- Reusable all-zero and all-one vectors.
local vec_0, vec_1 = vec3(0,0,0), vec3(1,1,1)
-- Vector coefficients b, d and e of the tone curve evaluated in vec3_buffer.
local v_tonemap_b = vec3(0.0762, 0.0762, 0.0762)
local v_tonemap_d = vec3(0.59, 0.59, 0.59)
local v_tonemap_e = vec3(0.14, 0.14, 0.14)
-- Offset and scale used by main() to map pixel coordinates to [-1, 1].
local v_uv_offset = vec3(-1, -1, 0)
local v_uv_scale = vec3(2, 2, 0)
-- Colour constants used by main(), created once here instead of per pixel.
local v_glow_col = vec3(2.0, 1.0, 0.5)
local v_moon_haze = vec3(0.8, 0.9, 1.2)
local v_fog_1 = vec3(0.4, 0.5, 1.0)
local v_fog_2 = vec3(0.6, 0.3, 0.6)
local v_fog_3 = vec3(0.6, 0.0, 0.0)
local v_shine = vec3(1.3, 1.2, 1.2)
local v_dust_1 = vec3(2.0, 1.3, 1.0)
local v_dust_2 = vec3(0.1, 0.2, 0.3)
local v_star_1 = vec3(1.0, 0.6, 0.3)
local v_star_2 = vec3(1.0, 1.0, 0.7)
local v_adj_sub = vec3(0.8, 0.75, 0.7)
local v_adj_add = vec3(1.2, 1.2, 1.2)
local v_haze = vec3(0.3, 0.5, 0.9)
local v_vignette = vec3(0.5, 0.5, 0.5)
-- Local aliases for the math and vector library functions used by the per-pixel code.
local sin, cos, tanh, abs, floor, min, max, clamp, sqrt, exp, pow, atan2, log =
math.sin, math.cos, math.tanh, math.abs, math.floor, math.min, math.max, math.clamp, math.sqrt, math.exp, math.pow, math.atan2, math.log
local dot, cross, normalize, magnitude, vec_min =
vector.dot, vector.cross, vector.normalize, vector.magnitude, vector.min
-- Display resolution packed as (width, height, 0).
local iRes: vec = vec3(S.Display_Res.x, S.Display_Res.y, 0)
-- Bytes per output pixel: vec3_buffer writes one u32 per pixel.
local u32B: number = 4
-- Scalar coefficients a and c of the tone curve
-- (x * (a * x + b)) / (x * (c * x + d) + e).
local a = 2.51
local c_val = 2.43

--- Tone-maps a colour and stores it as one packed 32-bit pixel.
-- Applies a 0.6 exposure scale, the rational tone curve per channel, a clamp
-- to [0, 1] and a scale to 0..255, then writes
-- x + (y << 8) + (z << 16) + 0xFF000000.
-- @param b  destination buffer
-- @param i  byte offset of the pixel inside `b`
-- @param v  colour to encode
local function vec3_buffer(b: buffer, i: number, v: vector)
    local exposed = v * 0.6

    local numerator = exposed * (exposed * a + v_tonemap_b)
    local denominator = exposed * (exposed * c_val + v_tonemap_d) + v_tonemap_e
    local rgb = vector.clamp(numerator / denominator, vec_0, vec_1) * 255

    local packed = rgb.x + bit32.lshift(rgb.y, 8) + bit32.lshift(rgb.z, 16) + 0xFF000000
    buffer.writeu32(b, i, packed)
end
--- Scalar linear interpolation: returns `a` at t = 0 and `b` at t = 1.
local function mix_n(a: f64, b: f64, t: f64): f64
    local delta = b - a
    return a + delta * t
end
--- Vector linear interpolation; delegates to vector.lerp.
-- @param a  value returned at t = 0
-- @param b  value returned at t = 1
-- @param t  blend factor
local function mix_v(a: vec, b: vec, t: f64): vec
    local blended = vector.lerp(a, b, t)
    return blended
end
--- Cubic Hermite step: 0 at `edge0`, 1 at `edge1`, smooth in between.
-- Callers in this script also pass edge0 > edge1 to get a falling ramp.
local function smoothstep(edge0: f64, edge1: f64, x: f64): f64
    local span = edge1 - edge0
    local s = clamp((x - edge0) / span, 0.0, 1.0)
    return s * s * (3.0 - 2.0 * s)
end
--- Hashes a number to a pseudo-random value: the fractional part (`% 1`) of
-- a scaled cosine.
local function hash_n(n: f64): f64
    local scaled = cos(n) * 41415.92653
    return scaled % 1
end
--- 3D value noise on scalar coordinates.
-- The integer cell is flattened to one index (x + 57 * y + 113 * z), the eight
-- corners are hashed from that index, and the results are blended with
-- smoothstep-shaped weights.
local function noise3d_opt(x: f64, y: f64, z: f64): f64
    local cellX, cellY, cellZ = floor(x), floor(y), floor(z)
    local tx, ty, tz = x - cellX, y - cellY, z - cellZ

    local sx = tx * tx * (3.0 - 2.0 * tx)
    local sy = ty * ty * (3.0 - 2.0 * ty)
    local sz = tz * tz * (3.0 - 2.0 * tz)

    local base = cellX + cellY * 57.0 + 113.0 * cellZ

    -- Corner hashes; the suffix is the (x, y, z) corner offset.
    local h000 = hash_n(base + 0.0)
    local h100 = hash_n(base + 1.0)
    local h010 = hash_n(base + 57.0)
    local h110 = hash_n(base + 58.0)
    local h001 = hash_n(base + 113.0)
    local h101 = hash_n(base + 114.0)
    local h011 = hash_n(base + 170.0)
    local h111 = hash_n(base + 171.0)

    local nearFace = mix_n(mix_n(h000, h100, sx), mix_n(h010, h110, sx), sy)
    local farFace = mix_n(mix_n(h001, h101, sx), mix_n(h011, h111, sx), sy)
    return mix_n(nearFace, farFace, sz)
end
--- Applies the fixed 3x3 matrix used between fbm octaves to (x, y, z).
-- Matrix, column-major:
-- mat3(0.00, 1.60, 1.20, -1.60, 0.72, -0.96, -1.20, -0.96, 1.28)
-- @return the transformed x, y, z
local function fbm_transform(x: f64, y: f64, z: f64): (f64, f64, f64)
    -- The x contribution to the first row is x * 0, so it is omitted.
    return
        y * -1.60 + z * -1.20,
        x * 1.60 + y * 0.72 + z * -0.96,
        x * 1.20 + y * -0.96 + z * 1.28
end
--- Four-octave fractal noise with weights 0.5, 0.25, 0.1666 and 0.0834.
-- Between octaves the sample point is passed through fbm_transform and scaled
-- by 1.2, 1.3 and 1.4 in turn. The weights sum to 1, so no normalisation is
-- applied.
-- @param px, py, pz  sample position
-- @return weighted sum of the four noise3d_opt samples
local function fbmslow_opt(px: f64, py: f64, pz: f64): f64
    local f = 0.5000 * noise3d_opt(px, py, pz)

    -- p = m*p*1.2
    px, py, pz = fbm_transform(px, py, pz)
    px *= 1.2; py *= 1.2; pz *= 1.2
    f += 0.2500 * noise3d_opt(px, py, pz)

    -- p = m*p*1.3
    px, py, pz = fbm_transform(px, py, pz)
    px *= 1.3; py *= 1.3; pz *= 1.3
    f += 0.1666 * noise3d_opt(px, py, pz)

    -- p = m*p*1.4
    px, py, pz = fbm_transform(px, py, pz)
    px *= 1.4; py *= 1.4; pz *= 1.4
    f += 0.0834 * noise3d_opt(px, py, pz)

    -- No further transform: the point is not sampled again after the last octave.
    return f
end
--- Six-octave fractal noise with a decaying amplitude.
-- `norm` accumulates the amplitudes of the first five octaves only and the
-- sum of all six octaves is divided by it.
-- @param px, py, pz  sample position
-- @return the accumulated noise divided by `norm`
local function fbm_opt(px: f64, py: f64, pz: f64): f64
    local total, amp, norm = 0.0, 1.0, 0.0

    -- Octave 1
    total += amp * noise3d_opt(px, py, pz)
    px, py, pz = fbm_transform(px, py, pz)
    px *= 1.149; py *= 1.149; pz *= 1.149
    norm += amp
    amp *= 0.75

    -- Octave 2
    total += amp * noise3d_opt(px, py, pz)
    px, py, pz = fbm_transform(px, py, pz)
    px *= 1.41; py *= 1.41; pz *= 1.41
    norm += amp
    amp *= 0.75

    -- Octave 3
    total += amp * noise3d_opt(px, py, pz)
    px, py, pz = fbm_transform(px, py, pz)
    px *= 1.51; py *= 1.51; pz *= 1.51
    norm += amp
    amp *= 0.65

    -- Octave 4
    total += amp * noise3d_opt(px, py, pz)
    px, py, pz = fbm_transform(px, py, pz)
    px *= 1.21; py *= 1.21; pz *= 1.21
    norm += amp
    amp *= 0.35

    -- Octave 5
    total += amp * noise3d_opt(px, py, pz)
    px, py, pz = fbm_transform(px, py, pz)
    px *= 1.41; py *= 1.41; pz *= 1.41
    norm += amp
    amp *= 0.75

    -- Octave 6 (its amplitude is not added to `norm`)
    total += amp * noise3d_opt(px, py, pz)

    return total / norm
end
-- Per-pixel entry point of the galaxy shader.
-- Parameters: `_` is unused, `fragCoord` is the pixel coordinate (x, y, 0) and
-- `iTime` is the animation time (scaled by 0.1 before use).
-- Returns the colour before tone mapping: a glow towards the origin, a star
-- field and three fog layers, with a stack of horizontal noise layers blended
-- on top, then a haze term and a vignette.
local function main(_: vec, fragCoord: vec, iTime: f64): vec
local time = iTime * 0.1
-- Map the pixel to [-1, 1].
-- NOTE(review): the z lane is 0 * 0 / 0; only x and y are read afterwards.
local uv = v_uv_offset + (v_uv_scale * fragCoord) / iRes
local xy = vec3(uv.x, uv.y, 0)
-- fade ramps up over the first unit of `time` and back down between 14 and 15;
-- fade2 and glow only change once `time` exceeds 10.
local fade = min(1.0, time * 1.0) * min(1.0, max(0.0, 15.0 - time))
local fade2 = max(0.0, time - 10.0) * 0.37
local glow = max(-0.25, 1.0 + pow(fade2, 10.0) - 0.001 * pow(fade2, 25.0))
-- Camera: fixed x and y, z oscillating with time, aimed at the origin with a
-- rolled up-vector.
local campos = vec3(500.0, 850.0, -0.0 - cos((time - 1.4) / 2.0) * 2000.0)
local camtar = vec_0
local roll = 0.34
local cw = normalize(camtar - campos)
local cp = vec3(sin(roll), cos(roll), 0.0)
local cu = normalize(cross(cw, cp))
local cv = normalize(cross(cu, cw))
local rd = normalize(cu * xy.x + cv * xy.y + cw * 1.6)
-- sundot: how closely the view ray points at the origin, clamped to [0, 1].
local light = normalize(vec_0 - campos)
local sundot = clamp(dot(light, rd), 0.0, 1.0)
local col = vec_1 * glow * 1.2 * vec_min(vec_1, v_glow_col * pow(sundot, 100.0))
col += v_moon_haze * 0.3 * pow(sundot, 8.0)
local rdx, rdy, rdz = rd.x, rd.y, rd.z
-- Star field: product of two high powers of fbm sampled along the ray direction.
local s_val1 = pow(fbmslow_opt(rdx * 312.0, rdy * 312.0, rdz * 312.0), 7.0)
local stars = vec3(s_val1, s_val1, s_val1) * 85.5
local s_val2 = pow(fbmslow_opt(rdz * 440.3, rdx * 440.3, rdy * 440.3), 8.0)
stars *= vec3(s_val2, s_val2, s_val2)
-- Three background fog layers sampled at time-shifted offsets.
local cpos = rd * 1500.0 + vec3(831.0 - time * 30.0, 321.0, 1000.0)
col += v_fog_1 * (fbmslow_opt(cpos.x * 0.0035, cpos.y * 0.0035, cpos.z * 0.0035) - 0.5)
cpos += vec3(831.0 - time * 33.0, 321.0, 999.0)
col += v_fog_2 * 10.0 * pow(fbmslow_opt(cpos.x * 0.0045, cpos.y * 0.0045, cpos.z * 0.0045), 10.0)
cpos += vec3(3831.0 - time * 39.0, 221.0, 999.0)
col += v_fog_3 * 0.03 * 10.0 * pow(fbmslow_opt(cpos.x * 0.0145, cpos.y * 0.0145, cpos.z * 0.0145), 2.0)
cpos = rd * 1500.0 + vec3(831.0, 321.0, 999.0)
col += stars * fbm_opt(cpos.x * 0.0021, cpos.y * 0.0021, cpos.z * 0.0021)
local shift = vec3(time * 100.0, time * 180.0, 0)
-- Layer accumulation: sum_w is the accumulated opacity, sum_rgb the weighted colour.
local sum_w = 0.0
local sum_rgb = vec_0
-- Point where the ray crosses the y = 0 plane, used for a coarse radius test.
local c_height = campos.y / rdy
local cpos2 = campos - rd * c_height
local radius_check = magnitude(vec3(cpos2.x, 0, cpos2.z)) / 1000.0
if radius_check < 1.8 then
-- First stack: 21 planes at y = q * 8, visited from q = 10 down to -10.
for q = 10, -10, -1 do
if sum_w > 0.999 then break end
-- Ray parameter at which the ray reaches the plane y = q * 8.
local c = (q * 8.0 - campos.y) / rdy
local cpos = campos + rd * c
local see = dot(normalize(cpos), normalize(campos))
local shine = mix_v(v_shine, vec_0, smoothstep(0.0, 1.0, see))
local radius = magnitude(vec3(cpos.x, 0, cpos.z)) / 999.0
if radius > 1.0 then continue end
-- Rotate the sample about the y axis by an angle that depends on radius and time.
local rot = 3.00 * radius - time
local c_rot, s_rot = cos(rot), sin(rot)
local nx = cpos.x * c_rot - cpos.z * s_rot
local nz = cpos.x * s_rot + cpos.z * c_rot
local cpos_x, cpos_y, cpos_z = nx, cpos.y, nz
local mix_rad = mix_n(250.0, 50.0, radius)
cpos_x += 831.0 + shift.x
cpos_y += 321.0 + q * mix_rad - shift.x * 0.2
cpos_z += 1330.0 + shift.y
local zoom = mix_n(0.0025, 0.0028, radius)
cpos_x *= zoom; cpos_y *= zoom; cpos_z *= zoom
local alpha = smoothstep(0.50, 1.0, fbm_opt(cpos_x, cpos_y, cpos_z))
alpha *= 1.3 * pow(smoothstep(1.0, 0.0, radius), 0.3)
local dustcolor = mix_v(v_dust_1, v_dust_2, sqrt(radius))
local localcolor = mix_v(dustcolor, shine, alpha)
-- Sparse bright specks (gstar, gstar2) and dark spots (gholes) from high powers of noise.
local gstar = 2.0 * pow(noise3d_opt(cpos_x * 21.40, cpos_y * 21.40, cpos_z * 21.40), 22.0)
local gstar2 = 3.0 * pow(noise3d_opt(cpos_x * 26.55, cpos_y * 26.55, cpos_z * 26.55), 34.0)
local gholes = 1.0 * pow(noise3d_opt(cpos_x * 11.55, cpos_y * 11.55, cpos_z * 11.55), 14.0)
localcolor += v_star_1 * gstar
localcolor += v_star_2 * gstar2
localcolor -= vec3(gholes, gholes, gholes)
-- Front-to-back compositing: weight by the opacity still remaining.
alpha = (1.0 - sum_w) * alpha
sum_rgb += localcolor * alpha
sum_w += alpha
end
-- Second stack: 20 planes at y = q * 4 within a smaller radius. Each layer
-- adds opacity but no colour (vec_0 * alpha).
for q = 0, 19 do
if sum_w > 0.999 then break end
local c = (q * 4.0 - campos.y) / rdy
local cpos = campos + rd * c
local radius = magnitude(vec3(cpos.x, 0, cpos.z)) / 200.0
if radius > 1.0 then continue end
local rot = 3.2 * radius - time * 1.1
local c_rot, s_rot = cos(rot), sin(rot)
local nx = cpos.x * c_rot - cpos.z * s_rot
local nz = cpos.x * s_rot + cpos.z * c_rot
local cpos_x, cpos_y, cpos_z = nx, cpos.y, nz
cpos_x += 831.0 + shift.x
cpos_y += 321.0 + q * mix_n(250.0, 50.0, radius) - shift.x * 0.2
cpos_z += 1330.0 + shift.y
local alpha = 0.1 + smoothstep(0.6, 1.0, fbm_opt(cpos_x, cpos_y, cpos_z))
alpha *= 1.2 * (pow(smoothstep(1.0, 0.0, radius), 0.72) - pow(smoothstep(1.0, 0.0, radius * 1.875), 0.2))
alpha = (1.0 - sum_w) * alpha
sum_rgb += vec_0 * alpha
sum_w += alpha
end
end
local alpha_final = smoothstep(1.0 - radius_check * 0.5, 1.0, sum_w)
-- Normalise the accumulated colour by the accumulated weight.
if sum_w > 0.0001 then
sum_rgb /= (sum_w + 0.0001)
end
sum_rgb -= v_adj_sub * 0.2 * pow(sundot, 10.0) * alpha_final
sum_rgb += v_adj_add * min(glow, 10.0) * 0.2 * pow(sundot, 5.0) * (1.0 - alpha_final)
-- Blend the layer stack over the background, then add the haze term.
col = mix_v(col, sum_rgb, sum_w)
col = mix_v(col, v_haze, fade * 29.0 * (pow(sundot, 50.0) - pow(sundot, 60.0)) / (2.0 + 9.0 * abs(rd.y)))
-- Vignette: darken towards the screen edges.
local xy2 = fragCoord / iRes -- [0,1]
local vig_val = 0.25 * sqrt(100.0 * xy2.x * xy2.y * (1.0 - xy2.x) * (1.0 - xy2.y))
col *= v_vignette + vec3(vig_val, vig_val, vig_val)
return col
end
-- Largest pixel index on each display axis (resolution minus one).
local iRes0:vec = iRes-vec3(1,1,0)
-- First pixel of this actor's tile: x starts at 0, y is offset by Actor_Res.y
-- rows per preceding actor (presumably Actor_RenderNum is 1-based; verify).
local tStart0:vec = vec3(0,S.Actor_Res.y*(S.Actor_RenderNum-1),0)
-- Tile size minus one, so tEnd0 is the inclusive last pixel of the tile.
local tRes0:vec = S.Actor_Res-vec3(1,1,0)
local tEnd0:vec = tStart0+tRes0
-- Number of InterlaceStep() warm-up calls made right after its definition;
-- equals the tile's first row modulo 2.
local IL_Start:f64 = tStart0.y % 2
-- Interlace phase per axis: ext = {current offset, modulus},
-- int = {sub-counter, modulus}. InterlaceStep advances ext[1] whenever int[1]
-- wraps to 0.
local IL = { x = { ext = {tStart0.y % 2, 2}, int = {0, 2} }, y = { ext = {0, 2}, int = {0, 1} } }
--- Advances the interlace phase by one call and reports the new pattern.
-- On each axis the sub-counter int[1] is incremented modulo int[2]; whenever
-- it wraps to 0 the offset ext[1] is incremented, and ext[1] is then reduced
-- modulo ext[2].
-- @return x offset, y offset, x modulus, y modulus
local function InterlaceStep(): (f64, f64, f64, f64)
    local xAxis, yAxis = IL.x, IL.y

    xAxis.int[1] = (xAxis.int[1] + 1) % xAxis.int[2]
    if xAxis.int[1] == 0 then
        xAxis.ext[1] += 1
    end
    xAxis.ext[1] %= xAxis.ext[2]

    yAxis.int[1] = (yAxis.int[1] + 1) % yAxis.int[2]
    if yAxis.int[1] == 0 then
        yAxis.ext[1] += 1
    end
    yAxis.ext[1] %= yAxis.ext[2]

    return xAxis.ext[1], yAxis.ext[1], xAxis.ext[2], yAxis.ext[2]
end

-- Pre-advance the phase IL_Start times before the first frame.
for _ = 1, IL_Start do
    InterlaceStep()
end
--- Shades one pixel and writes it to `Out` at byte offset `c`.
-- `In` is accepted for signature compatibility but is not read here.
local function ShaderCall(T:f64, In:buffer, Out:buffer, c:f64, X_i:f64, Y_i:f64)
    local fragCoord = vec3(X_i, Y_i, 0)
    local colour = main(vec_1, fragCoord, T)
    vec3_buffer(Out, c, colour)
end
--- Module entry point: renders this actor's tile for time `T` into `Out`.
-- Rows are visited from y = iRes0.y - tStart0.y downwards, while the byte
-- cursor into `Out` moves forwards. With `InterlaceOn`, only the pixels
-- selected by the current InterlaceStep() offsets and strides are shaded.
-- @param T            animation time forwarded to the shader
-- @param In           forwarded to ShaderCall, which does not read it
-- @param Out          destination pixel buffer (4 bytes per pixel)
-- @param InterlaceOn  when truthy, shade an interlaced subset of the pixels
return function(T:f64, In:buffer, Out:buffer, InterlaceOn: boolean?)
    local X_Off, Y_Off, X_Jump, Y_Jump = 0, 0, 1, 1
    if InterlaceOn then
        X_Off, Y_Off, X_Jump, Y_Jump = InterlaceStep()
    end

    local pixelBytes = u32B
    local rowBytes = iRes.x * u32B

    -- Start at the first selected pixel, then derive the per-pixel stride and
    -- the number of extra bytes to skip after each finished row.
    local cursor = (pixelBytes * X_Off) + (rowBytes * Y_Off)
    local strideX = pixelBytes + pixelBytes * (X_Jump - 1)
    local rowSkip = rowBytes * (Y_Jump - 1)

    local yFirst, yLast = iRes0.y - tStart0.y - Y_Off, iRes0.y - tEnd0.y
    local xFirst, xLast = tStart0.x + X_Off, tEnd0.x
    for Y_i = yFirst, yLast, -Y_Jump do
        for X_i = xFirst, xLast, X_Jump do
            ShaderCall(T, In, Out, cursor, X_i, Y_i)
            cursor += strideX
        end
        cursor += rowSkip
    end
end
This is just absurdly incredible. I pray that one day Roblox provides us with some form of GPU service, so that developers can take advantage of parallel processing on the GPU in any scenario where it could be beneficial (e.g. custom physics and this sort of renderer project!)
It’s going to suck not being able to reply on the dev forum soon, as I am absolutely not going to verify with my face or ID. So this might be one of my last replies to this project.
Anyways, I hope this project is going to allow developers to absolutely push roblox studio beyond what was intended in the future, or right now by what I am seeing on this post already.
I don’t want Roblox to be limiting access to engaging in this project, which is why I have hosted it on github.
Please get involved there if you can!
You know I had to do it to them with the balatro shader
Hi folks, the best way to get Penumbra running in Roblox has been updated; you can now grab the immediately-usable .rbxm file via the releases page, updated in the OP.
This should make the tool immediately accessible to users wanting to just hit play without worry about the setup process.
Here’s another example of Penumbra stretching its wings using 16 render threads!
Snippet of the performance of the latest dev build I’ve been working on, which has had architectural improvements leading to improved performance with higher thread counts ![]()
it’s astounding. I’ve never seen anything like this in Roblox, only in advanced C/Rust engines like SDL 3/Bevy/SFML.
Pushing high resolution rendering
Wow this is truly wonderful!
I’ve been attempting to make my own renderer (raster) recently too; Penumbra really is next level.
I’m currently running this on an i5-8365U
Just asking, but how did you achieve 16 threads? From what I’ve known, Roblox Studio is limited to 8 threads max. I’m aware my CPU currently won’t benefit from that, but it’s something that would be nice to try if I ever get a CPU with more threads!
Do you also have any tips?
(normal vertex/triangle raster without raycasts)
Could you add some code examples and also how to set up the module?
Both studio and live client now allow the use of 16 threads.
You can verify this by running 16 actors in parallel and checking the microprofiler labels.
The highest worker letter should be ‘P’.
The live client also spins up its 16 workers AFTER launching a game for the first time, as it starts up with 3.
This has been my experience on macOS. From what I understand the experience should be the same on PC.
If your CPU does not have 16 cores then it may be possible that roblox limits its workers based on that.
Also, cool renderer! I’d like to reach out to you in DMs as you’ve done something really interesting with yours that I’d like to talk about. ![]()
this is insane omg I’m giving you my purse
no more 3 thread limit for parallel lua on the client?? really? when was this change made?
it’s new and mostly undocumented & unnoticed
https://devforum.roblox.com/t/win11-desktop-player-showing-raised-8-taskscheduler-workers-instead-of-previous-3-cap/4357901
is this related to Penumbra revenge on roblox?
A penumbra is the slightly blurry and less dark area at the edges of an objects shadow