Shaders in Roblox have arrived! [Penumbra - Luau Graphics]

thanks to @Crazyblox for the cam angle, twice color vibrant and preprocess.

1 Like

so peak :fire:

(running on notebook)

shader
--!native
--!optimize 2
type f64 = number
type vec = vector

local S = require(`../../../../`)()

local vec3 = vector.create
local vec_0, vec_1 = vec3(0,0,0), vec3(1,1,1)

local sin, cos, abs, floor, min, max, clamp, sqrt, exp, pow, atan2, log =
	math.sin, math.cos, math.abs, math.floor, math.min, math.max, math.clamp, math.sqrt, math.exp, math.pow, math.atan2, math.log
local dot, cross, normalize, magnitude =
	vector.dot, vector.cross, vector.normalize, vector.magnitude

-- NOTE(review): PI is not referenced anywhere in the visible code — confirm before removing.
local PI: f64 = math.pi

-- Default camera origin; main() reads it once before computing its own orbit position.
local ro: vec = vec3(0.0, 2.0, 14.0)
-- NOTE(review): lookAt, fwd, right, up and vec_up are not read in the visible code
-- (main() declares its own local fwd/right/up that shadow these) — confirm before removing.
local lookAt: vec = vec3(0.0, 0.0, 0.0)
local fwd: vec = vec3(0,0,-1)
local right: vec = vec3(1,0,0)
local up: vec = vec3(0,1,0)
local vec_up: vec = vec3(0,1,0)

-- Display resolution taken from the host module `S`; z is 0, so code dividing by
-- this vector must not rely on the z component.
local iRes: vec = vec3(S.Display_Res.x, S.Display_Res.y, 0)

-- Bytes per pixel in the output buffer (one buffer.writeu32 per pixel).
local u32B: number = 4
--- Tone-maps a colour vector and stores it as one packed 32-bit pixel.
-- The input is scaled by 0.6 (exposure), pushed through the rational curve
-- (x * (a*x + b)) / (x * (c*x + d) + e), clamped to [0, 1] and scaled to 0..255.
-- @param b  destination buffer
-- @param i  byte offset of the pixel inside `b`
-- @param v  colour to encode (x, y, z are the three stored channels)
local function vec3_buffer(b: buffer, i: number, v: vector)
	local exposed = v * 0.6 -- exposure

	-- Curve coefficients; b, d and e are splatted to vectors so the whole
	-- expression stays in vector arithmetic.
	local curveA = 2.51
	local curveB = vec3(0.0762, 0.0762, 0.0762) -- 2.54 * 0.03
	local curveC = 2.43
	local curveD = vec3(0.59, 0.59, 0.59)
	local curveE = vec3(0.14, 0.14, 0.14)

	local numerator = exposed * (exposed * curveA + curveB)
	local denominator = exposed * (exposed * curveC + curveD) + curveE

	local channels = vector.clamp(numerator / denominator, vec_0, vec_1) * 255

	-- Layout: x in bits 0-7, y in bits 8-15, z in bits 16-23, 0xFF in the top byte.
	local midByte = bit32.lshift(channels.y, 8)
	local highByte = bit32.lshift(channels.z, 16)
	buffer.writeu32(b, i, channels.x + midByte + highByte + 0xFF000000)
end

--- Vector blend: forwards to vector.lerp(a, b, t).
local function mix(a: vec, b: vec, t: f64): vec
	local blended: vec = vector.lerp(a, b, t)
	return blended
end

--- Fractional part of `x`, measured from floor(x) (so negative inputs map upward).
local function fract(x: f64): f64
	local whole = floor(x)
	return x - whole
end

--- Cubic Hermite ramp between two edges.
-- `x` is normalised against [edge0, edge1], clamped to [0, 1] and eased with
-- 3t^2 - 2t^3. Callers in this file also pass edge0 > edge1 to get a falling ramp.
local function smoothstep(edge0: f64, edge1: f64, x: f64): f64
	local span = edge1 - edge0
	local t = clamp((x - edge0) / span, 0.0, 1.0)
	local tSquared = t * t
	return tSquared * (3.0 - 2.0 * t)
end

--- Cheap 2D -> 1D hash built from fract(); returns a value in [0, 1) for finite inputs.
local function hash12(x: f64, y: f64): f64
	local hx = fract(x * 0.1031)
	local hy = fract(y * 0.1031)
	-- Symmetric cross term that stirs both coordinates together.
	local stir = hx * (hy + 33.33) + hy * (hx + 33.33)
	local sum = hx + hy
	return fract(sum * stir)
end

--- Untyped linear interpolation: a + (b - a) * t.
local function lerp(a, b, t)
	local delta = b - a
	return a + delta * t
end

--- 2D value noise: hashes the four lattice corners around (x, y) and blends
-- them bilinearly using smoothstep-shaped weights.
local function noise(x: f64, y: f64): f64
	local cellX, cellY = floor(x), floor(y)
	local fracX, fracY = fract(x), fract(y)

	-- 3t^2 - 2t^3 easing of the in-cell position.
	local weightX = fracX * fracX * (3.0 - 2.0 * fracX)
	local weightY = fracY * fracY * (3.0 - 2.0 * fracY)

	local corner00 = hash12(cellX, cellY)
	local corner10 = hash12(cellX + 1.0, cellY)
	local corner01 = hash12(cellX, cellY + 1.0)
	local corner11 = hash12(cellX + 1.0, cellY + 1.0)

	local lowerEdge = lerp(corner00, corner10, weightX)
	local upperEdge = lerp(corner01, corner11, weightX)
	return lerp(lowerEdge, upperEdge, weightY)
end

--- Rotates `v` about the Y axis by `a` radians; the y component is unchanged.
local function rotateY(v: vec, a: f64): vec
	local cosA, sinA = cos(a), sin(a)
	local x, z = v.x, v.z
	local newX = x*cosA + z*sinA
	local newZ = -x*sinA + z*cosA
	return vec3(newX, v.y, newZ)
end

--- Rotates `v` about the X axis by `a` radians; the x component is unchanged.
local function rotateX(v: vec, a: f64): vec
	local cosA, sinA = cos(a), sin(a)
	local y, z = v.y, v.z
	local newY = y*cosA - z*sinA
	local newZ = y*sinA + z*cosA
	return vec3(v.x, newY, newZ)
end

--- Maps a normalised temperature to an emission colour.
-- Builds a three-channel ramp from `temp * 3`, squares it, then blends up to
-- halfway toward a fixed orange depending on how far `temp` is from 0.4.
local function blackbody(temp: f64): vec
	local scaled = temp * 3.0

	-- Red saturates first and gets a small Gaussian bump centred on scaled = 3.5.
	local bump = 0.3 * exp(-pow(scaled - 3.5, 2.0))
	local red = clamp(scaled * 1.5, 0.0, 1.0) + bump
	local green = clamp(scaled - 0.8, 0.0, 1.0)
	local blue = clamp(scaled - 1.8, 0.0, 1.0)

	local ramp = vec3(red, green, blue)
	ramp = ramp * ramp

	local orangeTint = vec3(1.0, 0.6, 0.1)
	local tintAmount = smoothstep(0.0, 0.5, abs(temp - 0.4)) * 0.5
	return mix(ramp, orangeTint, tintAmount)
end

--- Background colour for a ray direction: a near-black base plus sparse stars.
-- The direction is quantised onto a 350-unit lattice and hashed; only cells
-- whose hash exceeds 0.992 light up.
local function skyColor(rd: vec): vec
	local lattice = rd * 350.0
	-- Cell ids pass through a vector, as the per-component floors did before hashing.
	local cell = vec3(floor(lattice.x), floor(lattice.y), floor(lattice.z))
	local cellHash = hash12(cell.x + cell.y * 113.0, cell.z * 57.0)

	local starLevel = 0.0
	if cellHash > 0.992 then
		starLevel = smoothstep(0.992, 1.0, cellHash) * 3.0
	end

	local base = vec3(0.005, 0.005, 0.01)
	return base + vec3(starLevel, starLevel, starLevel)
end

--- Marches one ray through a light-bending field centred on the origin and
-- returns the colour accumulated along it.
-- Each iteration bends the ray direction toward the origin, advances the
-- position, and composites emission from a tilted disk volume front-to-back
-- using a running transmittance. The march stops when the ray falls inside
-- 1.01 * RS, leaves the escape radius (sky colour is added), becomes nearly
-- opaque, or exhausts the step budget.
-- @param ro0    ray origin
-- @param rd0    ray direction (main() passes a normalised vector)
-- @param iTime  time value that animates the disk
-- @return accumulated colour for the ray
local function traceBlackHole(ro0: vec, rd0: vec, iTime: f64): vec
	local BH_MASS = 1.0
	local RS = 2.0 * BH_MASS -- base radius; every other distance below is a multiple of it
	local PHOTON_SPHERE = 1.5 * RS
	local ACCRETION_IN = 3.0 * RS -- inner disk radius
	local ACCRETION_OUT = 14.0 * RS -- outer disk radius

	local diskTilt = 0.3 -- radians
	local cosT, sinT = cos(diskTilt), sin(diskTilt)

	local p = ro0 -- current ray position
	local v = rd0 -- current ray direction, re-normalised every step
	local col = vec_0
	local trans = 1.0 -- transmittance
	local min_dist = 1e9 -- closest approach to the origin, used for the ring after the loop

	local iter = 0
	local max_iter = 300 -- steps
	local step_size = 0.15

	local escape_limit = 40.0 * RS

	-- Rotates a point about the X axis by diskTilt so the disk plane becomes y = 0.
	local function toDiskSpace(pt: vec): vec
		return vec3(pt.x, pt.y * cosT - pt.z * sinT, pt.y * sinT + pt.z * cosT)
	end

	-- NOTE(review): if the loop runs out of iterations without hitting one of the
	-- break conditions, no sky colour is added for that ray — confirm this is intended.
	while iter < max_iter do
		iter += 1

		local r2 = dot(p, p)
		local r = sqrt(r2)
		min_dist = min(min_dist, r)

		-- Absorbed: zeroing trans also suppresses the ring term after the loop.
		if r < RS * 1.01 then
			trans = 0.0
			break
		end

		-- Escaped: whatever transmittance is left shows the background.
		if r > escape_limit then
			col += skyColor(v) * trans
			break
		end

		--a = -1.5 * Rs * (h^2 / r^5) * r_vec
		--where h = |r x v| (angular momentum per unit mass)
		local crossP = cross(p, v)
		local h2 = dot(crossP, crossP) -- magnitude squared of angular momentum

		--divide by r^5. Since we have r2 (r^2), r^5 = r2 * r2 * r
		local r5 = r2 * r2 * r
		local geodesicTerm = -1.5 * RS * h2 / max(r5, 1e-4) -- max() guards the division

		local acc = p * geodesicTerm

		-- Step length grows with distance from the origin, with a floor of 0.005.
		local dt = step_size * r * 0.12

		dt = max(dt, 0.005)

		v = normalize(v + acc * dt)
		p += v * dt

		-- From here on `p` is the advanced position, while `r` still holds the
		-- radius measured before the step.
		local pd = toDiskSpace(p)
		local diskH = abs(pd.y)
		local diskR = sqrt(pd.x*pd.x + pd.z*pd.z)

		local thickness = 0.25 + (diskR * 0.06) -- disk half-height grows with radius

		if diskH < thickness and diskR > ACCRETION_IN and diskR < ACCRETION_OUT then
			local phi = atan2(pd.z, pd.x)

			-- Inner parts of the disk rotate faster (speed ~ 1 / sqrt(radius)).
			local angularSpeed = 1.5 / sqrt(diskR)
			local rotPhi = phi + iTime * angularSpeed

			-- NOTE(review): noiseScale is never read — confirm before removing.
			local noiseScale = 4.0
			local n1 = noise(diskR * 0.5 - iTime * 0.5, rotPhi * 3.0 + diskR * 0.2)
			local n2 = noise(diskR * 1.5, rotPhi * 12.0 - diskR * 0.5)
			local plasma = n1 * 0.7 + n2 * 0.3

			-- fadeOuter and verticalFade pass edge0 > edge1, giving falling ramps.
			local fadeInner = smoothstep(ACCRETION_IN, ACCRETION_IN * 1.1, diskR)
			local fadeOuter = smoothstep(ACCRETION_OUT, ACCRETION_OUT * 0.5, diskR)
			local verticalFade = smoothstep(thickness, 0.0, diskH)

			local density = plasma * fadeInner * fadeOuter * verticalFade * 0.8

			if density > 0.01 then
				local temp = pow(ACCRETION_IN / diskR, 1.3)

				-- Uses the pre-step radius `r`, which the absorption test above
				-- guarantees is greater than RS.
				local redshift = sqrt((r - RS) / r)
				redshift = clamp(redshift, 0.05, 1.0)
				temp *= redshift

				local baseColor = blackbody(temp)

				-- Tangential direction in disk space, rotated back with the inverse
				-- of toDiskSpace.
				local velDirDisk = vec3(-sin(phi), 0, cos(phi))
				local velDirWorld = vec3(velDirDisk.x, velDirDisk.y * cosT + velDirDisk.z * sinT, -velDirDisk.y * sinT + velDirDisk.z * cosT)

				local viewDotVel = dot(v, velDirWorld)
				--  dt = 1 / (g * (1 - b * cos(t)))
				-- Both vectors are unit length, so the denominator stays >= 0.55.
				local doppler = pow(1.0 / (1.0 - 0.45 * viewDotVel), 3.5)

				local emission = baseColor * density * doppler * 1.5 * redshift

				-- Front-to-back compositing: w is the opacity of this step.
				local stepAbsorb = density * 0.5 * dt
				local w = min(1.0, stepAbsorb)

				col += emission * (trans * w)
				trans *= (1.0 - w)

				-- Nearly opaque: further steps cannot contribute visibly.
				if trans < 0.01 then break end
			end
		end
	end

	-- Thin glow for rays whose closest approach was just outside PHOTON_SPHERE.
	local photonSphere = PHOTON_SPHERE
	local ringWidth = 0.2
	if trans > 0.0 and min_dist > photonSphere and min_dist < photonSphere + ringWidth then
		local intensity = smoothstep(photonSphere + ringWidth, photonSphere, min_dist)
		col += vec3(1.0, 0.95, 0.85) * intensity * 0.4 * trans
	end

	return col
end

--- Per-pixel entry point: builds an orbiting camera ray for `fragCoord` and
-- traces it through traceBlackHole.
-- The camera position is computed directly from `iTime`; no module-level
-- camera value is read, and the basis locals use names that do not shadow
-- the module-level fwd/right/up.
-- @param _          unused first argument (kept for the caller's signature)
-- @param fragCoord  pixel coordinate; only x and y are read
-- @param iTime      time value driving the camera orbit and the disk animation
-- @return colour for the pixel, before tone mapping
local function main(_: vec, fragCoord: vec, iTime: f64): vec
	-- Map the pixel to [-1, 1] on both axes, then widen x by the aspect ratio.
	-- The divisor's z is 1 so the unused z lane never divides by zero.
	local ndc = vec3(-1, -1, 0) + (vec3(2,2,0) * fragCoord) / vec3(iRes.x, iRes.y, 1)
	local uv = vec3(ndc.x * (iRes.x / iRes.y), ndc.y, 0)

	-- Camera circles the origin at radius camDist while bobbing in height.
	local camDist = 16.0
	local camSpeed = 0.08
	local camPos = vec3(sin(iTime * camSpeed) * camDist, 3.5 + cos(iTime * 0.15) * 1.5, cos(iTime * camSpeed) * camDist)

	-- Look-at basis aimed at the origin with world +Y as the up hint.
	local camFwd = normalize(vec_0 - camPos)
	local camRight = normalize(cross(camFwd, vec3(0,1,0)))
	local camUp = normalize(cross(camRight, camFwd))

	local rd = normalize(camFwd + camRight * uv.x + camUp * uv.y)
	return traceBlackHole(camPos, rd, iTime)
end

-- Display resolution minus one on x and y (largest pixel index per axis).
local iRes0:vec = iRes-vec3(1,1,0)
-- First pixel of this actor's tile: x starts at 0 and y is offset by one tile
-- height per preceding actor (Actor_RenderNum is presumably 1-based — confirm).
local tStart0:vec = vec3(0,S.Actor_Res.y*(S.Actor_RenderNum-1),0)
-- Tile size minus one, and the resulting last pixel of the tile (inclusive).
local tRes0:vec = S.Actor_Res-vec3(1,1,0)
local tEnd0:vec = tStart0+tRes0
-- Number of InterlaceStep calls made at load time (0 or 1, from the tile's start row parity).
local IL_Start:f64 = tStart0.y % 2
-- Interlace state per axis: ext = {current offset, modulus}, int = {call counter, period}.
-- The offset advances once every `period` calls: every 2nd call on x, every call on y.
local IL = { x = { ext = {tStart0.y % 2, 2}, int = {0, 2} }, y = { ext = {0, 2}, int = {0, 1} } }
--- Advances the interlace state by one call and reports the new pattern.
-- For each axis the inner counter is incremented modulo its period; whenever
-- it wraps to 0 the axis offset is bumped, then wrapped by its modulus.
-- @return x offset, y offset, x modulus, y modulus
local function InterlaceStep(): (f64, f64, f64, f64)
	local xCounter, xOffset = IL.x.int, IL.x.ext
	xCounter[1] = (xCounter[1] + 1) % xCounter[2]
	if xCounter[1] == 0 then
		xOffset[1] += 1
	end
	xOffset[1] %= xOffset[2]

	local yCounter, yOffset = IL.y.int, IL.y.ext
	yCounter[1] = (yCounter[1] + 1) % yCounter[2]
	if yCounter[1] == 0 then
		yOffset[1] += 1
	end
	yOffset[1] %= yOffset[2]

	return xOffset[1], yOffset[1], xOffset[2], yOffset[2]
end

-- Prime the state IL_Start times at load.
for _ = 1, IL_Start do
	InterlaceStep()
end

--- Shades one pixel and writes the encoded result into `Out` at byte offset `c`.
-- `In` is accepted but not read here.
local function ShaderCall(T:f64, In:buffer, Out:buffer, c:f64, X_i:f64, Y_i:f64)
	local fragCoord = vec3(X_i, Y_i, 0)
	local shaded = main(vec_1, fragCoord, T)
	vec3_buffer(Out, c, shaded)
end

--- Module entry point: shades this actor's tile into `Out` for time `T`.
-- Rows are visited with decreasing Y while the byte cursor only moves forward.
-- With `InterlaceOn`, only the pixels selected by the current InterlaceStep
-- pattern are shaded and the cursor skips over the rest.
-- NOTE(review): the cursor starts from the interlace offset only, not from the
-- tile origin, so `Out` is presumably a per-tile buffer — confirm against the caller.
return function(T:f64, In:buffer, Out:buffer, InterlaceOn: boolean?)
	local xOffset, yOffset, xStep, yStep = 0, 0, 1, 1
	if InterlaceOn then
		xOffset, yOffset, xStep, yStep = InterlaceStep()
	end

	local rowBytes = iRes.x * u32B
	local cursor = (u32B * xOffset) + (rowBytes * yOffset)

	-- Bytes to advance per shaded pixel, and extra bytes to skip after each row.
	local pixelStride = u32B + u32B * (xStep-1)
	local rowSkip = rowBytes * (yStep-1)

	local firstRow = iRes0.y - tStart0.y - yOffset
	local lastRow = iRes0.y - tEnd0.y
	local firstCol = tStart0.x + xOffset

	for Y_i = firstRow, lastRow, -yStep do
		for X_i = firstCol, tEnd0.x, xStep do
			ShaderCall(T, In, Out, cursor, X_i, Y_i)
			cursor += pixelStride
		end
		cursor += rowSkip
	end
end
3 Likes

replicating https://www.shadertoy.com/view/lty3Rt

shader
--!native
--!optimize 2
type f64 = number
type vec = vector

local S = require(`../../../../`)()

local vec3 = vector.create
local vec_0, vec_1 = vec3(0,0,0), vec3(1,1,1)

-- Vector constants are created once at load so the per-pixel code does not rebuild them.

-- Tone-curve coefficients read by vec3_buffer (splatted scalars).
local v_tonemap_b = vec3(0.0762, 0.0762, 0.0762)
local v_tonemap_d = vec3(0.59, 0.59, 0.59)
local v_tonemap_e = vec3(0.14, 0.14, 0.14)

-- Pixel -> [-1, 1] mapping used at the top of main().
local v_uv_offset = vec3(-1, -1, 0)
local v_uv_scale = vec3(2, 2, 0)
-- Tints for the central glow and the wider haze around it.
local v_glow_col = vec3(2.0, 1.0, 0.5)
local v_moon_haze = vec3(0.8, 0.9, 1.2)
-- Tints for the three background noise layers.
local v_fog_1 = vec3(0.4, 0.5, 1.0)
local v_fog_2 = vec3(0.6, 0.3, 0.6)
local v_fog_3 = vec3(0.6, 0.0, 0.0)
-- Colours used inside the first slice loop of main().
local v_shine = vec3(1.3, 1.2, 1.2)
local v_dust_1 = vec3(2.0, 1.3, 1.0)
local v_dust_2 = vec3(0.1, 0.2, 0.3)
local v_star_1 = vec3(1.0, 0.6, 0.3)
local v_star_2 = vec3(1.0, 1.0, 0.7)
-- Adjustments applied to the accumulated slice colour after the loops.
local v_adj_sub = vec3(0.8, 0.75, 0.7)
local v_adj_add = vec3(1.2, 1.2, 1.2)
-- Final haze tint and the base level of the vignette multiplier.
local v_haze = vec3(0.3, 0.5, 0.9)
local v_vignette = vec3(0.5, 0.5, 0.5)

local sin, cos, tanh, abs, floor, min, max, clamp, sqrt, exp, pow, atan2, log =
	math.sin, math.cos, math.tanh, math.abs, math.floor, math.min, math.max, math.clamp, math.sqrt, math.exp, math.pow, math.atan2, math.log
local dot, cross, normalize, magnitude, vec_min =
	vector.dot, vector.cross, vector.normalize, vector.magnitude, vector.min

-- Display resolution from the host module `S`. z is 0, so dividing a vector by
-- iRes yields a non-finite z component; only x and y of such results are read.
local iRes: vec = vec3(S.Display_Res.x, S.Display_Res.y, 0)

-- Bytes per pixel in the output buffer (one buffer.writeu32 per pixel).
local u32B: number = 4

-- Scalar tone-curve coefficients read by vec3_buffer.
-- Note: `a` is shadowed by a parameter or local of the same name in mix_n, mix_v and fbm_opt.
local a = 2.51
local c_val = 2.43

--- Tone-maps a colour vector and stores it as one packed 32-bit pixel.
-- The input is scaled by 0.6, pushed through the rational curve
-- (x * (a*x + b)) / (x * (c*x + d) + e), clamped to [0, 1] and scaled to 0..255.
-- @param b  destination buffer
-- @param i  byte offset of the pixel inside `b`
-- @param v  colour to encode (x, y, z are the three stored channels)
local function vec3_buffer(b: buffer, i: number, v: vector)
	local exposed = v * 0.6

	local numerator = exposed * (exposed * a + v_tonemap_b)
	local denominator = exposed * (exposed * c_val + v_tonemap_d) + v_tonemap_e

	local channels = vector.clamp(numerator / denominator, vec_0, vec_1) * 255

	-- Layout: x in bits 0-7, y in bits 8-15, z in bits 16-23, 0xFF in the top byte.
	local midByte = bit32.lshift(channels.y, 8)
	local highByte = bit32.lshift(channels.z, 16)
	buffer.writeu32(b, i, channels.x + midByte + highByte + 0xFF000000)
end

--- Scalar linear interpolation: a + (b - a) * t.
local function mix_n(a: f64, b: f64, t: f64): f64
	local delta = b - a
	return a + delta * t
end

--- Vector blend: forwards to vector.lerp(a, b, t).
local function mix_v(a: vec, b: vec, t: f64): vec
	local blended: vec = vector.lerp(a, b, t)
	return blended
end

--- Cubic Hermite ramp between two edges.
-- `x` is normalised against [edge0, edge1], clamped to [0, 1] and eased with
-- 3t^2 - 2t^3. Callers in this file also pass edge0 > edge1 to get a falling ramp.
local function smoothstep(edge0: f64, edge1: f64, x: f64): f64
	local span = edge1 - edge0
	local t = clamp((x - edge0) / span, 0.0, 1.0)
	local tSquared = t * t
	return tSquared * (3.0 - 2.0 * t)
end

--- 1D hash: keeps only the fractional part of a large multiple of cos(n).
local function hash_n(n: f64): f64
	local scrambled = cos(n) * 41415.92653
	return scrambled % 1
end

--- 3D value noise on scalar inputs.
-- The lattice cell is flattened to one index (x + 57*y + 113*z), its eight
-- corners are hashed with hash_n, and the results are blended trilinearly
-- with smoothstep-shaped weights.
local function noise3d_opt(x: f64, y: f64, z: f64): f64
	local cellX, cellY, cellZ = floor(x), floor(y), floor(z)
	local tx, ty, tz = x - cellX, y - cellY, z - cellZ

	-- 3t^2 - 2t^3 easing of the in-cell position.
	local wx = tx * tx * (3.0 - 2.0 * tx)
	local wy = ty * ty * (3.0 - 2.0 * ty)
	local wz = tz * tz * (3.0 - 2.0 * tz)

	local base = cellX + cellY * 57.0 + 113.0 * cellZ

	-- Blend along x on each of the four cell edges parallel to the x axis.
	local edgeY0Z0 = mix_n(hash_n(base + 0.0), hash_n(base + 1.0), wx)
	local edgeY1Z0 = mix_n(hash_n(base + 57.0), hash_n(base + 58.0), wx)
	local edgeY0Z1 = mix_n(hash_n(base + 113.0), hash_n(base + 114.0), wx)
	local edgeY1Z1 = mix_n(hash_n(base + 170.0), hash_n(base + 171.0), wx)

	-- Then along y on the two z faces, and finally along z.
	local faceZ0 = mix_n(edgeY0Z0, edgeY1Z0, wy)
	local faceZ1 = mix_n(edgeY0Z1, edgeY1Z1, wy)
	return mix_n(faceZ0, faceZ1, wz)
end

--- Applies the fixed 3x3 octave matrix to a point given as three scalars.
-- mat3(0.00, 1.60, 1.20, -1.60, 0.72, -0.96, -1.20, -0.96, 1.28)
-- The x input has a zero coefficient in the first output, so that term is omitted.
local function fbm_transform(x: f64, y: f64, z: f64): (f64, f64, f64)
	local outX = y * -1.60 + z * -1.20
	local outY = x * 1.60 + y * 0.72 + z * -0.96
	local outZ = x * 1.20 + y * -0.96 + z * 1.28
	return outX, outY, outZ
end

--- Four-octave fractal noise with fixed weights 0.5, 0.25, 0.1666 and 0.0834.
-- Between octaves the sample point goes through fbm_transform and is scaled
-- by 1.2, 1.3 and 1.4 in turn. Nothing is transformed after the last octave
-- because the point is not sampled again.
-- @param px, py, pz  sample point
-- @return weighted sum of the four noise samples
local function fbmslow_opt(px: f64, py: f64, pz: f64): f64
	local f = 0.5000 * noise3d_opt(px, py, pz)

	-- p = m*p*1.2
	px, py, pz = fbm_transform(px, py, pz)
	px *= 1.2; py *= 1.2; pz *= 1.2
	f += 0.2500 * noise3d_opt(px, py, pz)

	-- p = m*p*1.3
	px, py, pz = fbm_transform(px, py, pz)
	px *= 1.3; py *= 1.3; pz *= 1.3
	f += 0.1666 * noise3d_opt(px, py, pz)

	-- p = m*p*1.4
	px, py, pz = fbm_transform(px, py, pz)
	px *= 1.4; py *= 1.4; pz *= 1.4
	f += 0.0834 * noise3d_opt(px, py, pz)

	return f
end

--- Six-octave fractal noise, normalised by the summed amplitudes.
-- Each octave adds amplitude * noise, then transforms and scales the point and
-- decays the amplitude. The last octave's amplitude is not added to the
-- normaliser, so the divisor covers the first five octaves only.
-- @param px, py, pz  sample point
-- @return accumulated noise divided by the amplitude sum
local function fbm_opt(px: f64, py: f64, pz: f64): f64
	local total, amplitude, weightSum = 0.0, 1.0, 0.0

	-- Octave 1
	total += amplitude * noise3d_opt(px, py, pz)
	px, py, pz = fbm_transform(px, py, pz)
	px, py, pz = px * 1.149, py * 1.149, pz * 1.149
	weightSum += amplitude
	amplitude *= 0.75

	-- Octave 2
	total += amplitude * noise3d_opt(px, py, pz)
	px, py, pz = fbm_transform(px, py, pz)
	px, py, pz = px * 1.41, py * 1.41, pz * 1.41
	weightSum += amplitude
	amplitude *= 0.75

	-- Octave 3
	total += amplitude * noise3d_opt(px, py, pz)
	px, py, pz = fbm_transform(px, py, pz)
	px, py, pz = px * 1.51, py * 1.51, pz * 1.51
	weightSum += amplitude
	amplitude *= 0.65

	-- Octave 4
	total += amplitude * noise3d_opt(px, py, pz)
	px, py, pz = fbm_transform(px, py, pz)
	px, py, pz = px * 1.21, py * 1.21, pz * 1.21
	weightSum += amplitude
	amplitude *= 0.35

	-- Octave 5
	total += amplitude * noise3d_opt(px, py, pz)
	px, py, pz = fbm_transform(px, py, pz)
	px, py, pz = px * 1.41, py * 1.41, pz * 1.41
	weightSum += amplitude
	amplitude *= 0.75

	-- Octave 6 (contributes to the total only)
	total += amplitude * noise3d_opt(px, py, pz)
	return total / weightSum
end

--- Per-pixel entry point: shades one pixel of the animated scene.
-- Builds a camera ray, lays down a background (central glow, star field and
-- three noise layers), then composites two stacks of horizontal slices
-- front-to-back and finishes with haze and a vignette.
-- @param _          unused first argument (kept for the caller's signature)
-- @param fragCoord  pixel coordinate; only x and y are meaningful
-- @param iTime      time value; scaled by 0.1 before use
-- @return colour for the pixel, before tone mapping
local function main(_: vec, fragCoord: vec, iTime: f64): vec
	local time = iTime * 0.1
	
	-- Map the pixel to [-1, 1]. iRes.z is 0, so uv.z is not finite; xy keeps
	-- only x and y and replaces z with 0.
	local uv = v_uv_offset + (v_uv_scale * fragCoord) / iRes
	local xy = vec3(uv.x, uv.y, 0) 

	-- fade ramps in over the first unit of `time` and back out as it approaches 15.
	-- fade2/glow stay at their base values until `time` passes 10, then grow steeply.
	local fade = min(1.0, time * 1.0) * min(1.0, max(0.0, 15.0 - time))
	local fade2 = max(0.0, time - 10.0) * 0.37
	local glow = max(-0.25, 1.0 + pow(fade2, 10.0) - 0.001 * pow(fade2, 25.0))

	-- Camera sits at a fixed x/y and swings along z; it always looks at the origin.
	local campos = vec3(500.0, 850.0, -0.0 - cos((time - 1.4) / 2.0) * 2000.0)
	local camtar = vec_0

	-- Look-at basis with a rolled up-hint; 1.6 scales the forward axis in the ray sum.
	local roll = 0.34
	local cw = normalize(camtar - campos)
	local cp = vec3(sin(roll), cos(roll), 0.0)
	local cu = normalize(cross(cw, cp))
	local cv = normalize(cross(cu, cw))
	
	local rd = normalize(cu * xy.x + cv * xy.y + cw * 1.6)

	-- sundot measures how closely the ray points at the origin (1 = straight at it).
	local light = normalize(vec_0 - campos)
	local sundot = clamp(dot(light, rd), 0.0, 1.0)

	-- Tight central glow, then a wider and softer halo.
	local col = vec_1 * glow * 1.2 * vec_min(vec_1, v_glow_col * pow(sundot, 100.0))
	
	col += v_moon_haze * 0.3 * pow(sundot, 8.0)

	local rdx, rdy, rdz = rd.x, rd.y, rd.z
	
	-- Star field: product of two high-frequency noise fields raised to high
	-- powers, so only rare peaks survive.
	local s_val1 = pow(fbmslow_opt(rdx * 312.0, rdy * 312.0, rdz * 312.0), 7.0)
	local stars = vec3(s_val1, s_val1, s_val1) * 85.5
	
	local s_val2 = pow(fbmslow_opt(rdz * 440.3, rdx * 440.3, rdy * 440.3), 8.0)
	stars *= vec3(s_val2, s_val2, s_val2)

	-- Three background noise layers, each sampled at a further-shifted position
	-- (cpos accumulates the offsets) and drifting with time.
	local cpos = rd * 1500.0 + vec3(831.0 - time * 30.0, 321.0, 1000.0)
	col += v_fog_1 * (fbmslow_opt(cpos.x * 0.0035, cpos.y * 0.0035, cpos.z * 0.0035) - 0.5)

	cpos += vec3(831.0 - time * 33.0, 321.0, 999.0)
	col += v_fog_2 * 10.0 * pow(fbmslow_opt(cpos.x * 0.0045, cpos.y * 0.0045, cpos.z * 0.0045), 10.0)

	cpos += vec3(3831.0 - time * 39.0, 221.0, 999.0)
	col += v_fog_3 * 0.03 * 10.0 * pow(fbmslow_opt(cpos.x * 0.0145, cpos.y * 0.0145, cpos.z * 0.0145), 2.0)

	-- Stars are modulated by a static, low-frequency noise field.
	cpos = rd * 1500.0 + vec3(831.0, 321.0, 999.0)
	col += stars * fbm_opt(cpos.x * 0.0021, cpos.y * 0.0021, cpos.z * 0.0021)

	local shift = vec3(time * 100.0, time * 180.0, 0)
	-- Front-to-back accumulators: sum_w is the coverage so far, sum_rgb the weighted colour.
	local sum_w = 0.0
	local sum_rgb = vec_0
	
	-- Where the ray crosses the y = 0 plane, measured from the origin in units of 1000.
	-- NOTE(review): rdy == 0 makes this division non-finite — confirm whether
	-- that case needs handling.
	local c_height = campos.y / rdy
	local cpos2 = campos - rd * c_height
	local radius_check = magnitude(vec3(cpos2.x, 0, cpos2.z)) / 1000.0

	-- Rays crossing the plane too far out skip both slice loops.
	if radius_check < 1.8 then
		-- First stack: 21 horizontal slices at y = q * 8, visited from q = 10 down to -10.
		for q = 10, -10, -1 do
			if sum_w > 0.999 then break end
			
			-- Intersection of the ray with the slice plane (this `cpos` shadows the outer one).
			local c = (q * 8.0 - campos.y) / rdy
			local cpos = campos + rd * c
			
			-- shine fades toward zero as the sample direction aligns with the camera direction.
			local see = dot(normalize(cpos), normalize(campos))
			local shine = mix_v(v_shine, vec_0, smoothstep(0.0, 1.0, see))

			local radius = magnitude(vec3(cpos.x, 0, cpos.z)) / 999.0
			if radius > 1.0 then continue end

			-- Rotation angle depends on radius and time, twisting the pattern into a spiral.
			local rot = 3.00 * radius - time
			local c_rot, s_rot = cos(rot), sin(rot)
			local nx = cpos.x * c_rot - cpos.z * s_rot
			local nz = cpos.x * s_rot + cpos.z * c_rot
			local cpos_x, cpos_y, cpos_z = nx, cpos.y, nz

			-- Offset the noise lookup per slice and scroll it over time.
			local mix_rad = mix_n(250.0, 50.0, radius)
			cpos_x += 831.0 + shift.x
			cpos_y += 321.0 + q * mix_rad - shift.x * 0.2
			cpos_z += 1330.0 + shift.y
			
			local zoom = mix_n(0.0025, 0.0028, radius)
			cpos_x *= zoom; cpos_y *= zoom; cpos_z *= zoom

			-- Slice opacity from noise, faded out toward the rim.
			local alpha = smoothstep(0.50, 1.0, fbm_opt(cpos_x, cpos_y, cpos_z))
			alpha *= 1.3 * pow(smoothstep(1.0, 0.0, radius), 0.3)
			
			local dustcolor = mix_v(v_dust_1, v_dust_2, sqrt(radius))
			local localcolor = mix_v(dustcolor, shine, alpha)

			-- Sparse bright points (two sizes) and dark holes from high-power noise.
			local gstar = 2.0 * pow(noise3d_opt(cpos_x * 21.40, cpos_y * 21.40, cpos_z * 21.40), 22.0)
			local gstar2 = 3.0 * pow(noise3d_opt(cpos_x * 26.55, cpos_y * 26.55, cpos_z * 26.55), 34.0)
			local gholes = 1.0 * pow(noise3d_opt(cpos_x * 11.55, cpos_y * 11.55, cpos_z * 11.55), 14.0)
			
			localcolor += v_star_1 * gstar
			localcolor += v_star_2 * gstar2
			localcolor -= vec3(gholes, gholes, gholes)

			-- Weight by the coverage still available, then accumulate.
			alpha = (1.0 - sum_w) * alpha
			sum_rgb += localcolor * alpha
			sum_w += alpha
		end
		
		-- Second stack: 20 slices at y = q * 4 within a smaller radius (200).
		-- It adds coverage only: the colour contribution is vec_0.
		for q = 0, 19 do
			if sum_w > 0.999 then break end

			local c = (q * 4.0 - campos.y) / rdy
			local cpos = campos + rd * c
			
			local radius = magnitude(vec3(cpos.x, 0, cpos.z)) / 200.0
			if radius > 1.0 then continue end
			
			local rot = 3.2 * radius - time * 1.1
			local c_rot, s_rot = cos(rot), sin(rot)
			local nx = cpos.x * c_rot - cpos.z * s_rot
			local nz = cpos.x * s_rot + cpos.z * c_rot
			
			local cpos_x, cpos_y, cpos_z = nx, cpos.y, nz

			-- Unlike the first stack, no zoom factor is applied before the noise lookup.
			cpos_x += 831.0 + shift.x
			cpos_y += 321.0 + q * mix_n(250.0, 50.0, radius) - shift.x * 0.2
			cpos_z += 1330.0 + shift.y

			local alpha = 0.1 + smoothstep(0.6, 1.0, fbm_opt(cpos_x, cpos_y, cpos_z))
			alpha *= 1.2 * (pow(smoothstep(1.0, 0.0, radius), 0.72) - pow(smoothstep(1.0, 0.0, radius * 1.875), 0.2))
			
			alpha = (1.0 - sum_w) * alpha
			sum_rgb += vec_0 * alpha
			sum_w += alpha
		end
	end

	-- Normalise the accumulated colour by coverage (skipped when nothing was hit).
	local alpha_final = smoothstep(1.0 - radius_check * 0.5, 1.0, sum_w)
	if sum_w > 0.0001 then
		sum_rgb /= (sum_w + 0.0001)
	end

	-- Darken covered areas near the centre direction and add glow where coverage is thin.
	sum_rgb -= v_adj_sub * 0.2 * pow(sundot, 10.0) * alpha_final
	sum_rgb += v_adj_add * min(glow, 10.0) * 0.2 * pow(sundot, 5.0) * (1.0 - alpha_final)

	col = mix_v(col, sum_rgb, sum_w)

	-- Ring-shaped haze around the centre direction, weaker for rays with larger |rd.y|.
	col = mix_v(col, v_haze, fade * 29.0 * (pow(sundot, 50.0) - pow(sundot, 60.0)) / (2.0 + 9.0 * abs(rd.y)))

	-- Vignette: multiplier falls to 0.5 at the screen edges. xy2.z is not finite
	-- (0 / 0) but only x and y are read.
	local xy2 = fragCoord / iRes -- [0,1]
	local vig_val = 0.25 * sqrt(100.0 * xy2.x * xy2.y * (1.0 - xy2.x) * (1.0 - xy2.y))
	col *= v_vignette + vec3(vig_val, vig_val, vig_val)

	return col
end

-- Display resolution minus one on x and y (largest pixel index per axis).
local iRes0:vec = iRes-vec3(1,1,0)
-- First pixel of this actor's tile: x starts at 0 and y is offset by one tile
-- height per preceding actor (Actor_RenderNum is presumably 1-based — confirm).
local tStart0:vec = vec3(0,S.Actor_Res.y*(S.Actor_RenderNum-1),0)
-- Tile size minus one, and the resulting last pixel of the tile (inclusive).
local tRes0:vec = S.Actor_Res-vec3(1,1,0)
local tEnd0:vec = tStart0+tRes0
-- Number of InterlaceStep calls made at load time (0 or 1, from the tile's start row parity).
local IL_Start:f64 = tStart0.y % 2
-- Interlace state per axis: ext = {current offset, modulus}, int = {call counter, period}.
-- The offset advances once every `period` calls: every 2nd call on x, every call on y.
local IL = { x = { ext = {tStart0.y % 2, 2}, int = {0, 2} }, y = { ext = {0, 2}, int = {0, 1} } }
--- Advances the interlace state by one call and reports the new pattern.
-- For each axis the inner counter is incremented modulo its period; whenever
-- it wraps to 0 the axis offset is bumped, then wrapped by its modulus.
-- @return x offset, y offset, x modulus, y modulus
local function InterlaceStep(): (f64, f64, f64, f64)
	local xCounter, xOffset = IL.x.int, IL.x.ext
	xCounter[1] = (xCounter[1] + 1) % xCounter[2]
	if xCounter[1] == 0 then
		xOffset[1] += 1
	end
	xOffset[1] %= xOffset[2]

	local yCounter, yOffset = IL.y.int, IL.y.ext
	yCounter[1] = (yCounter[1] + 1) % yCounter[2]
	if yCounter[1] == 0 then
		yOffset[1] += 1
	end
	yOffset[1] %= yOffset[2]

	return xOffset[1], yOffset[1], xOffset[2], yOffset[2]
end

-- Prime the state IL_Start times at load.
for _ = 1, IL_Start do
	InterlaceStep()
end

--- Shades one pixel and writes the encoded result into `Out` at byte offset `c`.
-- `In` is accepted but not read here.
local function ShaderCall(T:f64, In:buffer, Out:buffer, c:f64, X_i:f64, Y_i:f64)
	local fragCoord = vec3(X_i, Y_i, 0)
	local shaded = main(vec_1, fragCoord, T)
	vec3_buffer(Out, c, shaded)
end

--- Module entry point: shades this actor's tile into `Out` for time `T`.
-- Rows are visited with decreasing Y while the byte cursor only moves forward.
-- With `InterlaceOn`, only the pixels selected by the current InterlaceStep
-- pattern are shaded and the cursor skips over the rest.
-- NOTE(review): the cursor starts from the interlace offset only, not from the
-- tile origin, so `Out` is presumably a per-tile buffer — confirm against the caller.
return function(T:f64, In:buffer, Out:buffer, InterlaceOn: boolean?)
	local xOffset, yOffset, xStep, yStep = 0, 0, 1, 1
	if InterlaceOn then
		xOffset, yOffset, xStep, yStep = InterlaceStep()
	end

	local rowBytes = iRes.x * u32B
	local cursor = (u32B * xOffset) + (rowBytes * yOffset)

	-- Bytes to advance per shaded pixel, and extra bytes to skip after each row.
	local pixelStride = u32B + u32B * (xStep-1)
	local rowSkip = rowBytes * (yStep-1)

	local firstRow = iRes0.y - tStart0.y - yOffset
	local lastRow = iRes0.y - tEnd0.y
	local firstCol = tStart0.x + xOffset

	for Y_i = firstRow, lastRow, -yStep do
		for X_i = firstCol, tEnd0.x, xStep do
			ShaderCall(T, In, Out, cursor, X_i, Y_i)
			cursor += pixelStride
		end
		cursor += rowSkip
	end
end
3 Likes

This is just absurdly incredible; I pray that one day Roblox provides us with some form of GPU service that lets us take advantage of the GPU’s parallel processing power, which developers could use in any scenario where it would be beneficial (e.g. custom physics and this sort of renderer project!)

2 Likes

It’s going to suck not being able to reply on the dev forum soon, as I am absolutely not going to verify with my face or id. So this might be one of my last replies to this project.

Anyways, I hope this project is going to allow developers to absolutely push roblox studio beyond what was intended in the future, or right now by what I am seeing on this post already.

I don’t want Roblox to be limiting access to engaging in this project, which is why I have hosted it on github.
Please get involved there if you can!

2 Likes

You know I had to do it to them with the balatro shader

13 Likes

Hi folks, the best way to get Penumbra running in Roblox has been updated; you can now grab the immediately-usable .rbxm file via the releases page, updated in the OP.

This should make the tool immediately accessible to users who just want to hit play without worrying about the setup process.

Here’s another example of Penumbra stretching its wings using 16 render threads!

5 Likes

Snippet of the performance of the latest dev build I’ve been working on, which has had architectural improvements leading to improved performance with higher thread counts :slight_smile:

5 Likes

it’s astounding. I’ve never seen anything like this in Roblox, only in advanced C/Rust engines like SDL 3/Bevy/SFML.

2 Likes

Pushing high resolution rendering

3 Likes

Wow this is truly wonderful!
I’ve been attempting to make my own renderer (raster) recently too; Penumbra really is next level.

https://cdn.discordapp.com/attachments/449312743116505101/1465928442381074607/bandicam_2026-01-28_05-33-09-151.mp4?ex=69a11f40&is=699fcdc0&hm=1ad23c7c2195814cbac65d5ab54494482ade92511afda3e08ab83308a1bbeead&

I’m currently running this on an i5-8365U
Just asking, but how did you achieve 16 threads? From what I know, Roblox Studio is limited to 8 threads max. I’m aware my CPU currently won’t benefit from that, but it would be nice to try if I ever get a CPU with more threads!
Do you also have any tips?

(normal vertex/triangle raster without raycasts)

1 Like

Could you add some code examples and also how to set up the module?

1 Like

Both studio and live client now allow the use of 16 threads.

You can verify this by running 16 actors in parallel and checking the microprofiler labels.

The highest worker letter should be ‘P’.

The live client also spins up its 16 workers AFTER launching a game for the first time, as it starts up with 3.

This has been my experience on macOS. From what I understand the experience should be the same on PC.

If your CPU does not have 16 cores then it may be possible that roblox limits its workers based on that.

Also, cool renderer! I’d like to reach out to you in DMs as you’ve done something really interesting with yours that I’d like to talk about. :slight_smile:

3 Likes

this is insane omg I’m giving you my purse

3 Likes

no more 3 thread limit for parallel lua on the client?? really? when was this change made?

it’s new and mostly undocumented & unnoticed
https://devforum.roblox.com/t/win11-desktop-player-showing-raised-8-taskscheduler-workers-instead-of-previous-3-cap/4357901

2 Likes

is this related to Penumbra revenge on roblox?

A penumbra is the slightly blurry, less dark area at the edges of an object’s shadow.

3 Likes