Parallel luau isn't parallelin' (need help with evenly distributing workload into all of the threads)

I have a 3d particle emitter script
My goal is to optimize it as aggressively as I can (while the particles are still visible, so not counting things like culling and render distance), just because I’m bored

rn I’m using parallel luau, so that I can distribute the workload for calculating the cframes of each part

if i call :SendMessage() for each part, it will cause frame drops due to it taking up time during serial execution

my solution was to batch these parts into 12 tables (1 for each actor) and send each table in a single message
problem is, the workload in parallel execution stops being evenly distributed if i do that

so my question is: how do i make it so that i can curb the time SendMessage takes (like in pic 2), while also making sure the workload for calculations is even (like in pic 1)?

this is the code (the part that matters, at least)

-- Pool of 12 workers that do the calculations in parallel.
-- On the server each worker is a clone of script.Server parented to servStorage;
-- otherwise it is a clone of script.Client parented to replStorage.
local handlers: {Actor} = new_table(12)
do
	local on_server = runServ:IsServer()
	for slot = 1, 12 do
		local worker = if on_server then script.Server:Clone() else script.Client:Clone()
		worker.Name = `3DParticles_Worker`
		-- Enable the worker's PartHandle before parenting it.
		worker.PartHandle.Enabled = true
		worker.Parent = if on_server then servStorage else replStorage
		handlers[slot] = worker
	end
end

-- Like task.spawn, but the coroutine is recycled between calls instead of
-- being created fresh every time.
-- `free_thread` holds the idle worker coroutine, or nil when none is available.
local free_thread: thread?

-- Runs `func(...)` while the worker is marked busy (free_thread cleared), then
-- publishes the worker as free again once `func` returns.
-- If `func` yields or errors, free_thread stays nil in the meantime, so the
-- caller's `if not free_thread` check will create a fresh worker.
local function run_in_thread<T>(func: (...T) -> (), ...: T): ()
	local borrowed = free_thread :: thread
	free_thread = nil
	func(...)
	free_thread = borrowed
end

-- Body of a recyclable worker coroutine: registers the running coroutine as
-- the free worker, then loops forever, parking on coroutine.yield() and passing
-- whatever it is resumed with (a function plus its arguments) to run_in_thread.
local function new_thread(): ()
	free_thread = coroutine.running()
	while true do
		run_in_thread(coroutine.yield())
	end
end

-- Per-worker batch tables (one slot per actor) that are filled each frame and
-- sent to the workers.
-- Each slot must be its OWN table. `table.create(12, {})` would fill all 12
-- slots with the same table reference, so every insert would land in one shared
-- table: the first worker would receive every part and the rest would receive
-- nothing once that table is cleared.
local part_batches: {{BasePart}} = table.create(12)
local prog_batches: {{number}} = table.create(12)
local iter_batches: {{number}} = table.create(12)
for index = 1, 12 do
	part_batches[index] = {}
	prog_batches[index] = {}
	iter_batches[index] = {}
end
-- Per-frame update. For every emitter in module.Emitters:
--   1. emit the particles that are due (only while the emitter is enabled),
--   2. recycle expired parts and advance the progress of live ones,
--   3. distribute live parts round-robin over the 12 batch slots,
--   4. send one "HandlePart" message per worker, then clear that worker's batch.
runServ.Heartbeat:Connect(function(deltaTime: number)
	for _, self in module.Emitters do
		local cache = self.Cache
		local props = self.Properties
		-- Interval between two emissions; compared against the accumulated deltaTime below.
		local rate = 1 / props.Rate
		local parts = self.Parts
		local lifetime = self.Properties.Lifetime
		local timeElapsed = self.TimeElapsed
		
		if self.Enabled then
			-- Emit one particle for every full interval that has accumulated.
			while true do
				if timeElapsed < rate then break end
				timeElapsed = timeElapsed - rate
				do_particle(self)
			end
			
			-- Carry the remainder over to the next frame, plus this frame's time.
			self.TimeElapsed = timeElapsed + deltaTime
		end
		
		-- Round-robin cursor over the 12 batch slots / workers.
		local i = 0
		
		-- `parts` maps part -> progress, where progress[1] is the life fraction
		-- (expired at >= 1) and progress[2] is a counter incremented once per frame.
		for part, progress in parts do			
			if progress[1] >= 1 then
				-- Expired: hand the part back to the cache and drop all bookkeeping for it.
				-- (Clearing the current key during traversal is allowed.)
				cache:ReturnPart(part)
				parts[part] = nil
				local id = part_uids[part]
				if id then
					cframes_list[id] = nil -- SharedTables, don't worry about these for now
					random_size_list[id] = nil
					random_pos_list[id] = nil
					random_rot_list[id] = nil
				end
				
				part_uids[part] = nil -- roblox let us use UniqueId when :sob:
				part:SetAttribute("3DParticle_ID", nil)
				continue
			end

			-- Advance the cursor, wrapping from 12 back to 1.
			i = i + 1
			if i > 12 then i = 1 end
			
			if type(props.ConstantFunction) == "function" then
				-- Run the user callback on the recycled coroutine; create a worker
				-- first if none is free (new_thread sets free_thread before yielding).
				if not free_thread then task.spawn(new_thread) end
				task.spawn(free_thread :: thread, props.ConstantFunction, part)
			end
			
			progress[2] += 1
			-- Queue the part, its life fraction and its iteration count for worker `i`.
			-- NOTE(review): this only balances the load if part_batches[i] etc. are
			-- twelve DISTINCT tables; confirm how the batch tables are constructed.
			insert_table(part_batches[i], part) -- putting parts and their "progress" in tables
			insert_table(prog_batches[i], progress[1])
			insert_table(iter_batches[i], progress[2])--]]
			--handlers[i]:SendMessage("HandlePart", props, part, progress[1], progress[2])
			
			-- Advance the life fraction. The lifetime is re-rolled from
			-- [lifetime.Min, lifetime.Max] on every frame, not once per particle.
			local new_progress = progress[1] + (1 / rand:NextNumber(lifetime.Min, lifetime.Max)) * deltaTime
			progress[1] = new_progress
			-- NOTE(review): this fires on every frame where progress is >= 0.5,
			-- not only when the particle crosses the halfway point; confirm intended.
			if new_progress >= 0.5 then
				self.HalfLife:Fire() -- hmm i wonder where this idea came from
			end
		end
		
		
		for index = 1, 12 do
			-- One message per worker, carrying that worker's whole batch for this emitter.
			-- NOTE(review): the batches are cleared right after sending, which presumably
			-- relies on SendMessage copying its arguments at call time; confirm.
			handlers[index]:SendMessage("HandlePart", props, part_batches[index], prog_batches[index], iter_batches[index])
			table.clear(part_batches[index])
			table.clear(prog_batches[index])
			table.clear(iter_batches[index])
		end--]]
	end
end)

(i think it’s cool that i can handle over 255 parts a second and still have 50+ fps on a laptop with subpar specs, but i just feel like it could be much better)

2 Likes

what i have tried:

  • increasing the number of tables
  • decreasing the number of tables
  • checking for literally any post related to :SendMessage()

as of right now, i’m still unable to find any clues as to how to fix this
i’m really certain that if i find a fix, my fps can almost double