How can I make a Deep Reinforcement Learning AI?

I recently discovered this library, and from the examples I tried adapting the genetic algorithm to what I needed (a table-soccer AI). After setting up the inputs and outputs, the outputs were almost the same every time — I'm probably doing something wrong.

Still, is there a way to make a Deep RL AI using that library? (I didn't find any function for it.)

Anyway, in case there is a fix for the problem I'm having, here's the function that builds the inputs:

local function CalculateInputs(CurrentRig, CurrentGoal)
	-- Builds the input table fed to the neural network for CurrentRig:
	--   BallVelocity     ball speed over a short sample window
	--   BallDirectionX/Z unit direction of ball travel (default (0,0,-1) when still)
	--   BallPositionX/Z  ball position relative to the field centre, team-mirrored
	--   DistFromGoal     distance from the ball to CurrentGoal
	--   PlrN_Pos_X/Z     teammate positions relative to the field centre
	-- Uses the upvalues CurrentBall, Soccer, RedPlayer, BluePlayer.
	local Return = {BallVelocity = 0, BallDirectionX = 0, BallDirectionZ = 0}

	local AllPlrs = {}

	-- Distance from the ball part to the centre of Model (part or model).
	local function getDisFromBall(Model: Model)
		local ModelCenter
		if Model:IsA("BasePart") then
			ModelCenter = Model.CFrame
		else
			ModelCenter = Model:GetBoundingBox()
		end

		local Ball = CurrentBall:WaitForChild("Ball")
		if Ball then
			return (Ball.Position - ModelCenter.Position).Magnitude
		end
		warn("no balls! heh")
	end

	-- Position of Model relative to the pitch centre ("Pavement"), mirrored
	-- per team so both teams see the field in the same orientation.
	-- The original nested-if version reduces exactly to: Red gets -|delta|,
	-- any other team gets +|delta|, per axis (equal coordinates give 0 either way).
	local function getPosFromCenter(Model: Instance)
		local center = Soccer:WaitForChild("Pavement").Position
		local ModelPos
		if Model:IsA("BasePart") then
			ModelPos = Model.Position
		elseif Model:IsA("Model") then
			ModelPos = Model:GetBoundingBox().Position
		end

		local delta = center - ModelPos
		local sign = (CurrentRig.Team.Value == "Red") and -1 or 1
		return Vector3.new(sign * math.abs(delta.X), 0, sign * math.abs(delta.Z))
	end

	-- Only the rig's own teammates are fed in as inputs (opponents were
	-- deliberately commented out in the original).
	if CurrentRig.Team.Value == "Red" then
		for _, plr in ipairs(RedPlayer) do
			table.insert(AllPlrs, plr)
		end
	elseif CurrentRig.Team.Value == "Blue" then
		for _, plr in ipairs(BluePlayer) do
			table.insert(AllPlrs, plr)
		end
	end

	local BallPart = CurrentBall:FindFirstChild("Ball")
	if not BallPart then
		-- Without a ball we cannot compute the remaining inputs; return the
		-- zeroed defaults instead of erroring on a nil index.
		warn("CalculateInputs: ball part missing")
		return Return
	end

	-- Sample the ball position twice to estimate velocity.
	local Pos1 = BallPart.Position
	task.wait(0.01)
	local Pos2 = BallPart.Position
	-- BUGFIX: new minus old gives the actual direction of travel; the
	-- original Pos1-Pos2 pointed opposite to the ball's motion.
	local vector = Pos2 - Pos1

	-- BUGFIX: the original passed the undefined global `Ball` here, which
	-- errored (or silently used a stray global); use the fetched ball part.
	local ballPos = getPosFromCenter(BallPart)
	Return.BallPositionX = ballPos.X
	Return.BallPositionZ = ballPos.Z

	Return.BallVelocity = vector.Magnitude -- speed over the sample window

	if vector.Magnitude < .001 then
		vector = Vector3.new(0, 0, -1) -- default direction when the ball is still
	else
		vector = vector.unit
	end

	Return.BallDirectionX = vector.X -- direction x
	Return.BallDirectionZ = vector.Z -- direction z

	Return.DistFromGoal = getDisFromBall(CurrentGoal)

	for i, v in ipairs(AllPlrs) do
		local Pos = getPosFromCenter(v)
		Return["Plr" .. i .. "_Pos_X"] = Pos.X
		Return["Plr" .. i .. "_Pos_Z"] = Pos.Z
	end

	return Return
end

And here's the code for the AI:

-- Settings table for the genetic algorithm (ParamEvo).
-- ScoreFunction evaluates one candidate network and returns its fitness;
-- PostFunction logs progress after each generation.
-- Relies on upvalues defined elsewhere in the file: CurrentIplr, AddBall,
-- CurrentBall, Running, singlePlrMode, Soccer, Players, Move, Rotate, clock.
local geneticSetting = {
	ScoreFunction = function(net)
		
		local score = 0
		-- Alternates which rig (AI1/AI2) this evaluation controls.
		CurrentIplr += 1

		AddBall()
		-- Shadows the outer CurrentBall folder with the ball part itself.
		local CurrentBall = CurrentBall:WaitForChild("Ball")

		-- End the episode when the ball touches either goal.
		-- NOTE(review): +1 is awarded for BOTH goals regardless of which
		-- team this network plays for — likely unintended; verify the reward.
		CurrentBall.Touched:Connect(function(Part)
			if Part.Name == "RedGoal" then
				score += 1
				Running.Value = false
			elseif Part.Name == "BlueGoal" then
				score += 1
				Running.Value = false
			end
		end)


		Running.Value = true



		---------------------------------------------------------------- rig / goal setup
		print(CurrentIplr)
		local CurrentRig 
		if CurrentIplr == 1 then
			CurrentRig = workspace:WaitForChild("AI1")
		else
			CurrentRig = workspace:WaitForChild("AI2")
		end

		if singlePlrMode == true then
			CurrentRig = script.CurrentRig.Value
		end


		-- CurrentGoal is the goal matching the rig's own team colour.
		-- NOTE(review): confirm whether CurrentGoal is meant to be the goal
		-- to defend or to attack — the distance reward below depends on it.
		local CurrentGoal
		local OppositeGoal
		if CurrentRig.Team.Value == "Blue" then
			CurrentGoal = Soccer:WaitForChild("BlueGoal")
			OppositeGoal = Soccer:WaitForChild("RedGoal")
		elseif CurrentRig.Team.Value == "Red" then
			CurrentGoal = Soccer:WaitForChild("RedGoal")
			OppositeGoal = Soccer:WaitForChild("BlueGoal")
		end
		-- Which of the rig's two rows/handles is currently being driven (1 or 2).
		local CurrentRigManage = 2
		-- NOTE(review): teamFolder is never used below.
		local teamFolder = Players:WaitForChild(CurrentRig.Team.Value)

		-- Toggles CurrentRigManage between 1 and 2.
		local function Switch()
			if CurrentRigManage == 1 then
				CurrentRigManage = 2
			else
				CurrentRigManage = 1
			end
			--task.wait(0.1)
		end
		---------------------------------------------------------------- control loop
		-- Runs the perceive -> act -> reward loop until a goal stops the episode.
		local cor = coroutine.wrap(function()
			repeat

				local input = CalculateInputs(CurrentRig, CurrentGoal)
				local output = net(input)
				print(output)
				print(input)

				local Reward = 0

				-- Ball-to-goal distance before acting, for the shaping reward below.
				local OldDist = (CurrentBall.Position - CurrentGoal.Position).Magnitude

				-- The network picks which rig row to control via DesiredControlPosition.
				if output.DesiredControlPosition < 0 and CurrentRigManage == 2 then
					Switch()
				end
				if output.DesiredControlPosition >= 0 and CurrentRigManage == 1 then
					Switch()
				end

				-- NOTE(review): the branch threshold here is 0.5 while the
				-- Switch logic above uses 0 — confirm both are intentional.
				if output.DesiredControlPosition < 0.5 then
					Move(CurrentRig, CurrentRigManage, "1", output.PlrMoveForce_1)
					Move(CurrentRig, CurrentRigManage, "2", output.PlrMoveForce_2)
					Reward += Rotate(CurrentRig, CurrentRigManage, "1", output.PlrRotForce_1 * 20)
					Reward += Rotate(CurrentRig, CurrentRigManage, "2", output.PlrRotForce_2 * 20)
				else
					Move(CurrentRig, CurrentRigManage, "1", output.PlrMoveForce_3)
					Move(CurrentRig, CurrentRigManage, "2", output.PlrMoveForce_4)
					Reward += Rotate(CurrentRig, CurrentRigManage, "1", output.PlrRotForce_3 * 20)
					Reward += Rotate(CurrentRig, CurrentRigManage, "2", output.PlrRotForce_4 * 20)
				end
				score += Reward




				task.wait(0.01)
				local NewDist = (CurrentBall.Position - CurrentGoal.Position).Magnitude

				-- Shaping reward: positive when the ball moves AWAY from
				-- CurrentGoal. NOTE(review): verify the sign matches intent.
				score += (NewDist - OldDist)

			until Running.Value == false
		end)
		cor()
		---------------------------------------------------------------- episode wrap-up

			-- Throttle: yield at most once per 0.1s of CPU time (uses the
			-- upvalue `clock`). NOTE(review): `wait()` is deprecated in Luau;
			-- task.wait() is preferred.
			if os.clock() - clock >= 0.1 then
				clock = os.clock()
				wait()
			end
		-- The second rig's evaluation blocks until the episode finishes,
		-- then resets the alternation counter.
		if CurrentIplr > 1 then
			repeat task.wait(0.1) until Running.Value == false
			CurrentIplr = 0
		end
		
		task.wait(0.1)
		
		-- Scale the fitness before handing it back to the genetic algorithm.
		score *= 30
		return score
	end;
	-- Called after each generation; logs the generation number and best score.
	PostFunction = function(geneticAlgo)
		local info = geneticAlgo:GetInfo()
		print("Generation " .. info.Generation .. ", Best Score: " .. info.BestScore)
		--print("Generation "..info.Generation..", Best Score: "..info.BestScore/(100)^2*(100).."%")
	end;
}
-- Build a template feedforward network (3 hidden layers of 12 neurons) and
-- evolve a population of copies with the genetic settings above.
local tempNet = FeedforwardNetwork.new(NetInputs,3,12,NetOutputs,setting)
local geneticAlgo = ParamEvo.new(tempNet,population,geneticSetting)

-- Blocks while running `generations` generations, then keeps the fittest net.
geneticAlgo:ProcessGenerations(generations)
local net = geneticAlgo:GetBestNetwork()

Well… You might as well use DataPredict for this.

The mathematics are already hard for everyone to understand, because they are catered to PhD-level researchers.

Thanks for the answer. I already tried using your library, but when I saw how the inputs worked I didn't understand why they were set up that way. Could you explain better how to feed inputs to the neural network? Also, once I create a neural network, how can I use PPO with it?

Have you actually read the tutorials in the documentation? I really recommend reading them.

You need to use two neural networks, one for the actor model and one for the critic model. Usually the actor model handles all the action outputs, so the last layer must be equal to the number of actions. Meanwhile for the critic, it needs to output one continuous value. In other words, one neuron.

Here's a code snippet showing how to set it up.


-- Require the DataPredict ModuleScript once and use the returned library table.
local DataPredictLibrary = require(DataPredict)

-- Builds the actor network: the last layer must have one neuron per action.
local function buildActor()

	-- Your actor neural network here

end

-- Builds the critic network: a single output neuron (one continuous value).
local function buildCritic()

	-- Your critic neural network here

end

local ActorModel = buildActor()

local CriticModel = buildCritic()

-- BUGFIX: the library was required into `DataPredictLibrary`, but the
-- original snippet indexed `DataPredict` (the ModuleScript instance) here,
-- which would error; index the required library table instead.
local PPO = DataPredictLibrary.Models.ProximalPolicyOptimization.new()

PPO:setActorModel(ActorModel)

PPO:setCriticModel(CriticModel)

-- Once you are at this point, read the documentation on how to set up the
-- functions for reinforcement learning models.

I watched the tutorials in the documentation, but it's still not very clear to me — it was much simpler in the library I was using before. I'm fairly new to AI, and actually I've already abandoned this project, so…