OpenML - Machine Learning

I added saving the neural network. When I use Train() in the while task.wait() loop it works perfectly fine, but when I use Run() instead (the loaded network is supposed to already be trained), the AI makes significantly different decisions, such as outputting a high value when the lava is far away and a low value when the lava is close. This only happens when I use Run(), so I'm wondering whether I saved/loaded the network incorrectly.

local DataStoreService = game:GetService("DataStoreService")
local ServerScriptService = game:GetService("ServerScriptService")

-- Create/Get our datastore called "Networks"
local NetworkDataStore = DataStoreService:GetDataStore("Networks")

local OpenML = require(game:GetService("ServerScriptService").OpenML)

local States = {"Distance"}
local Actions = {"Jump"} -- Removed Idle State to keep it simple. Either jump or dont jump.

local Propagator = OpenML.Algorithms.Propagator
local MyNetwork = NetworkDataStore:GetAsync("MyNetwork")
local NeuralNetwork
if MyNetwork then
	NeuralNetwork = OpenML.Resources.MLP.Decompress(MyNetwork, "ASCII")
	print(NeuralNetwork)
else
	NeuralNetwork = OpenML.Resources.MLP.new({ #States, 10, #Actions }, function() -- reduced to one hidden layer instead of two; it doesn't need to be deep for such a simple action
		return math.random() * 3 - 1.5 -- initialize weights in [-1.5, 1.5] so negative values are possible (produces better output)
	end)
	print("NEW NETWORK")
end

setmetatable(NeuralNetwork, { __index = Propagator })

local ActivationFunction = OpenML.ActivationFunctions.TanH -- Changed Activation function from ReLU to TanH
-- ReLU suffers from Dying ReLU so we changed it to TanH

local DQL = OpenML.Algorithms.DQL.new()

DQL.OnForwardPropagation = function(states) return NeuralNetwork:ForwardPropagation(states, ActivationFunction) end
DQL.OnBackPropagation = function(activations, target) return NeuralNetwork:BackPropagation(activations, target, { ActivationFunction = ActivationFunction, LearningRate = 0.01 }) end -- changed the learning rate to 0.01

function Run()
	local distance = (script.Parent:GetPivot().Position - workspace.Part.Position).Magnitude
	local state = { distance }

	local activations = NeuralNetwork:ForwardPropagation(state, ActivationFunction)
	local actions = activations[#activations]

	if actions[1] > 0.5 then -- if the first action's output is above the 0.5 threshold it jumps; anything below and it doesn't jump
		script.Parent.Humanoid.Jump = true
	end
	
	--print(distance)
	print("Jump: "..actions[1])

	return state
end

function Train()
	local state = Run()
	local distance = state[1]

	DQL:Learn{
		State = state,
		Action = 1, -- Jump
		Reward = distance < 7 and 1 or -1, -- changed the distance from 5 to 7 so it jumps sooner
	}
end

game:BindToClose(function()
	warn("Game is shutting down.")
	local CompressedASCII = OpenML.Resources.MLP.Compress(NeuralNetwork, "ASCII")
	NetworkDataStore:SetAsync("MyNetwork", CompressedASCII)
	warn("saved")
	-- Add your custom logic here
end)

while task.wait() do -- changed the wait from 0.1 to just one frame (the server frame rate)
	Run()
end

Here is the improved version of your code.

All I did was change the learning rate a bit; also, the compression method "ASCII" was recently changed to "IEEE754" in the update logs, so I switched it to that.

local DataStoreService = game:GetService("DataStoreService")
local ServerScriptService = game:GetService("ServerScriptService")

-- Create/Get our datastore called "Networks" (under the "Refresh2" scope)
local NetworkDataStore = DataStoreService:GetDataStore("Networks", "Refresh2")

local OpenML = require(game:GetService("ServerScriptService").OpenML)

local States = {"Distance"}
local Actions = {"Jump"} -- Removed Idle State to keep it simple. Either jump or dont jump.

local Propagator = OpenML.Algorithms.Propagator
local MyNetwork = NetworkDataStore:GetAsync("MyNetwork")
local NeuralNetwork
if MyNetwork then
	NeuralNetwork = OpenML.Resources.MLP.Decompress(MyNetwork, "IEEE754")
	print(NeuralNetwork)
else
	NeuralNetwork = OpenML.Resources.MLP.new({ #States, 10, #Actions }, function() -- reduced to one hidden layer instead of two; it doesn't need to be deep for such a simple action
		return math.random() * 3 - 1.5 -- initialize weights in [-1.5, 1.5] so negative values are possible (produces better output)
	end)
	print("NEW NETWORK")
end

setmetatable(NeuralNetwork, { __index = Propagator })

local ActivationFunction = OpenML.ActivationFunctions.TanH -- Changed Activation function from ReLU to TanH
-- ReLU suffers from Dying ReLU so we changed it to TanH

local DQL = OpenML.Algorithms.DQL.new()

DQL.OnForwardPropagation = function(states) return NeuralNetwork:ForwardPropagation(states, ActivationFunction) end
DQL.OnBackPropagation = function(activations, target) return NeuralNetwork:BackPropagation(activations, target, { ActivationFunction = ActivationFunction, LearningRate = 0.001 }) end -- changed the learning rate to 0.001

function Run()
	local distance = (script.Parent:GetPivot().Position - workspace.Part.Position).Magnitude
	local state = { distance }

	local activations = NeuralNetwork:ForwardPropagation(state, ActivationFunction)
	local actions = activations[#activations]

	if actions[1] > 0.5 then -- if the first action's output is above the 0.5 threshold it jumps; anything below and it doesn't jump
		script.Parent.Humanoid.Jump = true
	end

	--print(distance)
	print("Jump: "..actions[1])

	return state
end

function Train()
	local state = Run()
	local distance = state[1]

	DQL:Learn{
		State = state,
		Action = 1, -- Jump
		Reward = distance < 7 and 1 or -1, -- changed the distance from 5 to 7 so it jumps sooner
	}
end

game:BindToClose(function()
	warn("Game is shutting down.")
	local Compressed = OpenML.Resources.MLP.Compress(NeuralNetwork, "IEEE754")
	NetworkDataStore:SetAsync("MyNetwork", Compressed)
	warn("Saved")
	-- Add your custom logic here
end)

while task.wait() do -- changed the wait from 0.1 to just one frame (the server frame rate)
	Run()
end

I realized that ALP Base64 doesn't work on some numbers. I created a new version, ALP UTF-8, which uses pure UTF-8 characters. The patch will be released in 1.2.3. ALP UTF-8 will have better compression than IEEE 754; performance will be the same as regular ALP, just in a different format so DataStores can store it.

For now, use "IEEE754" compression until release 1.2.3.
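
In the meantime, one way to keep the save and load calls in sync is to keep the compression mode in a single constant. This is just a sketch built on the OpenML and NetworkDataStore variables from the script above, not part of the library itself; swap the string once the ALP UTF-8 mode ships in 1.2.3.

local COMPRESSION_MODE = "IEEE754" -- change to the ALP UTF-8 mode string once 1.2.3 is released

local function SaveNetwork(network)
	local compressed = OpenML.Resources.MLP.Compress(network, COMPRESSION_MODE)
	NetworkDataStore:SetAsync("MyNetwork", compressed)
end

local function LoadNetwork()
	local stored = NetworkDataStore:GetAsync("MyNetwork")
	return stored and OpenML.Resources.MLP.Decompress(stored, COMPRESSION_MODE) or nil
end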

I made a rig that has 4 move directions: Front, Back, Left, Right. It finds the closest character and determines which direction to go based on how close it is now compared to what the distance was before. It gets rewarded for getting closer and punished for getting further away. But the rig seems to be inaccurate: it keeps going in whatever direction it wants, even when the difference (LastDistance - CurrentDistance) is negative and it's supposed to be punished for going that direction.

local OpenML = require(game:GetService("ServerScriptService").OpenML)
local Model = script.Parent
local Humanoid = Model.Humanoid
Humanoid.AutoRotate = false

local States = {"Distance"}
local Actions = {"WalkF", "WalkB", "WalkL", "WalkR"}

local Propagator = OpenML.Algorithms.Propagator

local NeuralNetwork = OpenML.Resources.MLP.new({ #States, 10, #Actions }, function()
	return math.random() * 3 - 1.5
end)

setmetatable(NeuralNetwork, { __index = Propagator })

local ActivationFunction = OpenML.ActivationFunctions.TanH

local DQL = OpenML.Algorithms.DQL.new()

DQL.OnForwardPropagation = function(states) return NeuralNetwork:ForwardPropagation(states, ActivationFunction) end
DQL.OnBackPropagation = function(activations, target) return NeuralNetwork:BackPropagation(activations, target, { ActivationFunction = ActivationFunction, LearningRate = 0.01 }) end

function FindClosestChar()
	local MaxDist = math.huge
	local Char
	for i, v in pairs(workspace:GetChildren()) do
		if v:IsA("Model") then
			if v ~= Model then
				local Humanoid = v:FindFirstChildOfClass("Humanoid")
				if Humanoid then
					if Humanoid.Health > 0 then
						local Dist = (v:GetPivot().Position - Model:GetPivot().Position).Magnitude
						if Dist < MaxDist then
							MaxDist = Dist
							Char = v
						end
					end
				end
			end
		end
	end
	return Char.PrimaryPart, MaxDist
end

function findMaxValue(t)
	local max_value = -math.huge -- Start with the smallest possible number
	local max_value_key

	for k, v in pairs(t) do
		if v > max_value then
			max_value = v
			max_value_key = k
		end
	end

	return max_value_key, max_value
end

local Whatever, LastDistance = FindClosestChar()
local Difference
local BestAction, ActionValue

function Run()
	local RootPart, Distance = FindClosestChar()
	Difference = LastDistance - Distance
	print("Difference"..Difference)
	LastDistance = Distance
	States = {Distance}
	local Activations = NeuralNetwork:ForwardPropagation(States, ActivationFunction)
	Actions = Activations[#Activations]
	
	BestAction, ActionValue = findMaxValue(Actions)
	print(BestAction)
	
	if 1 == BestAction then
		Humanoid:Move(Model:GetPivot().LookVector) -- WalkF: move forward
	end
	if 2 == BestAction then
		Humanoid:Move((Model:GetPivot() * CFrame.Angles(0, math.rad(180), 0)).LookVector) -- WalkB: move backward
	end
	if 3 == BestAction then
		Humanoid:Move((Model:GetPivot() * CFrame.Angles(0, math.rad(90), 0)).LookVector) -- WalkL: move left
	end
	if 4 == BestAction then
		Humanoid:Move((Model:GetPivot() * CFrame.Angles(0, math.rad(-90), 0)).LookVector) -- WalkR: move right
	end
	
	return States
end

function Check()
	if Difference > 0 then
		return 1
	else
		return -1
	end
end

function Train()
	States = Run()
	local Distance = States[1]
	DQL:Learn{
		State = States,
		Action = BestAction,
		Reward = Check()
	}
	
	print("Front"..Actions[1])
	print("Back"..Actions[2])
	print("Left"..Actions[3])
	print("Right"..Actions[4])
end

while task.wait() do
	Train()
end

You only have one input, {"Distance"}, and that could mean anything. You need to give it the direction to the enemy, or else the AI doesn't know where the enemy is or where to go. Ex: if I told you there's a person near you within 150 studs and told you to go to them, you wouldn't know where they are, so what would you do? Nothing, because you don't have enough information. You need to give it more information, like the direction to the enemy, {"DirectionX", "DirectionZ"}; both of those inputs would be numbers between -1 and 1.
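
For illustration, this is the kind of computation that produces those two inputs (it is the same .Unit trick the script below uses); the DirectionInputs helper name is made up for this example:

local function DirectionInputs(fromPosition, toPosition)
	-- .Unit normalizes the vector, so each component stays within [-1, 1]
	local direction = (toPosition - fromPosition).Unit
	return { direction.X, direction.Z }
end

-- Example: a target 30 studs ahead (+Z) and 10 studs to the side (+X)
local inputs = DirectionInputs(Vector3.new(0, 0, 0), Vector3.new(10, 0, 30))
print(inputs[1], inputs[2]) -- roughly 0.32 and 0.95, both between -1 and 1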

Also, with the way you have it set up, the AI can find a way to keep getting rewards without doing what you want it to do. I changed it so it now goes toward where it gets the rewards, but it found a way to cheat and collect rewards without doing what's intended.

You need a better reward system. Maybe check whether that was the best option out of them all: did that option get you closer to the target than the others would have? That's the kind of check you should be rewarding on, as sketched below. You can add that to this script; just change the way it rewards and it should work.
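
Here is a rough sketch of one way to do that kind of check. It is not part of OpenML or the script below: the EvaluateReward helper is made up for this example, and it assumes the same Model variable and action order (WalkF, WalkB, WalkL, WalkR) used below. It picks whichever of the four move directions points most directly at the target and only rewards the network when it chose that one.

local function EvaluateReward(chosenAction, targetPosition)
	local pivot = Model:GetPivot()

	-- Candidate world-space move directions, in the same order as the Actions list
	local candidates = {
		pivot.LookVector,                                        -- WalkF
		(pivot * CFrame.Angles(0, math.rad(180), 0)).LookVector, -- WalkB
		(pivot * CFrame.Angles(0, math.rad(90), 0)).LookVector,  -- WalkL
		(pivot * CFrame.Angles(0, math.rad(-90), 0)).LookVector, -- WalkR
	}
	-- (swap the two 90-degree entries if your rig's left/right convention is mirrored)

	-- The candidate that points most directly at the target closes the distance fastest
	local toTarget = (targetPosition - pivot.Position).Unit
	local bestAction, bestDot = 1, -math.huge
	for index, moveDirection in ipairs(candidates) do
		local dot = moveDirection:Dot(toTarget)
		if dot > bestDot then
			bestDot = dot
			bestAction = index
		end
	end

	-- Reward only when the chosen action was the best of the four options
	return chosenAction == bestAction and 1 or -1
end

With something like this, the Reward = Check() line in Train() could become Reward = EvaluateReward(BestAction, targetPosition); you would need to expose the target's position from Run() or call FindClosestChar() again to get it.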

local OpenML = require(game:GetService("ServerScriptService").OpenML)
local Model = script.Parent
local Humanoid = Model.Humanoid
Humanoid.AutoRotate = false

local States = {"DirectionX", "DirectionZ"}
local Actions = {"WalkF", "WalkB", "WalkL", "WalkR"}

local Propagator = OpenML.Algorithms.Propagator

local NeuralNetwork = OpenML.Resources.MLP.new({ #States, 10, #Actions }, function()
	return math.random() * 3 - 1.5
end)

setmetatable(NeuralNetwork, { __index = Propagator })

local ActivationFunction = OpenML.ActivationFunctions.TanH

local DQL = OpenML.Algorithms.DQL.new()

DQL.OnForwardPropagation = function(states) return NeuralNetwork:ForwardPropagation(states, ActivationFunction) end
DQL.OnBackPropagation = function(activations, target) return NeuralNetwork:BackPropagation(activations, target, { ActivationFunction = ActivationFunction, LearningRate = 0.001 }) end

function FindClosestChar()
	local MaxDist = math.huge
	local Char
	for i, v in pairs(workspace:GetChildren()) do
		if v:IsA("Model") then
			if v ~= Model then
				local Humanoid = v:FindFirstChildOfClass("Humanoid")
				if Humanoid then
					if Humanoid.Health > 0 then
						local Dist = (v:GetPivot().Position - Model:GetPivot().Position).Magnitude
						if Dist < MaxDist then
							MaxDist = Dist
							Char = v
						end
					end
				end
			end
		end
	end
	return Char.PrimaryPart, MaxDist
end

function findMaxValue(t)
	local max_value = -math.huge -- Start with the smallest possible number
	local max_value_key

	for k, v in pairs(t) do
		if v > max_value then
			max_value = v
			max_value_key = k
		end
	end

	return max_value_key, max_value
end

local Whatever, LastDistance = FindClosestChar()
local Difference
local BestAction, ActionValue

local epsillon = 2 -- exploration
local epsillonDecay = 0.99
local epsillonUseDecay = 0.99

function Run()
	local RootPart, Distance = FindClosestChar()
	Difference = LastDistance - Distance
	--print("Difference"..Difference)
	LastDistance = Distance
	local direction = (RootPart.Position - Model.HumanoidRootPart.Position).Unit
	States = {direction.X, direction.Z}
	local Activations = NeuralNetwork:ForwardPropagation(States, ActivationFunction)
	Actions = Activations[#Activations]

	BestAction, ActionValue = findMaxValue(Actions)
	if epsillon > math.random() then
		BestAction = math.random(#Actions)
		ActionValue = Actions[BestAction]
		epsillon *= epsillonUseDecay
	end
	--print("SELECTED: ", BestAction)

	if 1 == BestAction then
		Humanoid:Move(Model:GetPivot().LookVector) -- WalkF: move forward
	end
	if 2 == BestAction then
		Humanoid:Move((Model:GetPivot() * CFrame.Angles(0, math.rad(180), 0)).LookVector) -- WalkB: move backward
	end
	if 3 == BestAction then
		Humanoid:Move((Model:GetPivot() * CFrame.Angles(0, math.rad(90), 0)).LookVector) -- WalkL: move left
	end
	if 4 == BestAction then
		Humanoid:Move((Model:GetPivot() * CFrame.Angles(0, math.rad(-90), 0)).LookVector) -- WalkR: move right
	end

	return States
end

function Check()
	if Difference > 0 then
		print("Rewarded")
		epsillon *= epsillonDecay
		return 1
	else
		return -1
	end
end

function Train()
	States = Run()
	DQL:Learn{
		State = States,
		Action = BestAction,
		Reward = Check()
	}
	
	--[[print("------------")
	print("Front "..Actions[1])
	print("Back "..Actions[2])
	print("Left "..Actions[3])
	print("Right "..Actions[4])]]
end

while task.wait() do
	Train()
end