Neural Network printing same output

I am making a number guessing neural network, the training is i suppose correct. But i put 2 feedforward with 2 random numbers to guess and it guesses the same number with the same accuracy. (Every number has 9 data images which makes every number possible to be trained.

The issue is the same output being printed every time no matter the input (I checked the input and it does change)

Code:

Matrix = {}
Matrix.__index = Matrix

function Matrix.new(rows, cols)
	local self = setmetatable({}, Matrix)
	self.rows = rows
	self.cols = cols
	self.data = {}
	for i = 1, rows do
		self.data[i] = {}
		for j = 1, cols do
			self.data[i][j] = math.random() * 2 - 1 -- initialize with random weights
		end
	end
	return self
end

function Matrix.multiply(a, b)
	if a.cols ~= b.rows then
		error("Columns of A must match rows of B")
	end
	local result = Matrix.new(a.rows, b.cols)
	for i = 1, result.rows do
		for j = 1, result.cols do
			local sum = 0
			for k = 1, a.cols do
				sum = sum + a.data[i][k] * b.data[k][j]
			end
			result.data[i][j] = sum
		end
	end
	return result
end

function Matrix.add(a, b)
	local result = Matrix.new(a.rows, a.cols)
	for i = 1, result.rows do
		for j = 1, result.cols do
			result.data[i][j] = a.data[i][j] + b.data[i][j]
		end
	end
	return result
end

function Matrix.applyFunction(a, func)
	for i = 1, a.rows do
		for j = 1, a.cols do
			a.data[i][j] = func(a.data[i][j])
		end
	end
end

function Matrix.transpose(a)
	local result = Matrix.new(a.cols, a.rows)
	for i = 1, a.rows do
		for j = 1, a.cols do
			result.data[j][i] = a.data[i][j]
		end
	end
	return result
end

function Matrix.subtract(a, b)
	local result = Matrix.new(a.rows, a.cols)
	for i = 1, result.rows do
		for j = 1, result.cols do
			result.data[i][j] = a.data[i][j] - b.data[i][j]
		end
	end
	return result
end

function Matrix.copy(matrix)
	local result = Matrix.new(matrix.rows, matrix.cols)
	for i = 1, matrix.rows do
		for j = 1, matrix.cols do
			result.data[i][j] = matrix.data[i][j]
		end
	end
	return result
end

function Matrix.randomize(a)
	for i = 1, a.rows do
		for j = 1, a.cols do
			a.data[i][j] = math.random() * 2 - 1
		end
	end
end

function Matrix.scale(a, scalar)
	for i = 1, a.rows do
		for j = 1, a.cols do
			a.data[i][j] = a.data[i][j] * scalar
		end
	end
end

NeuralNetwork = {}
NeuralNetwork.__index = NeuralNetwork

function NeuralNetwork.new(trainingData, inputNodes, hiddenNodes, outputNodes, hiddenLayers)
	local self = setmetatable({}, NeuralNetwork)

	if trainingData then
		self.inputNodes = trainingData.inputNodes
		self.hiddenNodes = trainingData.hiddenNodes
		self.outputNodes = trainingData.outputNodes
		self.hiddenLayers = trainingData.hiddenLayers or 1
		self.learningRate = trainingData.learningRate

		self.weights = trainingData.weights
		self.biases = trainingData.biases

		return self
	end

	self.inputNodes = inputNodes
	self.hiddenNodes = hiddenNodes
	self.outputNodes = outputNodes
	self.hiddenLayers = hiddenLayers or 1
	self.learningRate = 0.2

	self.weights = {}
	self.biases = {}

	local function initWeights(rows, cols)
		local matrix = Matrix.new(rows, cols)
		for i = 1, rows do
			for j = 1, cols do
				matrix.data[i][j] = math.random() * 2 - 1 -- Random values between -1 and 1
			end
		end
		return matrix
	end

	self.weights[1] = initWeights(self.hiddenNodes, self.inputNodes)
	self.biases[1] = Matrix.new(self.hiddenNodes, 1)

	for i = 2, self.hiddenLayers do
		self.weights[i] = initWeights(self.hiddenNodes, self.hiddenNodes)
		self.biases[i] = Matrix.new(self.hiddenNodes, 1)
	end

	self.weights[self.hiddenLayers + 1] = initWeights(self.outputNodes, self.hiddenNodes)
	self.biases[self.hiddenLayers + 1] = Matrix.new(self.outputNodes, 1)

	return self
end

function NeuralNetwork.feedforward(self, inputArray)
	local inputs = Matrix.new(#inputArray, 1)
	for i = 1, #inputArray do
		inputs.data[i][1] = inputArray[i]
	end

	self.hiddenOutputs = {}

	self.hiddenOutputs[1] = Matrix.multiply(self.weights[1], inputs)
	self.hiddenOutputs[1] = Matrix.add(self.hiddenOutputs[1], self.biases[1])
	Matrix.applyFunction(self.hiddenOutputs[1], function(x) return 1 / (1 + math.exp(-x)) end) -- Sigmoid activation

	print("Hidden Layer 1 Outputs: ▼")
	for i, v in ipairs(self.hiddenOutputs[1].data) do
		print(string.format("[%d] = %f", i, v[1]))
	end

	for i = 2, self.hiddenLayers do
		self.hiddenOutputs[i] = Matrix.multiply(self.weights[i], self.hiddenOutputs[i-1])
		self.hiddenOutputs[i] = Matrix.add(self.hiddenOutputs[i], self.biases[i])
		Matrix.applyFunction(self.hiddenOutputs[i], function(x) return 1 / (1 + math.exp(-x)) end) -- Sigmoid activation

		print(string.format("Hidden Layer %d Outputs: ▼", i))
		for i, v in ipairs(self.hiddenOutputs[i].data) do
			print(string.format("[%d] = %f", i, v[1]))
		end
	end

	local outputs = Matrix.multiply(self.weights[self.hiddenLayers + 1], self.hiddenOutputs[self.hiddenLayers])
	outputs = Matrix.add(outputs, self.biases[self.hiddenLayers + 1])
	Matrix.applyFunction(outputs, function(x) return math.exp(x) end) -- Softmax pre-processing

	for i, v in ipairs(outputs.data) do
		print(string.format("[%d] = %f", i, v[1]))
	end

	local sum = 0
	for i = 1, outputs.rows do
		sum = sum + outputs.data[i][1]
	end
	Matrix.applyFunction(outputs, function(x) return x / sum end)

	return outputs
end

function NeuralNetwork.train(self, batchInputs, batchTargets, batchSize)
	for b = 1, batchSize do
		local inputs = batchInputs[b]
		local targets = batchTargets[b]

		local outputs = self:feedforward(inputs)

		local targetMatrix = Matrix.new(#targets, 1)
		for i = 1, #targets do
			targetMatrix.data[i][1] = targets[i]
		end
		local outputErrors = Matrix.subtract(targetMatrix, outputs)

		local gradients = Matrix.copy(outputErrors)
		Matrix.applyFunction(gradients, function(x) return x * (1 - x) end) -- Derivative of sigmoid
		Matrix.scale(gradients, self.learningRate)

		print("Gradients: ▼")
		for i, v in ipairs(gradients.data) do
			print(string.format("[%d] = %f", i, v[1]))
		end

		local deltaWeights = Matrix.multiply(gradients, Matrix.transpose(self.hiddenOutputs[self.hiddenLayers]))
		self.weights[self.hiddenLayers + 1] = Matrix.add(self.weights[self.hiddenLayers + 1], deltaWeights)
		self.biases[self.hiddenLayers + 1] = Matrix.add(self.biases[self.hiddenLayers + 1], gradients)

		local error = Matrix.copy(outputErrors)
		for i = self.hiddenLayers, 1, -1 do
			local hiddenErrors = Matrix.multiply(Matrix.transpose(self.weights[i + 1]), error)
			local hiddenGradients = Matrix.copy(hiddenErrors)
			Matrix.applyFunction(hiddenGradients, function(x) return x * (1 - x) end) -- Derivative of sigmoid
			Matrix.scale(hiddenGradients, self.learningRate)

			print(string.format("Hidden Layer %d Gradients: ▼", i))
			for i, v in ipairs(hiddenGradients.data) do
				print(string.format("[%d] = %f", i, v[1]))
			end

			local deltaHiddenWeights = Matrix.multiply(hiddenGradients, Matrix.transpose(self.hiddenOutputs[i - 1]))
			self.weights[i] = Matrix.add(self.weights[i], deltaHiddenWeights)
			self.biases[i] = Matrix.add(self.biases[i], hiddenGradients)

			error = hiddenErrors
		end
	end
end

function NeuralNetwork:getData()
	local data = {
		weights = {},
		biases = {},
		inputNodes = self.inputNodes,
		hiddenNodes = self.hiddenNodes,
		outputNodes = self.outputNodes,
		hiddenLayers = self.hiddenLayers,
		learningRate = self.learningRate
	}

	for i = 1, #self.weights do
		table.insert(data.weights, self.weights[i])
		table.insert(data.biases, self.biases[i])
	end

	return data
end

function flattenTable(inputTable)
	local flatTable = {}
	for i = 1, #inputTable do
		for j = 1, #inputTable[i] do
			table.insert(flatTable, inputTable[i][j])
		end
	end
	return flatTable
end

local nn = NeuralNetwork.new(trainingData, 28 * 28, 128, 10, 2) -- Updated hiddenNodes to 128

function findBest(data)
	local bestNumber, bestValue = 0, 0
	for i, v in pairs(data.data) do
		v = data.data[i][1]
		if v > bestValue then
			bestNumber = tonumber(i) - 1
			bestValue = v
		end
	end
	return bestNumber, math.floor(bestValue * 100)
end

local function trainNeuralNetwork()
	for j = 1, 500 do
		for epoch = 1, 100 do
			local batchInputs, batchTargets = {}, {}
			for b = 1, 10 do
				local targetNumber = math.random(0, 9)
				local random = tostring(math.random(1, 8))
				local inputData = flattenTable(sort[tostring(targetNumber) .. random])

				table.insert(batchInputs, inputData)

				local targetData = {}
				for i = 1, 10 do
					targetData[i] = (i - 1 == targetNumber) and 1 or 0
				end
				table.insert(batchTargets, targetData)
			end

			nn:train(batchInputs, batchTargets, 10)
		end

		print("------------------")
		local testNumber = math.random(0, 9)
		local testRandom = tostring(math.random(1, 8))
		local testInput = flattenTable(sort[tostring(testNumber) .. testRandom])
		local testGuess = nn:feedforward(testInput)
		print("Test Predicted number:", findBest(testGuess))
		print("Actual number:", testNumber)

		local testNumber = math.random(0, 9)
		local testRandom = tostring(math.random(1, 8))
		local testInput = flattenTable(sort[tostring(testNumber) .. testRandom])
		local testGuess = nn:feedforward(testInput)
		print("Test Predicted number:", findBest(testGuess))
		print("Actual number:", testNumber)
		print("------------------")

		task.wait()
	end

	local data = nn:getData()
	saveTrainingData(data, "key")
end

trainNeuralNetwork()