Somewhat finished "RNN" model for word prediction

Since my previous post I have made some serious progress, and despite ROBLOX’s limitations, I have gotten to a point that I am happy with.

Here is some general knowledge to have before reading this:

(look into RNNs, LSTMs, and machine learning in general)

n_h = number of hidden units; h_prev = previous hidden state
c_prev = previous cell state
W_fh, W_fx = weight matrices (hidden-to-hidden and input-to-hidden); b_f = bias vector
dW_fh, dW_fx, db_f = the corresponding gradients
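
Tying those together, each hidden unit in the script below computes one sigmoid gate (roughly an LSTM forget gate) from the previous hidden state and the current word embedding; the notation follows the variable names in the code:

f_t[j] = sigmoid( sum_k W_fh[j][k] * h_prev[k] + sum_k W_fx[j][k] * x_t[k] + b_f[j] )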

Given input: “mason”
Desired output: “walks” or “walking”

Also, due to ROBLOX’s limitations, I could only pass 8 tokenized sentences of training data, with between 600 and 800 iterations at a learning rate of 0.01, before I received exhaustion errors.

-- Hyperparameters
local n_h = 64 -- number of hidden units
local vocab_size = 10000 -- upper bound on the vocabulary (the vocab actually built below is much smaller)
local embedding_size = 50 -- length of each word embedding vector

-- Logistic sigmoid: squashes any value into the (0, 1) range
local function sigmoid(x)
	return 1 / (1 + math.exp(-x))
end

-- Gate parameters, randomly initialised below
local W_fh = {} -- hidden-to-hidden weights (n_h x n_h)
local W_fx = {} -- input-to-hidden weights (n_h x embedding_size)
local b_f = {}  -- bias vector (n_h)

for i = 1, n_h do
	W_fh[i] = {}
	W_fx[i] = {}
	for j = 1, n_h do
		W_fh[i][j] = math.random()
	end
	for j = 1, embedding_size do
		W_fx[i][j] = math.random()
	end
	b_f[i] = math.random()
end

-- Previous cell state and hidden state, both initialised to zero
-- (c_prev is set up here but not used in the forward pass below)
local c_prev = {}
local h_prev = {}
for i = 1, n_h do
	c_prev[i] = 0
	h_prev[i] = 0
end

-- Training settings (kept small to stay within ROBLOX's execution limits)
local learning_rate = 0.01
local number_of_iterations = 600

-- The 8 tokenized training sentences
local sequence_of_inputs = {
	{"today", "mason", "went", "on", "a", "walk"},
	{"mason", "likes", "going", "on", "walks"},
	{"tim", "walked", "with", "mason", "today"},
	{"mason", "walked", "outside", "today"},
	{"mason", "walks", "and", "talks"},
	{"tim", "joins", "mason", "on", "walks"},
	{"walking", "is", "masons", "favorite"},
	{"mason", "walking", "is", "so", "cool"}
}

-- Build word <-> index lookup tables from the training sentences
local word_to_index = {}
local index_to_word = {}
local index = 1
for _, sentence in ipairs(sequence_of_inputs) do
	for _, word in ipairs(sentence) do
		if not word_to_index[word] then
			word_to_index[word] = index
			index_to_word[index] = word
			index = index + 1
		end
	end
end

-- Random (untrained) embedding vector for every possible word index
local word_embeddings = {}
for i = 1, vocab_size do
	word_embeddings[i] = {}
	for j = 1, embedding_size do
		word_embeddings[i][j] = math.random()
	end
end

-- Gradient accumulators, matching the shapes of the parameters
local dW_fh = {}
local dW_fx = {}
local db_f = {}
for i = 1, n_h do
	dW_fh[i] = {}
	dW_fx[i] = {}
	for j = 1, n_h do
		dW_fh[i][j] = 0
	end
	for j = 1, embedding_size do
		dW_fx[i][j] = 0
	end
	db_f[i] = 0
end

for iteration = 1, number_of_iterations do
	
	-- Reset the gradient accumulators at the start of each iteration
	for i = 1, n_h do
		for j = 1, n_h do
			dW_fh[i][j] = 0
		end
		for j = 1, embedding_size do
			dW_fx[i][j] = 0
		end
		db_f[i] = 0
	end

	local total_loss = 0

	-- Loop over every consecutive (input word, next word) pair in every sentence
	for _, sentence in ipairs(sequence_of_inputs) do
		for i = 1, #sentence - 1 do

			local input_word_index = word_to_index[sentence[i]]
			local target_word_index = word_to_index[sentence[i + 1]]

			-- Embedding vector for the input word
			local x_t = word_embeddings[input_word_index]

			-- Weighted sum of the previous hidden state and the input embedding
			local weighted_sum = {}
			for j = 1, n_h do
				local sum = 0
				for k = 1, n_h do
					sum = sum + W_fh[j][k] * h_prev[k]
				end
				for k = 1, embedding_size do
					sum = sum + W_fx[j][k] * x_t[k]
				end
				table.insert(weighted_sum, sum + b_f[j])
			end

			-- Gate activation: f_t[j] = sigmoid(weighted_sum[j])
			local f_t = {}
			for j = 1, n_h do
				table.insert(f_t, sigmoid(weighted_sum[j]))
			end

			-- One-hot target over the vocabulary. Note that f_t only has n_h
			-- entries, so the loss below only compares the first n_h components.
			local target_f = {}
			for j = 1, vocab_size do
				target_f[j] = (j == target_word_index) and 1 or 0
			end

			-- Squared-error loss for this (input, target) pair
			local loss = 0
			for j = 1, n_h do
				loss = loss + (f_t[j] - target_f[j]) ^ 2
			end
			total_loss = total_loss + loss

			-- Backward pass: error signal through the sigmoid derivative
			local delta_f_t = {}
			for j = 1, n_h do
				delta_f_t[j] = 2 * (f_t[j] - target_f[j]) * f_t[j] * (1 - f_t[j])
			end
			-- Error signal propagated back to the previous hidden state
			local delta_h_t = {}
			for j = 1, n_h do
				local sum = 0
				for k = 1, n_h do
					sum = sum + W_fh[k][j] * delta_f_t[k]
				end
				delta_h_t[j] = sum
			end

			-- Accumulate the parameter gradients
			for j = 1, n_h do
				for k = 1, n_h do
					dW_fh[j][k] = dW_fh[j][k] + delta_f_t[j] * h_prev[k]
				end
				for k = 1, embedding_size do
					dW_fx[j][k] = dW_fx[j][k] + delta_f_t[j] * x_t[k]
				end
				db_f[j] = db_f[j] + delta_f_t[j]
			end

			-- Nudge the carried hidden state using its error signal
			for j = 1, n_h do
				h_prev[j] = h_prev[j] - learning_rate * delta_h_t[j]
			end
		end
	end

	-- Apply the accumulated gradients to the parameters
	for i = 1, n_h do
		for j = 1, n_h do
			W_fh[i][j] = W_fh[i][j] - learning_rate * dW_fh[i][j]
		end
		for j = 1, embedding_size do
			W_fx[i][j] = W_fx[i][j] - learning_rate * dW_fx[i][j]
		end
		b_f[i] = b_f[i] - learning_rate * db_f[i]
	end

	if iteration % 100 == 0 then
		print("Iteration: " .. iteration .. ", Loss: " .. total_loss)
	end
end

-- Runs one forward step from the current hidden state and returns the word
-- whose index has the highest gate activation
local function predict_next_word(input_word)

	local input_word_index = word_to_index[input_word]
	if not input_word_index then
		return nil -- word was never seen during training
	end

	local x_t = word_embeddings[input_word_index]

	local weighted_sum = {}
	for j = 1, n_h do
		local sum = 0
		for k = 1, n_h do
			sum = sum + W_fh[j][k] * h_prev[k]
		end
		for k = 1, embedding_size do
			sum = sum + W_fx[j][k] * x_t[k]
		end
		table.insert(weighted_sum, sum + b_f[j])
	end

	local f_t = {}
	for j = 1, n_h do
		table.insert(f_t, sigmoid(weighted_sum[j]))
	end

	-- Pick the index with the highest activation (f_t only has n_h entries,
	-- so only those indices can ever be chosen)
	local max_prob_index = 1
	for i = 2, n_h do
		if f_t[i] > f_t[max_prob_index] then
			max_prob_index = i
		end
	end

	local predicted_word = index_to_word[max_prob_index]

	return predicted_word
end


local input_word = "mason"
--// goal: walks/walking
local predicted_word = predict_next_word(input_word)
print("Predicted next word for '" .. input_word .. "': " .. predicted_word)

As you can see, it is “possible” to make your own “AI” or RNN models, but it serves little real purpose due to all the limitations, and because Luau simply isn’t made for it lol.

If you wish to do anything similar, just use the OpenAI API through HttpService; a rough sketch of that is below.
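
For reference, here is a minimal sketch of what that could look like. It assumes HTTP requests are enabled for the game, and that the endpoint, model name, and response fields match OpenAI’s current chat completions API, so double-check their docs; "YOUR_API_KEY" is a placeholder, and a real key should live on the server, never in a client script.

local HttpService = game:GetService("HttpService")

local OPENAI_URL = "https://api.openai.com/v1/chat/completions"
local API_KEY = "YOUR_API_KEY" -- placeholder

local function ask_model(prompt)
	-- Build the JSON request body
	local body = HttpService:JSONEncode({
		model = "gpt-4o-mini", -- whichever chat model you have access to
		messages = {
			{ role = "user", content = prompt }
		}
	})

	-- RequestAsync can throw, so wrap it in pcall
	local ok, response = pcall(function()
		return HttpService:RequestAsync({
			Url = OPENAI_URL,
			Method = "POST",
			Headers = {
				["Content-Type"] = "application/json",
				["Authorization"] = "Bearer " .. API_KEY
			},
			Body = body
		})
	end)

	if not ok or not response.Success then
		warn("OpenAI request failed")
		return nil
	end

	-- Pull the generated text out of the JSON response
	local decoded = HttpService:JSONDecode(response.Body)
	return decoded.choices[1].message.content
end

print(ask_model("Predict the next word after: mason"))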

Any feedback is welcome. I am no expert, so don’t talk down to me. Anyway, thanks for your time, and peace.

(Note: when running a bare-bones version without these limitations, the loss can drop all the way to 0.001, rather than the roughly 26 it averages here.)


Just remember that loss is not the only indicator of how good your model is. In fact, a really low loss can indicate that the model is actually overfitting, which means it is less capable of handling inputs it has not seen before (and generalising to unseen inputs is the whole point of training).
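
One quick way to see this with the script above is to compare the loss on a next-word pair that appears in the training data against one that does not (a rough sketch reusing the globals from the post above; the two example pairs are just illustrative choices):

-- Sketch: same forward pass and squared-error loss as the training loop,
-- but for a single (input word, target word) pair and without any updates.
local function pair_loss(input_word, target_word)
	local x_t = word_embeddings[word_to_index[input_word]]
	local target_index = word_to_index[target_word]
	local loss = 0
	for j = 1, n_h do
		local sum = b_f[j]
		for k = 1, n_h do
			sum = sum + W_fh[j][k] * h_prev[k]
		end
		for k = 1, embedding_size do
			sum = sum + W_fx[j][k] * x_t[k]
		end
		local f = sigmoid(sum)
		local target = (j == target_index) and 1 or 0
		loss = loss + (f - target) ^ 2
	end
	return loss
end

print("seen pair:", pair_loss("mason", "walks"))     -- this pair occurs in the training sentences
print("unseen pair:", pair_loss("walking", "today")) -- both words are in the vocab, but the pair never occurs
-- A large gap between the two suggests the model is memorising rather than generalising.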


Honestly surprised no other sources I looked into referenced overfitting, thank you for this
