I made a lexer, which generates tokens with their types and values based on what character/word it is. It seems to be reading identifiers and numbers, but not parenthesis.
Lexer
local token_types = {
NUMBER = "NUMBER",
STRING = "STRING",
BOOLEAN = "BOOLEAN",
IDENTIFIER = "IDENTIFIER",
KEYWORD = "KEYWORD",
LPAREN = "LPAREN",
RPAREN = "RPAREN",
EQ = "EQ"
}
local lexer = {}
lexer.__index = lexer
function lexer:advance()
if (self.pos == string.len(self.text)) then
self.char = nil
return
end
self.pos += 1
self.char = string.sub(self.text, self.pos, self.pos)
end
function lexer:make_identifier()
local result = ""
for _=1,string.len(self.text) do
if string.match(self.char, "%a") then
result ..= self.char
self:advance()
else
break
end
end
if result == "let" or result == "println" then
return {token_types.KEYWORD, result}
elseif result == "true" or result == "false" then
return {token_types.BOOLEAN, result}
else
return {token_types.IDENTIFIER, result}
end
end
function lexer:make_number()
local result = ""
for _=1,string.len(self.text) do
if table.find({"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}, self.char) then
result ..= self.char
self:advance()
else
break
end
end
return {token_types.NUMBER, result}
end
function lexer.new(source : string)
local self = setmetatable({}, {__index=lexer})
self.text = source
self.pos = 0
self:advance()
return self
end
function lexer:generate_tokens()
local tokens = {}
while self.char ~= nil do
if table.find({"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}, self.char) then
table.insert(tokens, self:make_number())
self:advance()
elseif string.match(self.char, "%a") then
table.insert(tokens, self:make_identifier())
self:advance()
elseif self.char == "(" then
table.insert(tokens, {token_types.LPAREN, self.char})
self:advance()
elseif self.char == ")" then
table.insert(tokens, {token_types.RPAREN, self.char})
self:advance()
else
return nil, "Error: unknown character '"..self.char.."'"
end
end
return tokens, nil
end
return lexer
The text I’m giving it is println(12)
, it gives the println
and 12
, no parenthesis.