I know it has been a while, but I figured I might as well make this.
Let's get right to the explanation!
PARSER
HTML is rendered by reading what type each piece of text is, so we don’t need a lexer.
Instead we need a compiler that compiles the HTML into readable tables.
That’s where the parser comes in: it compiles the HTML text into tables.
Also blokav made 99.87% of it.
Whitespace remover.
--- Strip the whitespace that HTML treats as insignificant.
-- Works in two passes:
--   1. Drops every backslash that directly follows "<" or directly
--      precedes "/".
--   2. Handles each run of whitespace (as defined by `whitespace`):
--      * a run touching a tag boundary (preceded by ">" or followed by
--        "<") is removed;
--      * a single whitespace character elsewhere is kept unchanged;
--      * a longer run elsewhere is collapsed to one space, so that
--        "hello   world" stays two words instead of being glued together.
-- @param source  string with the raw HTML text.
-- @return string with the redundant whitespace removed.
function removeWhiteSpace(source)
  -- Pass 1: remove the backslashes described above. Neighbours are read
  -- from the untouched input, so removals do not influence each other.
  local kept = {}
  for i = 1, #source do
    local char = string.sub(source, i, i)
    local dropBackslash = char == "\\"
      and (string.sub(source, i - 1, i - 1) == "<"
        or string.sub(source, i + 1, i + 1) == "/")
    if not dropBackslash then
      kept[#kept + 1] = char
    end
  end
  source = table.concat(kept)

  -- Pass 2: walk the text run by run instead of character by character,
  -- so a whole block of whitespace is judged once.
  local out = {}
  local length = #source
  local i = 1
  while i <= length do
    local char = string.sub(source, i, i)
    if not whitespace(char) then
      out[#out + 1] = char
      i = i + 1
    else
      -- Extend j to the last character of this whitespace run.
      local j = i
      while j < length and whitespace(string.sub(source, j + 1, j + 1)) do
        j = j + 1
      end
      local before = string.sub(source, i - 1, i - 1) -- "" at the start
      local after = string.sub(source, j + 1, j + 1)  -- "" at the end
      if before ~= ">" and after ~= "<" then
        if j == i then
          out[#out + 1] = char -- lone whitespace character: keep as is
        else
          out[#out + 1] = " "  -- longer run: collapse to one space
        end
      end
      i = j + 1
    end
  end
  return table.concat(out)
end
The name says it all.
Since whitespace in HTML is not significant (unlike in a Makefile), we need a whitespace remover, which is what this function is.
It walks through the source one character at a time and strips the whitespace that HTML would ignore, such as whitespace next to a tag or repeated whitespace.
Set up.
--- Split HTML source into an ordered list of tag and text segments.
-- The source is first passed through `removeWhiteSpace`. Everything from
-- a "<" up to the next ">" becomes one tag segment (lower-cased as a
-- whole); everything between tags becomes one text segment.
-- NOTE(review): lower-casing the whole tag also lower-cases attribute
-- values; this is kept as-is because callers may rely on it.
-- @param source1  string with the raw HTML text.
-- @return array of strings in document order.
function setUp(source1)
  local source = removeWhiteSpace(source1)
  local segments = {}
  local start = 1      -- index where the current segment begins
  local scan = false   -- true while we are between "<" and ">"

  -- Append source[first..last] as a text segment (no-op when empty).
  local function pushText(first, last)
    if first > last then
      return
    end
    local word = string.sub(source, first, last)
    -- Turn remaining tabs/newlines into plain spaces. The previous
    -- find-and-replace loop swapped " " for " " and therefore never
    -- terminated on text that contained a space.
    segments[#segments + 1] = (string.gsub(word, "%s", " "))
  end

  for i = 1, #source do
    local char = string.sub(source, i, i)
    if not scan then
      if char == "<" then
        scan = true
        pushText(start, i - 1)
        start = i
      end
    elseif char == ">" then
      scan = false
      segments[#segments + 1] = string.lower(string.sub(source, start, i))
      start = i + 1
    end
  end

  -- Text after the last tag (or a source with no tags at all) used to be
  -- dropped; keep it. An unterminated "<..." at the end is still ignored.
  if not scan then
    pushText(start, #source)
  end
  return segments
end
First it removes the whitespace.
Then it loops over the source, separating the tags from the text between them.
Each piece is put into a table.
And at last it returns that big table.
getTagInfo.
--- Break one tag such as `<a href="x">` or `</a>` into a description table.
-- @param tag  string holding a single tag, including its "<" and ">".
-- @return table with the fields:
--   type     tag name: the text after "<" (or "</") up to the first
--            whitespace character or ">"; nil when neither is present
--   close    true for a closing tag ("</..."), false otherwise
--   attr     map of attribute name -> value (always empty for closing tags)
--   content  empty table (not filled in by this function)
--   fulltext the tag exactly as passed in
-- Attribute values may be double-quoted, single-quoted or unquoted.
-- An attribute without a value (e.g. `disabled`) is stored with the
-- empty string as its value.
function getTagInfo(tag)
  local info = {
    ["attr"] = {},
    ["content"] = {},
    ["fulltext"] = tag
  }
  local closing = string.sub(tag, 1, 2) == "</"
  info["close"] = closing

  -- Locate the end of the tag name.
  local nameStart = closing and 3 or 2
  local nameEnd
  for i = nameStart, #tag do
    local char = string.sub(tag, i, i)
    if whitespace(char) or char == ">" then
      nameEnd = i
      break
    end
  end
  if not nameEnd then
    return info -- no terminator found: type stays nil
  end
  info["type"] = string.sub(tag, nameStart, nameEnd - 1)
  if closing then
    return info -- closing tags carry no attributes
  end

  -- Attribute state machine.
  --   "name"   collecting an attribute name
  --   "value1" saw "=", waiting for the value to start
  --   "value2" inside a double-quoted value
  --   "value3" inside a single-quoted value
  --   "bare"   inside an unquoted value
  local attr = info["attr"]
  local status = "name"
  local name = ""
  local gap = false -- whitespace seen after a non-empty name
  local mark        -- index where the current value starts

  for i = nameEnd + 1, #tag do
    local char = string.sub(tag, i, i)
    if status == "name" then
      if char == "=" then
        status = "value1"
        gap = false
      elseif whitespace(char) then
        -- Could be `name = "v"` or the end of a valueless attribute;
        -- decide when the next significant character arrives.
        if name ~= "" then
          gap = true
        end
      elseif char == ">" or char == "/" then
        -- End of the tag (or the "/" of a self-closing tag): flush a
        -- pending valueless attribute instead of gluing the symbol on.
        if name ~= "" then
          attr[name] = ""
          name = ""
        end
        gap = false
      else
        if gap then
          -- A new name starts, so the previous one had no value.
          -- Previously the two names were concatenated.
          attr[name] = ""
          name = ""
          gap = false
        end
        name = name .. char
      end
    elseif status == "value1" then
      if char == '"' then
        status = "value2"
        mark = i
      elseif char == "'" then
        status = "value3"
        mark = i
      elseif not whitespace(char) and char ~= ">" then
        -- Unquoted value; previously these characters were skipped and
        -- the next quoted value was attached to the wrong name.
        status = "bare"
        mark = i
      end
    elseif status == "value2" then
      if char == '"' then
        attr[name] = string.sub(tag, mark + 1, i - 1)
        status = "name"
        name = ""
      end
    elseif status == "value3" then
      if char == "'" then
        attr[name] = string.sub(tag, mark + 1, i - 1)
        status = "name"
        name = ""
      end
    elseif status == "bare" then
      if whitespace(char) or char == ">" then
        attr[name] = string.sub(tag, mark, i - 1)
        status = "name"
        name = ""
      end
    end
  end
  return info
end
Returns the tag's info: its type, its attributes, and whether it is a closing tag. Not much to say.
Later I’m going to put more stuff here. This is just a placeholder.
whitespace.
--- Tell whether a one-character string is HTML (ASCII) whitespace.
-- Recognises tab (9), line feed (10), form feed (12), carriage return
-- (13) and space (32). Carriage return and form feed were previously
-- missing, so CRLF line endings left a stray "\r" attached to text and
-- tag names.
-- @param char  string; only its first byte is examined.
-- @return true for a whitespace character, false otherwise (including
--         for the empty string).
function whitespace(char)
  local b = string.byte(char) -- nil for "", which compares unequal below
  return (b == 9 or b == 10 or b == 12 or b == 13 or b == 32)
end
Checks whether a character is whitespace.
If it is a whitespace character it returns true; if not, it returns false.
isTag.
--- Decide whether a segment is a recognised HTML tag.
-- The segment must start with "<", end with ">" and must not start with
-- "<!". It is accepted when it opens ("<name>" or "<name ") or closes
-- ("</name>") a name found as a key in the global `tags` table, or when
-- it opens a name found as a key in the global `singletons` table.
-- @param segment  string to inspect.
-- @return true when the segment is a recognised tag, false otherwise.
function isTag(segment)
  -- Guard clauses for the overall <...> shape.
  if string.sub(segment, 1, 1) ~= "<" then
    return false
  end
  if string.sub(segment, #segment) ~= ">" then
    return false
  end
  if string.sub(segment, 1, 2) == "<!" then
    return false
  end

  -- True when the segment begins with "<name>" or "<name ".
  local function opens(name)
    local head = string.sub(segment, 1, #name + 2)
    return head == "<" .. name .. ">" or head == "<" .. name .. " "
  end

  for name in pairs(tags) do
    if opens(name) then
      return true
    end
    if string.sub(segment, 1, #name + 3) == "</" .. name .. ">" then
      return true
    end
  end

  -- Singleton elements only ever appear in their opening form.
  for name in pairs(singletons) do
    if opens(name) then
      return true
    end
  end

  return false
end
Checks whether the segment has < and > around it and starts with a known tag name. If yes it returns true; if not, it returns false.
getContext(I made this one!)
--- Collect the attribute names that appear in a tag.
-- The tag is split on single spaces; the last piece loses its final
-- character (normally the closing ">"). Every piece that is not a key of
-- the global `tags` table and contains "=" contributes the text in front
-- of its first "=".
-- @param tag  string with the tag text.
-- @return table mapping each collected name to itself.
function getContext(tag)
  local pieces = tag:split(" ")
  local lastIndex = #pieces
  local names = {}
  for position, piece in pairs(pieces) do
    if position == lastIndex then
      piece = piece:sub(1, -2) -- drop the trailing character
    end
    if not tags[piece] then
      local equalsAt = piece:find("=", 1, true)
      if equalsAt then
        local attrName = piece:sub(1, equalsAt - 1)
        names[attrName] = attrName
      end
    end
  end
  return names
end
An unused function.
It failed.
It tries to find a “=” in each piece, then moves on to the next one.
It splits the string on " " and returns the results in a table.
t.Parse(I also made this one!)
--- Parse HTML source into a flat, ordered list of entries.
-- Each entry is an array { kind, segment, context } where:
--   kind    is the "type" field returned by getTagInfo when isTag accepts
--           the segment (falling back to "text" when that type is nil),
--           and "text" for every other segment;
--   segment is the segment string produced by setUp;
--   context is the table returned by getContext for that segment.
-- @param file  string with the HTML source.
-- @return array of entries in document order.
function t.Parse(file)
  local entries = {}
  -- ipairs rather than pairs: the segments form a sequence and the output
  -- must follow document order, which pairs does not guarantee.
  for _, segment in ipairs(setUp(file)) do
    local kind = "text"
    if isTag(segment) then
      kind = getTagInfo(segment)["type"] or "text"
    end
    entries[#entries + 1] = { kind, segment, getContext(segment) }
  end
  return entries
end
Returns a table built with isTag, getTagInfo and getContext (unused).
Thanks for taking the time to stop by, and see you in the next post!