This regex dialect is not based on any standard, so some patterns might work differently than in other regex engines.
I made a regex library for Lua; I think it could be useful for developers. The module is a work in progress: not all features are implemented yet, but the basics are, such as:
- sequence
/[abcd]/
- group
/(var)?(let)?/
- alternative
/(yellow)|(green)/
How to use
-- Load the library; replace `...` with the path to the gex module.
local gex = require(...) -- path to gex module

-- Text searched by every example below.
local subject = "yellow and green colors"

-- Alternative of two groups: "yellow" or "green".
-- The third argument of `find` is the `multiple` flag.
local colorPattern = gex.compile("(yellow)|(green)")
print(colorPattern:find(subject, 1, true))
print(colorPattern:match(subject))

-- Sequence of vowels with a `*` quantifier.
local vowelPattern = gex.compile("[aeuio]*")
print(vowelPattern:find(subject, 1, true))
Code to Test
-- Compiles `regex`, matches it against `match`, and reports whether the
-- outcome agrees with the expectation.
--
-- A case passes when the found flag returned by the matcher equals
-- `expectFound` and the substring between the returned indices equals
-- `expect`. Passing cases go to `print`, failing cases go to `warn`.
--
-- test:        label shown in the report line
-- regex:       pattern source handed to `gex.compile`
-- match:       subject text to match against
-- expect:      substring the match is expected to cover
-- expectFound: whether a match is expected to be found
local function testMatch(test: string, regex: string, match: string, expect: string, expectFound: boolean)
    local compiled = gex.compile(regex)
    local found, first, last = compiled:match(match)

    -- If the matcher returns no indices (nil), `string.sub` would raise and
    -- abort the whole run instead of reporting a single failed case, so a
    -- missing range is treated as an empty capture.
    local captured = ""
    if first ~= nil and last ~= nil then
        captured = match:sub(first, last)
    end

    local passed = found == expectFound and captured == expect

    -- One template for both outcomes; only the status word differs.
    local message = ("[MATCH][%s] %s Found: %s Expected: %s Regex: /%s/ Got: \"%s\" Expected: \"%s\""):format(
        test,
        passed and "Successful" or "Failed",
        tostring(found),
        tostring(expectFound),
        regex,
        captured,
        expect
    )

    if passed then
        print(message)
    else
        warn(message)
    end
end
-- Builds `count` random email-like strings.
--
-- Every string is three runs of random lowercase letters (5-20, 5-10 and
-- 5-8 characters long). An "@" is placed after the first run and a "." after
-- the second run, each only when a 1-in-2 random draw succeeds, so some
-- results lack one or both separators.
--
-- count: number of strings to generate
-- returns: array of generated strings
local function generateFakeEmails(count: number): {string}
    local alphabet = "qwertyuiopasdfghjklzxcvbnm"
    local alphabetSize = #alphabet

    -- Returns a run of random letters whose length is drawn from
    -- [minLen, maxLen]; the length is drawn before any letter is picked.
    local function randomLetters(minLen: number, maxLen: number): string
        local letters = {}
        for slot = 1, math.random(minLen, maxLen) do
            local pick = math.random(1, alphabetSize)
            letters[slot] = alphabet:sub(pick, pick)
        end
        return table.concat(letters)
    end

    local emails = {}
    for index = 1, count do
        local parts = { randomLetters(5, 20) }
        if math.random(1, 2) == 1 then
            parts[#parts + 1] = "@"
        end
        parts[#parts + 1] = randomLetters(5, 10)
        if math.random(1, 2) == 1 then
            parts[#parts + 1] = "."
        end
        parts[#parts + 1] = randomLetters(5, 8)
        emails[index] = table.concat(parts)
    end
    return emails
end
-- Feeds every match case below to `testMatch`, in the order listed.
local function runTests()
    -- Row layout: label, regex, subject text, expected substring, expected found flag.
    local cases = {
        { "exact", "a", "a", "a", true },
        { "exact", "ab", "ab", "ab", true },
        { "one or more", "a+", "aaaa", "aaaa", true },
        { "multiple quanitities", "a+b", "aaaabbb", "aaaab", true },
        { "optional", "a?", "b", "", false },
        { "optional", "a?", "aaaa", "a", true },
        { "zero or more", "a*", "", "", false },
        { "zero or more", "a*", "aaaaaaaaaaaa", "aaaaaaaaaaaa", true },
        { "alphanumeric", "\\w+", "wo3rd50", "wo3rd50", true },
        { "digit", "\\d+", "165xyz", "165", true },
        { "alphabetic", "\\a+", "word100", "word", true },
        { "wildcard", "\\.+", "d293 *&($# HJDS nckd 127", "d293 *&($# HJDS nckd 127", true },
        { "alternative", "a+|b+", "a", "a", true },
        { "alternative", "a+|b+", "b", "b", true },
        { "email", "\\w+@\\w+.\\w+", "fakeemail@fake.domen", "fakeemail@fake.domen", true },
        { "email", "\\w+@\\w+.\\w+", "fakeemail@fake", "fakeemail@fake", false },
        { "group", "(var)|(let)", "var let be that", "var", true },
        { "group", "(var)|(let)", "let var be this", "let", true },
        { "sequence", "[abcd]", "a", "a", true },
        { "sequence", "[abcd]", "b", "b", true },
        { "sequence", "[abcd]", "c", "c", true },
        { "sequence", "[abcd]", "d", "d", true },
        { "sequence(find `f`)", "[abcd]", "f", "", false },
        { "range(3)", "\\d{3}", "10000", "100", true },
        { "range(2, 5)", "\\d{2, 5}", "", "", false },
        { "range(2, 5)", "\\d{2, 5}", "1234567890", "12345", true },
        { "range(2,)", "\\d{2,}", "100", "100", true },
        { "start and end line", "\\^\\w+\\$", "hello world", "hello", true },
    }

    for _, case in ipairs(cases) do
        testMatch(case[1], case[2], case[3], case[4], case[5])
    end
end
-- Times the email regex over generated inputs of increasing size.
--
-- For each batch size it generates that many email-like strings, joins them
-- with ",\n", runs a compile + `find` over the joined text, and prints the
-- elapsed time, the text length, and the share of matches relative to the
-- number of generated strings.
local function speedTest()
    -- Batch sizes to benchmark, smallest first.
    local batchSizes = { 1, 5, 10, 100, 1000, 2000, 5000 }

    -- Runs one timed pass over `emails` generated strings.
    local function speedTestFakeEmails(emails: number)
        local fakeEmails = generateFakeEmails(emails)
        local fakeEmailsString = table.concat(fakeEmails, ",\n")

        -- os.clock() is used for timing the compile + find below.
        local start = os.clock()
        -- Backslash escapes, as in every other gex pattern in this file; the
        -- previous "%w" spelling is Lua-pattern syntax, not gex syntax.
        local ValidEmails = gex.compile("\\w+@\\w+.\\w+"):find(fakeEmailsString, 1, true)
        local elapsed = os.clock() - start

        print(("tooks: %fs for calculate string length of %i; %s"):format(
            elapsed,
            fakeEmailsString:len(),
            ("%i%% valid emails of %i"):format(#ValidEmails / #fakeEmails * 100, #fakeEmails)
        ))
    end

    for _, batchSize in ipairs(batchSizes) do
        speedTestFakeEmails(batchSize)
    end
end
-- Run the match cases first, then the timing benchmark.
runTests()
speedTest()
Methods
-
gex.compile(regex: string): Gex
-
gex:find(text: string, init: number?, multiple: boolean?): {{value: string, start: number, finish: number}}
-
gex:match(text: string, init: number?, multiple: boolean?): {value: string, start: number, finish: number}