I’ve seen a lot of people use Unicode characters (e.g. Ⅰ ⅼIlו ן ⵏ Ι І Ӏ🅸, all of which are just look-alikes for ‘I’) to send messages that bypass the filter, and Roblox’s filter still hasn’t dealt with this effectively, even after months of people reporting it.
I made this script to convert almost all characters that could be used to bypass the filter into their regular equivalents.
local HTTP = game:GetService("HttpService")
-- Lookup table from a look-alike character (key) to the plain character it
-- should be replaced with (value). It starts with two hand-written bracket
-- entries; the rest of the script adds more entries to it.
local confusables = {
	["【"] = "[",
	["】"] = "]",
}
-- Background loader: downloads the Unicode confusables summary and fills
-- `confusables` with look-alike -> base-character entries, then removes the
-- exempted (Cyrillic) letters again. Runs in its own thread so the module can
-- return immediately; until it finishes, `confusables` only holds the entries
-- added elsewhere in this script.
task.spawn(function()
	local MAX_ATTEMPTS = 6 -- total number of download attempts
	local RETRY_DELAY = 4 -- seconds to wait between failed attempts

	local data
	for attempt = 1, MAX_ATTEMPTS do
		local success, result = pcall(HTTP.GetAsync, HTTP, "https://www.unicode.org/Public/security/latest/confusablesSummary.txt")
		if success then
			data = result
			break
		end
		warn("[ERROR] Couldn't get unicode confusable data:", result)
		-- No point in sleeping after the final attempt; we are about to give up.
		if attempt < MAX_ATTEMPTS then
			task.wait(RETRY_DELAY)
		end
	end
	if not data then return end

	-- Each group of confusable characters starts after a blank line with a "#"
	-- header line. The text before the first such separator is the file
	-- header, so iteration starts at the second piece.
	local groups = string.split(data, "\n\n#")
	for index = 2, #groups do
		local group = groups[index]
		-- Only the header line (up to the first line break) is used. If a group
		-- has no line break, treat the whole group as the header instead of
		-- erroring on a nil position.
		local newline = string.find(group, "\n", 1, true) or (#group + 1)
		-- The summary file is tab-delimited: skip the delimiter that follows
		-- "#" and split the rest of the header line on tabs.
		local entries = string.split(string.sub(group, 2, newline - 1), "\t")
		local basecharacter = entries[1]
		-- Skip groups whose base entry is a symbol or contains non-ASCII text.
		if utf8.len(basecharacter) == #basecharacter and string.match(basecharacter, "[%w/\\%[%]%(%){}]") then
			for i = 2, #entries do
				local char = entries[i]
				-- Keep only single code points that are not plain ASCII
				-- (exactly one character, but more than one byte).
				if utf8.len(char) == 1 and #char > 1 then
					confusables[char] = basecharacter
				end
			end
		end
	end

	local exemptions = {
		"АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя", -- remove cyrillic characters from filter
	}
	-- Drop every exempted character from the table so it is left untouched
	-- when text is converted.
	for _, characters in exemptions do
		for char in string.gmatch(characters, utf8.charpattern) do
			confusables[char] = nil
		end
	end
end)
-- Registers enclosed/boxed letter alphabets in `confusables`. Each alphabet is
-- a run of 26 consecutive glyphs; the n-th glyph maps to the n-th letter
-- counting from the alphabet's first letter.
do
	local LETTERS = 26
	local blockalphabets = {
		-- { glyphs, plain letter the first glyph stands for }
		{ "🄰🄱🄲🄳🄴🄵🄶🄷🄸🄹🄺🄻🄼🄽🄾🄿🅀🅁🅂🅃🅄🅅🅆🅇🅈🅉", "A" },
		{ "🅰🅱🅲🅳🅴🅵🅶🅷🅸🅹🅺🅻🅼🅽🅾🅿🆀🆁🆂🆃🆄🆅🆆🆇🆈🆉", "A" },
		-- Circled *small* letters: map to lowercase rather than uppercase so
		-- the converted text keeps its case.
		{ 'ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ', "a" },
	}
	for _, alphabet in blockalphabets do
		local characters, firstLetter = alphabet[1], alphabet[2]
		local offset = string.byte(firstLetter) - 1
		local nextCharacter = string.gmatch(characters, utf8.charpattern)
		for i = 1, LETTERS do
			local character = nextCharacter()
			-- Stop quietly if a string holds fewer glyphs than expected
			-- instead of erroring on a nil match.
			if not character then break end
			confusables[character] = string.char(offset + i)
		end
	end
end
-- Matches one lead byte followed by at least one continuation byte, i.e. a
-- single multi-byte UTF-8 character.
local pattern = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]+"

-- Module entry point: takes a string and returns it with every multi-byte
-- character that has an entry in `confusables` replaced by that entry.
-- Characters without an entry are left as they are.
return function(str: string): string
	-- When the character count equals the byte count there are no multi-byte
	-- characters, so there is nothing to replace.
	local isSingleByteOnly = utf8.len(str) == #str
	if isSingleByteOnly then
		return str
	end
	-- Assigning to a single local keeps only the substituted string and drops
	-- gsub's replacement count.
	local converted: string = string.gsub(str, pattern, confusables)
	return converted
end
- This converts almost all characters that resemble English letters into their equivalents, with the exception of Cyrillic letters, since converting those would make people who speak languages written in Cyrillic unable to talk properly. Please tell me if there are other languages that could be affected by this!
- Most characters resembling symbols are not converted, except those resembling:
\ / ( ) [ ] { }