I wrote a text compression program which can be used to fit more data inside of a datastore key. It’s based on the LZW algorithm and uses the 127 different characters from \1 to \127 that datastores support.
Edit: Some characters take up more than one character's worth of space when encoded and stored in a datastore. Control characters mostly take up 6 characters each, and " and \ each take up 2 characters. The code has been updated to avoid using such characters in the compressed output; it now uses 93 unique characters.
For an example benchmark, I gathered the sources of all of the default chat and camera scripts and concatenated them into one string. It was able to be compressed from 126590 characters to 62317, less than half the original size, in under 0.2 seconds. If you’re compressing data that’s repetitive and contains lots of common substrings then it will get a compression ratio much better than half.
I should warn that if you’re taking something that’s larger than what can be stored in a datastore, compressing it isn’t a guarantee that the result will be small enough to fit in a single key.
Source:
-- Two-way code table for the initial LZW alphabet: dictionary[char] gives the
-- zero-based code and dictionary[code] gives the character back.
-- Bytes 32..127 are included except 34 (") and 92 (\), giving codes 0..93.
local dictionary, length = {}, 0
for byte = 32, 127 do
    local excluded = byte == 34 or byte == 92
    if not excluded then
        local char = string.char(byte)
        dictionary[char] = length
        dictionary[length] = char
        length = length + 1
    end
end
-- Two-way lookup between characters that must not appear literally in the
-- text fed to the compressor (control characters 1..31, '"', '\' and DEL)
-- and the printable code that follows the "\127" escape marker.
-- escapemap[raw] gives the code; escapemap[code] gives the raw character.
--
-- Every code must be a character that exists in the compressor's alphabet
-- (bytes 32..126 except 34 and 92) and no code may be used twice.
-- The plain "byte + 31" rule breaks for two bytes, which get dedicated codes:
--   * byte 3   would map to 34 ('"'), which is not in the alphabet and
--     collides with the entry for '"' itself;
--   * byte 127 would map to 158, which is not in the alphabet either.
-- All other pairs are unchanged, so strings compressed earlier still decode.
local escapemap = {}
do
    local function link(byte, code)
        local raw, escaped = string.char(byte), string.char(code)
        escapemap[raw], escapemap[escaped] = escaped, raw
    end
    for byte = 1, 31 do
        if byte ~= 3 then
            link(byte, byte + 31) -- codes 32, 33, 35..62
        end
    end
    link(3, 63)    -- \3  <-> '?'
    link(127, 64)  -- DEL <-> '@'
    link(34, 65)   -- '"' <-> 'A'
    link(92, 123)  -- '\' <-> '{'
end
-- Replaces every control character, double quote and backslash in `s` with
-- the two-character sequence "\127" .. escapemap[character].
-- Returns only the rewritten string (the gsub match count is dropped).
local function escape(s)
    local function encode(char)
        return "\127"..escapemap[char]
    end
    local escaped = s:gsub("[%c\"\\]", encode)
    return escaped
end
-- Reverses escape(): each "\127" followed by one character is replaced by
-- escapemap[that character]. When the table has no entry for the character
-- the two-character sequence is left untouched.
-- Returns only the rewritten string (the gsub match count is dropped).
local function unescape(s)
    -- gsub looks the first capture up in the table passed as replacement
    local restored = s:gsub("\127(.)", escapemap)
    return restored
end
-- Returns a new table holding the same key/value pairs as `t`.
-- The copy is shallow: nested tables are shared, not duplicated.
local function copy(t)
    local duplicate = {}
    for key, value in pairs(t) do
        duplicate[key] = value
    end
    return duplicate
end
-- Converts the number `n` to a base-93 string, most significant digit first.
-- The digit with value d is written as the character dictionary[d].
-- At least one digit is always produced, so 0 becomes dictionary[0].
local function tobase93(n)
    local digits = ""
    while true do
        local digit = n%93
        digits = dictionary[digit]..digits
        n = (n - digit)/93
        if n == 0 then
            return digits
        end
    end
end
-- Converts a base-93 string (most significant digit first) back to a number.
-- Each character's digit value is looked up in `dictionary`.
local function tobase10(value)
    local total = 0
    local count = #value
    -- walk from the last (least significant) character to the first
    for position = count, 1, -1 do
        local digit = dictionary[value:sub(position, position)]
        total = total + 93^(count - position)*digit
    end
    return total
end
-- Compresses `text` with LZW and returns it as a printable string.
--
-- Output layout: "<spans>|<codes>".
--   * <codes> is every emitted LZW code written in base 93 (see tobase93).
--     Codes are written with a fixed width that only ever grows; shorter
--     values are left-padded with " " (the zero digit).
--   * <spans> is a comma-separated list in which the n-th number is how many
--     codes were written with width n.
--
-- The text is passed through escape() first. An empty `text` yields "0|".
-- Raises an error if the escaped text contains a character that is not in
-- the initial dictionary.
local function compress(text)
    local dictionary = copy(dictionary)
    -- `size` is the highest code in use; new entries get size + 1
    local key, sequence, size = "", {}, #dictionary
    local width, spans, span = 1, {}, 0
    local function listkey(key)
        local value = tobase93(dictionary[key])
        if #value > width then
            -- close the current width and start counting at the new one
            spans[width] = span
            -- a code may be more than one digit wider than the previous
            -- width; record the skipped widths as empty so that `spans`
            -- stays a gap-free list
            for skipped = width + 1, #value - 1 do
                spans[skipped] = 0
            end
            width, span = #value, 0
        end
        sequence[#sequence+1] = (" "):rep(width - #value)..value
        span = span + 1
    end
    text = escape(text)
    for i = 1, #text do
        local c = text:sub(i, i)
        if dictionary[c] == nil then
            -- without this check the failure surfaces later as an
            -- arithmetic-on-nil error inside tobase93
            error(("compress: unsupported character (byte %d)"):format(c:byte()), 2)
        end
        local new = key..c
        if dictionary[new] then
            key = new
        else
            listkey(key)
            key, size = c, size+1
            dictionary[new], dictionary[size] = size, new
        end
    end
    -- `key` is only empty when `text` was empty; there is no code to emit
    if key ~= "" then
        listkey(key)
    end
    spans[width] = span
    return table.concat(spans, ",").."|"..table.concat(sequence)
end
-- Rebuilds the original string from the output of compress().
--
-- `text` has the form "<spans>|<codes>": the n-th number in <spans> is how
-- many base-93 codes of width n follow in <codes>. The codes are decoded
-- with LZW, rebuilding the dictionary on the fly, and the result is passed
-- through unescape().
local function decompress(text)
    local words = copy(dictionary)
    local header, body = text:match("(.-)|(.*)")

    -- cut the body into one run of characters per code width
    local runs, cursor = {}, 1
    for count in header:gmatch("%d+") do
        local width = #runs+1
        local bytes = count*width
        runs[width] = body:sub(cursor, cursor + bytes - 1)
        cursor = cursor + bytes
    end

    local output, last = {}, nil
    for width, run in ipairs(runs) do
        for code in run:gmatch(("."):rep(width)) do
            local word = words[tobase10(code)]
            if not last then
                -- very first code: nothing to extend yet
                output[1] = word
            elseif word then
                output[#output+1] = word
                words[#words+1] = last..word:sub(1, 1)
            else
                -- the code refers to the entry that is about to be created
                word = last..last:sub(1, 1)
                output[#output+1] = word
                words[#words+1] = word
            end
            last = word
        end
    end
    return unescape(table.concat(output))
end
previous source
This older version uses all 127 characters that datastores support. However, most control characters take up 6 characters' worth of space when encoded, so the stored result is much bigger.
-- Two-way code table for the initial alphabet: the character with byte
-- value b (1..127) has code b - 1, so codes run from 0 to 126.
-- dictionary[char] gives the code and dictionary[code] gives the character.
local dictionary = {}
for code = 0, 126 do
    local char = string.char(code + 1)
    dictionary[char] = code
    dictionary[code] = char
end
-- Makes a shallow copy of `t`: a fresh table with the same keys and values.
local function copy(t)
    local clone = {}
    for field, item in pairs(t) do
        clone[field] = item
    end
    return clone
end
-- Converts the number `n` to a base-127 string, most significant digit
-- first. The digit with value d is written as the character dictionary[d].
-- At least one digit is always produced, so 0 becomes dictionary[0].
local function tobase127(n)
    local digits = ""
    while true do
        local digit = n%127
        digits = dictionary[digit]..digits
        n = (n - digit)/127
        if n == 0 then
            return digits
        end
    end
end
-- Converts a base-127 string (most significant digit first) back to a
-- number. Each character's digit value is looked up in `dictionary`.
local function tobase10(value)
    local total = 0
    local count = #value
    -- walk from the last (least significant) character to the first
    for position = count, 1, -1 do
        local digit = dictionary[value:sub(position, position)]
        total = total + 127^(count - position)*digit
    end
    return total
end
-- Compresses `text` with LZW and returns it as a string over bytes 1..127.
--
-- Output layout: "<spans>|<codes>".
--   * <codes> is every emitted LZW code written in base 127 (see tobase127).
--     Codes are written with a fixed width that only ever grows; shorter
--     values are left-padded with "\1" (the zero digit).
--   * <spans> is a comma-separated list in which the n-th number is how many
--     codes were written with width n.
--
-- An empty `text` yields "0|". Raises an error if `text` contains a
-- character that is not in the initial dictionary.
local function compress(text)
    local dictionary = copy(dictionary)
    -- `size` is the highest code in use; new entries get size + 1
    local key, sequence, size = "", {}, #dictionary
    local width, spans, span = 1, {}, 0
    local function listkey(key)
        local value = tobase127(dictionary[key])
        if #value > width then
            -- close the current width and start counting at the new one
            spans[width] = span
            -- a code may be more than one digit wider than the previous
            -- width; record the skipped widths as empty so that `spans`
            -- stays a gap-free list
            for skipped = width + 1, #value - 1 do
                spans[skipped] = 0
            end
            width, span = #value, 0
        end
        sequence[#sequence+1] = ("\1"):rep(width - #value)..value
        span = span + 1
    end
    for i = 1, #text do
        local c = text:sub(i, i)
        if dictionary[c] == nil then
            -- without this check the failure surfaces later as an
            -- arithmetic-on-nil error inside tobase127
            error(("compress: unsupported character (byte %d)"):format(c:byte()), 2)
        end
        local new = key..c
        if dictionary[new] then
            key = new
        else
            listkey(key)
            key, size = c, size+1
            dictionary[new], dictionary[size] = size, new
        end
    end
    -- `key` is only empty when `text` was empty; there is no code to emit
    if key ~= "" then
        listkey(key)
    end
    spans[width] = span
    return table.concat(spans, ",").."|"..table.concat(sequence)
end
-- Rebuilds the original string from the output of compress().
--
-- `text` has the form "<spans>|<codes>": the n-th number in <spans> is how
-- many base-127 codes of width n follow in <codes>. The codes are decoded
-- with LZW, rebuilding the dictionary on the fly.
local function decompress(text)
    local words = copy(dictionary)
    local header, body = text:match("(.-)|(.*)")

    -- cut the body into one run of characters per code width
    local runs, cursor = {}, 1
    for count in header:gmatch("%d+") do
        local width = #runs+1
        local bytes = count*width
        runs[width] = body:sub(cursor, cursor + bytes - 1)
        cursor = cursor + bytes
    end

    local output, last = {}, nil
    for width, run in ipairs(runs) do
        for code in run:gmatch(("."):rep(width)) do
            local word = words[tobase10(code)]
            if not last then
                -- very first code: nothing to extend yet
                output[1] = word
            elseif word then
                output[#output+1] = word
                words[#words+1] = last..word:sub(1, 1)
            else
                -- the code refers to the entry that is about to be created
                word = last..last:sub(1, 1)
                output[#output+1] = word
                words[#words+1] = word
            end
            last = word
        end
    end
    return table.concat(output)
end
The two functions `compress(text)` and `decompress(text)` are used to compress and decompress a string. If you want to use this as a module, you can add `return {compress = compress, decompress = decompress}` to the end of it.