How to optimize this math module?

Essentially, this math module is intended for people who want to do math with more than 32 bits. I am not happy with its speed, as it takes about a quarter of a second to do an operation. Here it is:

-- 128-bit math library.
-- Values are tables holding bits/16 limbs at integer keys 0 .. bits/16-1.
local math128 = {}
-- Alias used by every function in this file. Declared local so it does not
-- leak into the global environment.
local self = math128
local bits = 128

-- The limb loops below step in units of 16 bits.
assert(bits % 16 == 0, "Bits must be divisible by 16")

-- Two-way lookup between a stored limb (a 16-bit value rotated left by 8)
-- and its two-character string (high byte first, then low byte).
local base65536 = {}
for value = 0, 65535 do
	local limb = bit32.lrotate(value, 8)
	local high_byte = bit32.extract(value, 8, 8)
	local low_byte = bit32.extract(value, 0, 8)
	local text = string.char(high_byte, low_byte)

	base65536[limb] = text
	base65536[text] = limb
end

-- Two-way lookup between a stored limb (a 16-bit value rotated left by 8)
-- and its four-digit, zero-padded, upper-case hexadecimal string.
local hex = {}
for value = 0, 65535 do
	local limb = bit32.lrotate(value, 8)
	local digits = string.format("%04X", value)

	hex[limb] = digits
	hex[digits] = limb
end

--- Formats the low 16 bits of `number` as a string of "0"/"1" characters,
-- most significant bit first.
-- @param number value whose bits 0..15 are read with bit32.extract
-- @return 16-character string
Get16BitBinary = function(number)
	local digits = {}
	for position = 15, 0, -1 do
		digits[#digits + 1] = bit32.extract(number, position, 1)
	end
	return table.concat(digits)
end

-- Two-way lookup between a stored limb (a 16-bit value rotated left by 8)
-- and its 16-character binary string as produced by Get16BitBinary.
local binary = {}
for value = 0, 65535 do
	local limb = bit32.lrotate(value, 8)
	local digits = Get16BitBinary(value)

	binary[limb] = digits
	binary[digits] = limb
end

-- Metatable shared by every value. It is built on the first call instead of
-- at load time because the functions on `self` that it references are
-- assigned further down the file than this definition.
local shared_nk_metatable

--- Creates a new value table.
-- Allocating one metatable and one method table for all values (instead of a
-- fresh metatable plus sixteen method fields per value) removes most of the
-- per-operation allocation cost, since every operator creates new values.
-- @param oldnk optional table; when given, its entries 0 .. bits/16-1 are
--   copied into the new value. When omitted every limb starts at 0.
-- @return table with limbs at integer keys 0 .. bits/16-1 and the fields
--   `underflow` and `overflow` (both false)
function create_default_nk(oldnk)
	if shared_nk_metatable == nil then
		shared_nk_metatable = {
			-- Methods reachable as nk:name(...)
			__index = {
				lrotate = self.RotateLeft,
				rrotate = self.RotateRight,

				lshift = self.ShiftLeft,
				rshift = self.ShiftRight,

				band = self.BitAnd,
				bor = self.BitOr,
				bxor = self.BitXor,
				bnot = self.BitNot,

				GetBitString = self.GetBitString,

				ExportBinary = self.ExportBinary,
				ExportHex = self.ExportHex,
				ExportASCII = self.ExportASCII,
				ExportNumber = self.ExportNumber,
				ExportDecimal = self.ExportDecimal,
				ExportCustom = self.ExportCustom,
			},

			__add = self.Add,
			__sub = self.Subtract,
			__div = self.Divide,
			__mul = self.Multiply,
			__mod = self.Modulo,
			__pow = self.Power,

			__eq = self.EQ,
			__lt = self.LT,
			-- The original assigned self.LE, which is never defined, so
			-- `<=` had no handler of its own. a <= b is defined here as
			-- "not (b < a)".
			__le = function(a, b)
				return not self.LT(b, a)
			end,
		}
	end

	local nk = setmetatable({
		underflow = false,
		overflow = false, --overflow is also carry
	}, shared_nk_metatable)

	local top = bits / 16 - 1
	if oldnk then
		for i = top, 0, -1 do
			nk[i] = oldnk[i]
		end
	else
		for i = top, 0, -1 do
			nk[i] = 0
		end
	end

	return nk
end

-- Ready-made alphabets. They are not used by the module itself but can be
-- passed as `basecharacters` to ExportCustom / ImportCustom.
-- Base64 holds 64 characters: digits, lower case, upper case, "-" and "+".
-- NOTE(review): the Base94 literal below actually holds 93 characters (the
-- printable ASCII range without the double quote and the backslash), so it
-- yields base 93 — confirm whether a character is missing.
self.Base64 = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-+"
self.Base94 = " !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~"

--- Builds a value from a table, a number, or a numeric string.
-- A table argument is copied limb by limb. Anything else goes through
-- tonumber().
-- Numbers below 2^32 are split with bit32 exactly as before. Numbers in
-- [2^32, 2^bits) used to be cut down to 32 bits by bit32; they are now spread
-- over all limbs (the fractional part is dropped). Values outside that range
-- keep the previous bit32 behaviour.
-- @param number table | number | string
-- @return new value table
self.new = function(number)
	if type(number) == "table" then
		return create_default_nk(number)
	end

	local value = tonumber(number)
	if value == nil then
		-- Previously this surfaced as an unrelated bit32 argument error.
		error("math128.new: cannot convert " .. tostring(number) .. " to a number", 2)
	end

	local nk = create_default_nk()

	if value >= 2^32 and value < 2^bits then
		-- Dividing by a power of two is exact for doubles, so each 16-bit
		-- limb can be peeled off arithmetically.
		local whole = math.floor(value)
		for i = 0, bits / 16 - 1 do
			nk[i] = (math.floor(whole / 2^(16 * i)) % 65536) * 256
		end
	else
		nk[1] = bit32.lrotate(bit32.extract(value, 16, 16), 8)
		nk[0] = bit32.lrotate(bit32.extract(value, 0, 16), 8)
	end

	return nk
end


--- Returns the low 32 bits of a value as a plain number.
-- Only limbs 0 and 1 are read; a missing limb 1 counts as 0.
-- @param nk value table
-- @return number built from limb 1 (high half) and limb 0 (low half)
self.ExportNumber = function(nk)
	local high_half = bit32.extract(nk[1] or 0, 8, 16)
	local low_half = bit32.extract(nk[0], 8, 16)
	return high_half * 65536 + low_half
end

--- Renders a value as a binary string, highest limb first, using the
-- `binary` lookup table (16 characters per limb).
-- @param nk value table
-- @return string of "0"/"1" characters
self.ExportBinary = function(nk)
	local text = ""
	local limb = bits / 16 - 1
	while limb >= 0 do
		text ..= binary[nk[limb]]
		limb -= 1
	end
	return text
end

--- Renders a value as a hexadecimal string, highest limb first, using the
-- `hex` lookup table (4 characters per limb).
-- @param nk value table
-- @return upper-case hexadecimal string
self.ExportHex = function(nk)
	local text = ""
	local limb = bits / 16 - 1
	while limb >= 0 do
		text ..= hex[nk[limb]]
		limb -= 1
	end
	return text
end

--- Renders a value as raw bytes, highest limb first, using the `base65536`
-- lookup table (2 characters per limb).
-- @param nk value table
-- @return byte string
self.ExportASCII = function(nk)
	local text = ""
	local limb = bits / 16 - 1
	while limb >= 0 do
		text ..= base65536[nk[limb]]
		limb -= 1
	end
	return text
end

--- Renders a value as a decimal string by repeatedly taking the remainder
-- modulo 10 and dividing by 10 until the value reaches zero.
-- @param nk value table, number, or numeric string (converted with self.new)
-- @return decimal string; "0" for a zero value
self.ExportDecimal = function(nk)
	local remaining = self.new(nk)
	local digits = ""

	repeat
		local digit = (remaining % 10):ExportNumber()
		digits = digit .. digits
		remaining = remaining / 10
	until self.EQ(remaining, 0)

	return digits
end

--- Renders a value in an arbitrary base. The base is the number of
-- characters in `basecharacters`; the character at position d+1 is used for
-- digit d.
-- @param nk value table (used as-is, not converted)
-- @param basecharacters string listing the digit characters in order
-- @return string of digit characters, most significant first
self.ExportCustom = function(nk, basecharacters)
	local alphabet = string.split(basecharacters, "")
	local radix = #alphabet
	local text = ""

	repeat
		local position = (nk % radix):ExportNumber() + 1
		text = alphabet[position] .. text
		nk = nk / radix
	until self.EQ(nk, 0)

	return text
end


-- ImportNumber is the same function as self.new, exposed under a name that
-- matches the other Import* functions.
self.ImportNumber = self.new

--- Parses a binary string as produced by ExportBinary: 16 characters per
-- limb, highest limb first.
-- The previous version sliced 8 characters at a time and looked them up in
-- the `hex` table, so every limb came back nil.
-- @param hk string of "0"/"1" characters, bits characters long
-- @return new value table; a limb is nil if its 16-character chunk is not a
--   key of the `binary` table
self.ImportBinary = function(hk)
	local nk = create_default_nk()
	local top = bits / 16 - 1

	for i = 0, top * 16, 16 do
		nk[top - i / 16] = binary[hk:sub(i + 1, i + 16)]
	end

	return nk
end

--- Parses a hexadecimal string as produced by ExportHex: 4 characters per
-- limb, highest limb first, looked up in the `hex` table.
-- @param hk hexadecimal string
-- @return new value table; a limb is nil if its chunk is not a key of `hex`
self.ImportHex = function(hk)
	local nk = create_default_nk()
	local top = bits / 16 - 1

	for limb = top, 0, -1 do
		local offset = (top - limb) * 4
		nk[limb] = hex[hk:sub(offset + 1, offset + 4)]
	end

	return nk
end

--- Parses a byte string as produced by ExportASCII: 2 characters per limb,
-- highest limb first, looked up in the `base65536` table.
-- @param tk byte string
-- @return new value table; a limb is nil if its chunk is shorter than 2 bytes
self.ImportASCII = function(tk)
	local nk = create_default_nk()
	local top = bits / 16 - 1

	for limb = top, 0, -1 do
		local offset = (top - limb) * 2
		nk[limb] = base65536[tk:sub(offset + 1, offset + 2)]
	end

	return nk
end

--- Parses a decimal string into a value.
-- Each character is folded in as value = value * 10 + digit. The previous
-- version multiplied once too often and divided by 10 at the end; that extra
-- multiply overflowed for inputs above 2^bits / 10 and cost a full division.
-- @param dk string of decimal digits, most significant first
-- @return new value table (zero for an empty string)
self.ImportDecimal = function(dk)
	local nk = create_default_nk()

	for i in dk:gmatch(".") do
		nk *= 10
		nk += i -- Add converts the digit character through self.new
	end

	return nk
end

--- Parses a string written in an arbitrary base, the inverse of ExportCustom.
-- Each character is folded in as value = value * base + digit. The previous
-- version multiplied once too often and divided at the end; that extra
-- multiply overflowed for inputs above 2^bits / base and cost a full division.
-- @param ck string of digit characters, most significant first
-- @param basecharacters string listing the digit characters in order; the
--   character at position d+1 stands for digit d
-- @return new value table (zero for an empty string)
self.ImportCustom = function(ck, basecharacters)
	local base = basecharacters:split("")
	local radix = #base

	-- character -> digit value; a repeated character keeps its last position
	local digit_of = {}
	for position, character in ipairs(base) do
		digit_of[character] = position - 1
	end

	local nk = create_default_nk()

	for character in ck:gmatch(".") do
		local digit = digit_of[character]
		if digit == nil then
			error("math128.ImportCustom: character '" .. character .. "' is not in basecharacters", 2)
		end
		nk *= radix
		nk += digit
	end

	return nk
end


--- Reports whether bit 24 of `n` is set (the carry bit just above a limb).
-- @param n number
-- @return boolean
function checkoverflow(n)
	return bit32.band(n, 0x1000000) ~= 0
end

--- Reports whether bit 7 of `n` is set (the bit just below a limb).
-- @param n number
-- @return boolean
function checkunderflow(n)
	return bit32.band(n, 0x80) ~= 0
end

--- Adds 2^24 to `n`, which sets bit 24 when it was previously clear.
-- @param n number
-- @return number
function setoverflow(n)
	return n + 0x1000000
end

--- Adds 2^7 to `n`, which sets bit 7 when it was previously clear.
-- @param n number
-- @return number
function setunderflow(n)
	return n + 0x80
end

--- Keeps only bits 8..23 of `n`, i.e. the 16-bit limb without the carry bit
-- above it or the low byte below it.
-- @param n number
-- @return number
function getroot(n)
	return bit32.band(n, 0x00FFFF00)
end

--- Reports whether bit 8 of `n` is set (the lowest bit of a stored limb).
-- @param n number
-- @return boolean
function first(n)
	return bit32.extract(n, 8, 1) == 1
end


--- Rotates a value left by one bit; the top bit of the highest limb wraps
-- around into the lowest bit.
-- @param nk value table, number, or numeric string (copied with self.new)
-- @return new value; its `overflow` field holds the bit carried out of the
--   highest limb
self.RotateLeft = function(nk)
	local rotated = self.new(nk)
	local top = bits / 16 - 1

	-- Seed the carry with the bit that is about to fall off the top limb.
	local carry = checkoverflow(bit32.lrotate(rotated[top], 1))
	for index = 0, top do
		local limb = rotated[index]
		if carry then
			limb = setunderflow(limb)
		end

		limb = bit32.lrotate(limb, 1)
		carry = checkoverflow(limb)
		rotated[index] = getroot(limb)
	end
	rotated.overflow = carry

	return rotated
end

--- Rotates a value right by one bit; the lowest bit wraps around into the
-- top bit of the highest limb.
-- @param nk value table, number, or numeric string (copied with self.new)
-- @return new value; its `underflow` field holds the bit carried out of
--   limb 0
self.RotateRight = function(nk)
	local rotated = self.new(nk)
	local top = bits / 16 - 1

	-- Seed the carry with the bit that is about to fall off limb 0.
	local carry = checkunderflow(bit32.rrotate(rotated[0], 1))
	for index = top, 0, -1 do
		local limb = rotated[index]
		if carry then
			limb = setoverflow(limb)
		end

		limb = bit32.rrotate(limb, 1)
		carry = checkunderflow(limb)
		rotated[index] = getroot(limb)
	end
	rotated.underflow = carry

	return rotated
end

--- Shifts a value left by one bit.
-- @param nk value table, number, or numeric string (copied with self.new)
-- @param overflow when truthy, a 1 is shifted into the lowest bit; otherwise
--   a 0 is shifted in
-- @return new value; its `overflow` field holds the bit shifted out of the
--   highest limb
self.ShiftLeft = function(nk, overflow)
	local shifted = self.new(nk)

	local carry = overflow
	for index = 0, bits / 16 - 1 do
		local limb = shifted[index]
		if carry then
			limb = setunderflow(limb)
		end

		limb = bit32.lrotate(limb, 1)
		carry = checkoverflow(limb)
		shifted[index] = getroot(limb)
	end
	shifted.overflow = carry

	return shifted
end

--- Shifts a value right by one bit.
-- @param nk value table, number, or numeric string (copied with self.new)
-- @param underflow when truthy, a 1 is shifted into the top bit of the
--   highest limb; otherwise a 0 is shifted in
-- @return new value; its `underflow` field holds the bit shifted out of
--   limb 0
self.ShiftRight = function(nk, underflow)
	local shifted = self.new(nk)

	local carry = underflow
	for index = bits / 16 - 1, 0, -1 do
		local limb = shifted[index]
		if carry then
			limb = setoverflow(limb)
		end

		limb = bit32.rrotate(limb, 1)
		carry = checkunderflow(limb)
		shifted[index] = getroot(limb)
	end
	shifted.underflow = carry

	return shifted
end


--- Bitwise AND of two values, limb by limb.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return new value
self.BitAnd = function(nk1, nk2)
	local left = self.new(nk1)
	local right = self.new(nk2)

	local index = 0
	while index < bits / 16 do
		left[index] = bit32.band(left[index], right[index])
		index += 1
	end

	return left
end

--- Bitwise OR of two values, limb by limb.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return new value
self.BitOr = function(nk1, nk2)
	local left = self.new(nk1)
	local right = self.new(nk2)

	local index = 0
	while index < bits / 16 do
		left[index] = bit32.bor(left[index], right[index])
		index += 1
	end

	return left
end

--- Bitwise XOR of two values, limb by limb.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return new value
self.BitXor = function(nk1, nk2)
	local left = self.new(nk1)
	local right = self.new(nk2)

	local index = 0
	while index < bits / 16 do
		left[index] = bit32.bxor(left[index], right[index])
		index += 1
	end

	return left
end

--- Bitwise NOT of a value, limb by limb.
-- NOT is a unary operation. The previous version also converted a second
-- operand, so calling nk:bnot() failed on the nil argument, and it stored the
-- raw bit32.bnot result, which sets bits outside the limb window (bits
-- 8..23). Such limbs are not keys of the hex/binary/base65536 lookup tables.
-- @param nk1 value table, number, or numeric string
-- @param nk2 ignored; kept so existing two-argument calls still work
-- @return new value
self.BitNot = function(nk1, nk2)
	nk1 = self.new(nk1)

	for i = 0, (bits/16-1) do
		-- getroot drops the inverted carry bit and low byte again
		nk1[i] = getroot(bit32.bnot(nk1[i]))
	end

	return nk1
end

--- Formats the 32 bits of `number` as a string of "0"/"1" characters, most
-- significant bit first.
-- @param number value whose bits 0..31 are read with bit32.extract
-- @return 32-character string
Get32BitBinary = function(number)
	local digits = {}
	for position = 31, 0, -1 do
		digits[#digits + 1] = bit32.extract(number, position, 1)
	end
	return table.concat(digits)
end

--- Adds two values limb by limb, lowest limb first, propagating the carry.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return new value; its `overflow` field is true when the highest limb
--   produced a carry
self.Add = function(nk1, nk2)
	local sum = self.new(nk1)
	local addend = self.new(nk2)

	local carry = false
	for index = 0, bits / 16 - 1 do
		local limb = sum[index]
		if carry then
			-- limbs live in bits 8..23, so a carry of one is 2^8
			limb = limb + 2^8
		end

		limb = limb + addend[index]
		carry = checkoverflow(limb)
		sum[index] = getroot(limb)
	end
	sum.overflow = carry

	return sum
end

--- Subtracts nk2 from nk1 limb by limb, lowest limb first.
-- Each limb first receives an extra 2^24; if that bit is still set after the
-- subtraction, the limb did not need to borrow from the next one.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return new value; its `overflow` field is true when the highest limb did
--   not borrow
self.Subtract = function(nk1, nk2)
	local difference = self.new(nk1)
	local subtrahend = self.new(nk2)

	local no_borrow = true
	for index = 0, bits / 16 - 1 do
		local limb = setoverflow(difference[index])
		if not no_borrow then
			-- pay back the borrow taken by the previous limb
			limb = limb - 2^8
		end

		limb = limb - subtrahend[index]
		no_borrow = checkoverflow(limb)
		difference[index] = getroot(limb)
	end
	difference.overflow = no_borrow

	return difference
end

--- Multiplies two values, keeping the low `bits` bits of the product.
-- Uses limb-wise schoolbook multiplication on plain numbers. The previous
-- bit-serial version ran `bits` iterations and allocated several new values
-- in each one, which made multiplication (and everything built on it) slow.
-- A 16x16-bit partial product plus a limb and a carry stays below 2^32, so
-- it is exact in a double.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return new value holding (nk1 * nk2) mod 2^bits; the `overflow` and
--   `underflow` fields are left at false
self.Multiply = function(nk1, nk2)
	nk1 = self.new(nk1)
	nk2 = self.new(nk2)

	local limbs = bits / 16

	-- Unpack the stored limbs (bits 8..23) into plain 16-bit numbers.
	local a, b, product = {}, {}, {}
	for i = 0, limbs - 1 do
		a[i] = bit32.extract(nk1[i], 8, 16)
		b[i] = bit32.extract(nk2[i], 8, 16)
		product[i] = 0
	end

	for i = 0, limbs - 1 do
		local ai = a[i]
		if ai ~= 0 then
			local carry = 0
			-- Limbs at index >= limbs would only hold overflow; skip them.
			for j = 0, limbs - 1 - i do
				local sum = product[i + j] + ai * b[j] + carry
				local low = sum % 65536
				product[i + j] = low
				carry = (sum - low) / 65536
			end
		end
	end

	local resault = create_default_nk()
	for i = 0, limbs - 1 do
		resault[i] = product[i] * 256 -- back to the bits 8..23 layout
	end

	return resault
end

--- Integer division nk1 / nk2, one bit per iteration.
-- nk1 doubles as the quotient: each iteration shifts its top bit out into
-- division_register and shifts the outcome of the previous trial subtraction
-- in at the bottom.
-- NOTE(review): there is no check for a zero divisor; subtracting zero looks
-- like it always leaves the overflow flag set, which would yield an all-ones
-- quotient — confirm.
-- @param nk1 dividend: value table, number, or numeric string
-- @param nk2 divisor: value table, number, or numeric string
-- @return new value holding the quotient
self.Divide = function(nk1, nk2)
	nk1 = self.new(nk1)
	nk2 = self.new(nk2)
	
	-- division_register: the running remainder
	-- subtraction_register: running remainder minus the divisor; its
	-- `overflow` flag (set by Subtract) is true when no borrow was needed
	local division_register = create_default_nk()
	local subtraction_register = create_default_nk()
	subtraction_register.overflow = false
	
	for i = 1, bits do
		-- The previous trial subtraction succeeded: keep its result.
		if subtraction_register.overflow then
			division_register = subtraction_register
		end
		
		-- Shift the previous quotient bit into nk1; the dividend bit shifted
		-- out of nk1 (nk1.overflow) enters the running remainder.
		nk1 = nk1:lshift(subtraction_register.overflow)
		division_register = division_register:lshift(nk1.overflow)
		
		-- Trial subtraction on a copy so division_register stays intact.
		subtraction_register = create_default_nk(division_register)
		subtraction_register -= nk2
	end
	
	-- Shift in the last quotient bit. The final trial subtraction is not
	-- committed to division_register here.
	nk1 = nk1:lshift(subtraction_register.overflow)
	return nk1
end

--- Remainder of nk1 divided by nk2, computed as nk1 - nk2 * (nk1 / nk2)
-- instead of being read out of the division routine's registers.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return new value
self.Modulo = function(nk1, nk2)
	local dividend = self.new(nk1)
	local divisor = self.new(nk2)

	local quotient = dividend / divisor
	return dividend - divisor * quotient
end

--- Raises nk1 to the power nk2 with square-and-multiply, scanning the
-- exponent from its most significant bit.
-- `exponent_first` was declared but never used. It now records whether a set
-- exponent bit has been seen, so the squarings of 1 for leading zero bits are
-- skipped. The result is unchanged because 1 * 1 is 1.
-- @param nk1 base: value table, number, or numeric string
-- @param nk2 exponent: value table, number, or numeric string
-- @return new value holding the low `bits` bits of nk1 ^ nk2
self.Power = function(nk1, nk2)
	nk1 = self.new(nk1)
	nk2 = self.new(nk2)
	local resault = self.new(1)

	local exponent_first = false
	for i = 1, bits do
		-- Rotating left brings the next-highest exponent bit to bit 0.
		nk2 = nk2:lrotate()

		if exponent_first then
			resault *= resault
		end
		if first(nk2[0]) then
			resault *= nk1
			exponent_first = true
		end
	end

	return resault
end

--- Computes (nk1 ^ nk2) mod nk3 with square-and-multiply, scanning the
-- exponent from its most significant bit.
-- The base is reduced up front and the result is reduced after every
-- multiplication. The previous version reduced once per iteration, after both
-- the squaring and the multiply, so the intermediate product could exceed
-- `bits` bits and wrap for moduli far smaller than 2^(bits/2).
-- `exponent_first` (previously unused) records whether a set exponent bit has
-- been seen, so no work is done for leading zero bits.
-- @param nk1 base: value table, number, or numeric string
-- @param nk2 exponent: value table, number, or numeric string
-- @param nk3 modulus: value table, number, or numeric string
-- @return new value
self.PowerWithMod = function(nk1, nk2, nk3)
	nk1 = self.new(nk1)
	nk2 = self.new(nk2)
	nk3 = self.new(nk3)
	nk1 %= nk3
	local resault = self.new(1)

	local exponent_first = false
	for i = 1, bits do
		-- Rotating left brings the next-highest exponent bit to bit 0.
		nk2 = nk2:lrotate()

		if exponent_first then
			resault *= resault
			resault %= nk3
		end
		if first(nk2[0]) then
			resault *= nk1
			resault %= nk3
			exponent_first = true
		end
	end

	if not exponent_first then
		-- Zero exponent: still reduce the initial 1, as the previous
		-- version did on every iteration.
		resault %= nk3
	end

	return resault
end


--- Tests two values for equality by comparing their limbs.
-- Operands that are already tables are read directly; only numbers and
-- strings are converted. The previous version copied both operands on every
-- call although nothing is modified.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return boolean
self.EQ = function(nk1, nk2)
	if type(nk1) ~= "table" then
		nk1 = self.new(nk1)
	end
	if type(nk2) ~= "table" then
		nk2 = self.new(nk2)
	end

	for i = (bits/16-1), 0, -1 do
		if nk1[i] ~= nk2[i] then
			return false
		end
	end
	return true
end

--- Tests whether nk1 is less than nk2, comparing limbs from the highest down.
-- Operands that are already tables are read directly; only numbers and
-- strings are converted. The previous version copied both operands on every
-- call although nothing is modified.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return boolean
self.LT = function(nk1, nk2)
	if type(nk1) ~= "table" then
		nk1 = self.new(nk1)
	end
	if type(nk2) ~= "table" then
		nk2 = self.new(nk2)
	end

	for i = (bits/16-1), 0, -1 do
		-- the first differing limb from the top decides the order
		if nk1[i] ~= nk2[i] then
			return nk1[i] < nk2[i]
		end
	end
	return false
end

--- Tests whether nk1 is less than or equal to nk2.
-- Defined as "not (nk2 < nk1)" through self.LT, which converts plain numbers.
-- The previous `nk1 < nk2 or nk1 == nk2` gave false for LTE(value, 5) when
-- the value equals 5, because `==` between a table and a number never
-- reaches the equality handler.
-- @param nk1 value table, number, or numeric string
-- @param nk2 value table, number, or numeric string
-- @return boolean
self.LTE = function(nk1, nk2)
	return not self.LT(nk2, nk1)
end

-- The value constructor looks the `<=` handler up under the name `LE`, which
-- was otherwise never defined.
self.LE = self.LTE

--- Tests whether nk1 is greater than nk2 using the `<` operator with the
-- operands swapped.
-- @param nk1 left operand
-- @param nk2 right operand
-- @return boolean
self.GT = function(nk1, nk2)
	return nk2 < nk1
end

--- Tests whether nk1 is greater than or equal to nk2 using the `<=` operator
-- with the operands swapped.
-- @param nk1 left operand
-- @param nk2 right operand
-- @return boolean
self.GTE = function(nk1, nk2)
	return nk2 <= nk1
end


--- Renders a value as a string of "0"/"1" characters by reading bits 8..23
-- of every limb directly; the highest limb and highest bit come first.
-- @param nk value table
-- @return string of bits characters
self.GetBitString = function(nk)
	local digits = {}
	for limb = bits / 16 - 1, 0, -1 do
		for position = 23, 8, -1 do
			digits[#digits + 1] = bit32.extract(nk[limb], position, 1)
		end
	end
	return table.concat(digits)
end

-- Hand the module table (math128) to whoever requires this file.
return self

If anyone has any questions about how it works, feel free to ask. I just need it to work faster, so any suggestions are welcome.

(Also, I’m bad at commenting; in fact, there are almost no comments in the module.)

How fast can your computer do operations on bits? Aim for that.

Before even reviewing this, I’m not sure what the use case is.
The value of 2^32 is about 4.3 billion.
The value of 2^128 is about 3.4 × 10^38 (roughly 340 undecillion); 18.4 quintillion is 2^64.

Can’t you just work with smaller numbers and slap a “quintillion” string after them?
Unless I completely misinterpreted the point of this module, in which case, apologies.

The point of this module is to do operations on numbers such as 649258613960422130613513641289 * 16942571395138581369460262451 without running into floating-point rounding, and still get the exact expected result of the calculation.