--[[
    This is a reconstruction of the Huffman compression method included in
    LibCompress. The original was creating too much garbage, so this modified
    algorithm is included instead. The compressed data is also escaped for
    safe transmission through the AddonMessage channel. (Thanks to Stewarta.)
]]

-- includes
local WIM = WIM;
local _G = _G;
local table = table;
local pairs = pairs;
local string = string;
local type = type;
local unpack = unpack;
local math = math;
local bit = bit;
local tostring = tostring;
local setmetatable = setmetatable;
local rawset = rawset;
local rawget = rawget;
local next = next;
local select = select;

-- set namespace
setfenv(1, WIM);

-- Clears all data found within the passed tables: the metatable is removed
-- and every key (array and hash part) is set to nil. Non-table arguments
-- are ignored.
local function clearTables(...)
    for i = 1, select("#", ...) do
        local tbl = select(i, ...);
        if type(tbl) == "table" then
            setmetatable(tbl, nil);
            -- Assigning nil to existing keys is allowed while traversing.
            for k in pairs(tbl) do
                tbl[k] = nil;
            end
        end
    end
end

-- Table Recycling
-- NOTE(review): these two are not declared local, so they live in the
-- environment installed by setfenv above. Kept that way in case other files
-- read them -- confirm before localizing.
tblInUse = {};
tblAvailable = {};

-- Returns an empty table, reusing a previously released one when available.
-- The table is tracked in tblInUse until freeTables() is called.
local function getNewTable()
    local tbl = table.remove(tblAvailable) or {};
    table.insert(tblInUse, tbl);
    return tbl;
end

-- Wipes every table handed out by getNewTable() and moves it to the pool of
-- available tables. Works one table at a time so it does not depend on how
-- many values unpack() can push.
local function freeTables()
    for i = #tblInUse, 1, -1 do
        local tbl = tblInUse[i];
        tblInUse[i] = nil;
        clearTables(tbl);
        tblAvailable[#tblAvailable + 1] = tbl;
    end
end

-- thank you stewarta
-- Escapes bytes 1 and 0 as the two-byte sequences "\002\003" and "\002\004".
-- NOTE(review): byte 2 itself is not escaped, so input that already contains
-- "\002\003" or "\002\004" does not survive huff_decode(huff_encode(s)).
-- Fixing this changes the wire format, so it is only flagged here.
local function huff_encode(thestr)
    thestr = thestr:gsub("\001", "\002\003");
    thestr = thestr:gsub("%z", "\002\004");
    return thestr;
end

-- Reverses huff_encode: "\002\004" -> byte 0, "\002\003" -> byte 1.
local function huff_decode(thestr)
    thestr = thestr:gsub("\002\004", "\000");
    thestr = thestr:gsub("\002\003", "\001");
    return thestr;
end

--------------------------------------------------------------------------------
-- Huffman codec
-- implemented by Galmok of European Stormrage (Horde), galmok@gmail.com

-- Recursively stores a code (bcode) and its bit length (blength) on every
-- node of the tree. Descending into c1 sets bit number `len` of the code,
-- descending into c2 leaves it clear.
local function addCode(tree, bcode, len)
    if tree then
        tree.bcode = bcode;
        tree.blength = len;
        if tree.c1 then
            addCode(tree.c1, bit.bor(bcode, bit.lshift(1, len)), len + 1);
        end
        if tree.c2 then
            addCode(tree.c2, bcode, len + 1);
        end
    end
end

-- Rewrites a code so that it never contains two adjacent set bits: a 0 bit
-- is inserted above every 1 bit. This lets the symbol map use two set bits
-- in a row as an end-of-code marker.
-- Returns the escaped code and its new length (len + number of 1 bits).
local function escape_code(code, len)
    local escaped_code = 0;
    local ones = 0;
    for i = len - 1, 0, -1 do
        local b = bit.band(code, bit.lshift(1, i)) == 0 and 0 or 1;
        escaped_code = bit.lshift(escaped_code, 1 + b) + b;
        ones = ones + b;
    end
    return escaped_code, len + ones;
end

local Huffman_compressed = {};
local Huffman_large_compressed = {};

local compressed_size = 0
local remainder;
local remainder_length;

-- Appends the lowest `len` bits of `code` to the bit stream. Complete bytes
-- are written to tbl at index compressed_size + 1 onward; leftover bits stay
-- in remainder / remainder_length.
-- Returns true when more than 32 bits would be pending (bits were lost),
-- otherwise returns nothing.
local function addBits(tbl, code, len)
    remainder = remainder + bit.lshift(code, remainder_length);
    remainder_length = len + remainder_length;
    if remainder_length > 32 then
        return true; -- Bits lost due to too long code-words.
    end
    while remainder_length >= 8 do
        compressed_size = compressed_size + 1;
        tbl[compressed_size] = string.char(bit.band(remainder, 255));
        remainder = bit.rshift(remainder, 8);
        remainder_length = remainder_length - 8;
    end
end

local hist = {};
local leafs = {};
local symbols = {};
local huff = {};

-- Resets all scratch state used by CompressHuffman.
local function cleanCompress()
    compressed_size = 0;
    freeTables();
    clearTables(Huffman_compressed, Huffman_large_compressed, hist, leafs, symbols, huff);
end

-- Sort order for the leaf queue: lightest node first. Defined once so that
-- no closure is allocated per compression call.
local function byWeight(a, b)
    return a.weight < b.weight;
end

-- Removes and returns the lighter of the two queue heads (leafs[1] and
-- huff[1]); a leaf wins ties. Returns nil when both queues are empty.
local function dequeueLightest()
    local l, h = leafs[1], huff[1];
    if h == nil or (l ~= nil and l.weight <= h.weight) then
        return table.remove(leafs, 1);
    end
    return table.remove(huff, 1);
end

-- The header stores the uncompressed length in 3 bytes.
local MAX_UNCOMPRESSED_SIZE = 16777216; -- 2^24
-- Number of input bytes encoded before the byte buffer is flushed to a string.
local ENCODE_CHUNK = 200;

-- word size for this huffman algorithm is 8 bits (1 byte). This means the best
-- compression is representing 1 byte with 1 bit, i.e. compress to 0.125 of
-- original size.
--
-- Returns the compressed string (first byte "\003", then symbol count - 1,
-- then the 24-bit little-endian input length, then the symbol/code map and
-- the encoded data). When compression is not possible or not worthwhile the
-- input is returned unchanged. For non-string input returns nil plus an
-- error message.
-- NOTE(review): the unchanged-input fallback carries no marker byte, so raw
-- data that happens to start with "\001" or "\003" is misread by the
-- decoder. Changing that alters the wire format, so it is only flagged here.
local function CompressHuffman(uncompressed)
    -- Start from a clean state even if an earlier call was aborted by an error.
    cleanCompress();
    if type(uncompressed) ~= "string" then
        return nil, "Can only compress strings";
    end

    local uncompressed_size = string.len(uncompressed);
    -- Nothing to encode, or the length does not fit the 24-bit header field.
    if uncompressed_size == 0 or uncompressed_size >= MAX_UNCOMPRESSED_SIZE then
        return uncompressed;
    end

    -- make histogram
    for i = 1, uncompressed_size do
        local c = string.byte(uncompressed, i);
        hist[c] = (hist[c] or 0) + 1;
    end

    -- Start with as many leaves as there are symbols.
    for symbol, weight in pairs(hist) do
        local leaf = getNewTable();
        leaf.symbol = string.char(symbol);
        leaf.weight = weight;
        symbols[symbol] = leaf;
        table.insert(leafs, leaf);
    end

    -- Enqueue all leaf nodes into the first queue (by probability in
    -- increasing order so that the least likely item is at the head).
    table.sort(leafs, byWeight);
    local nLeafs = #leafs;

    -- While there is more than one node in the queues: dequeue the two nodes
    -- with the lowest weight and join them under a new internal node whose
    -- weight is the sum of both. New nodes are appended to the second queue.
    while (#leafs + #huff) > 1 do
        local leaf1 = dequeueLightest();
        local leaf2 = dequeueLightest();
        local newNode = getNewTable();
        newNode.c1 = leaf1;
        newNode.c2 = leaf2;
        newNode.weight = leaf1.weight + leaf2.weight;
        table.insert(huff, newNode);
    end
    -- Only one distinct symbol: the single leaf becomes the root.
    if #leafs > 0 then
        table.insert(huff, table.remove(leafs, 1));
    end
    local root = huff[1];

    -- assign codes to each symbol
    addCode(root, 0, 0);
    if root then
        -- Gives a lone leaf a usable 1-bit code; harmless for internal roots.
        root.bcode = 0;
        root.blength = 1;
    end

    -- WRITING
    remainder = 0;
    remainder_length = 0;
    local compressed = Huffman_compressed;

    -- first byte is version info.
    compressed[1] = "\003";
    -- Header: byte 0 = #leafs - 1, byte 1-3 = size of uncompressed data
    compressed[2] = string.char(bit.band(nLeafs - 1, 255));
    compressed[3] = string.char(bit.band(uncompressed_size, 255));                 -- bit 0-7
    compressed[4] = string.char(bit.band(bit.rshift(uncompressed_size, 8), 255));  -- bit 8-15
    compressed[5] = string.char(bit.band(bit.rshift(uncompressed_size, 16), 255)); -- bit 16-23
    compressed_size = 5;

    -- create symbol/code map: 8 bits of symbol, the escaped code, then two
    -- set bits as terminator.
    for symbol, leaf in pairs(symbols) do
        addBits(compressed, symbol, 8);
        if addBits(compressed, escape_code(leaf.bcode, leaf.blength)) then
            -- code word too long. Needs new revision to be able to handle
            -- more than 32 bits. Fall back to the unchanged input, which is
            -- what the decoder passes through.
            cleanCompress();
            return uncompressed;
        end
        addBits(compressed, 3, 2);
    end

    -- create huffman code
    local large_compressed = Huffman_large_compressed;
    local large_compressed_size = 0;
    for i = 1, uncompressed_size, ENCODE_CHUNK do
        local ulimit = math.min(uncompressed_size, i + ENCODE_CHUNK - 1);
        for sub_i = i, ulimit do
            local leaf = symbols[string.byte(uncompressed, sub_i)];
            if addBits(compressed, leaf.bcode, leaf.blength) then
                -- Pending bits exceeded 32: the stream would be corrupt.
                cleanCompress();
                return uncompressed;
            end
        end
        -- Flush the bytes collected so far (the first flush also carries the
        -- header and the symbol map) and reuse the byte buffer.
        large_compressed_size = large_compressed_size + 1;
        large_compressed[large_compressed_size] = table.concat(compressed, "", 1, compressed_size);
        compressed_size = 0;
    end

    -- add remaining bits (if any)
    if remainder_length > 0 then
        large_compressed_size = large_compressed_size + 1;
        large_compressed[large_compressed_size] = string.char(remainder);
    end
    local compressed_string = table.concat(large_compressed, "", 1, large_compressed_size);

    -- is compression worth it? If not, return uncompressed data.
    cleanCompress();
    if (#uncompressed + 1) <= #compressed_string then
        return uncompressed;
    end
    return compressed_string;
end

-- lookuptable (cached between calls): lshiftMask[k] == 2^k
local lshiftMask = {};
local lshiftMask_metamap = {
    __index = function(t, k)
        local v = bit.lshift(1, k);
        rawset(t, k, v);
        return v;
    end
};

-- lookuptable (cached between calls): lshiftMinusOneMask[k] == 2^k - 1
local lshiftMinusOneMask = {};
local lshiftMinusOneMask_metamap = {
    __index = function(t, k)
        local v = bit.lshift(1, k) - 1;
        rawset(t, k, v);
        return v;
    end
};

-- Looks for the terminator (two adjacent set bits) in the lowest field_len
-- bits of bitfield.
-- Returns: the escaped code below the terminator, its length, the bitfield
-- with code and terminator shifted out, and the remaining field length.
-- Returns nil when no terminator is found.
local function getCode(bitfield, field_len)
    if field_len >= 2 then
        local prev = 0;
        for i = 0, field_len - 1 do
            local b = bit.band(bitfield, lshiftMask[i]);
            if prev ~= 0 and b ~= 0 then
                -- found 2 bits set right after each other (stop bits)
                return bit.band(bitfield, lshiftMinusOneMask[i - 1]), i - 1,
                    bit.rshift(bitfield, i + 1), field_len - i - 1;
            end
            prev = b;
        end
    end
    return nil;
end

-- Reverses escape_code: every set bit is copied to the output and the bit
-- position following it in the input is skipped.
-- Returns the unescaped code and its length.
local function unescape_code(code, code_len)
    local unescaped_code = 0;
    local l = 0;
    local i = 0;
    while i < code_len do
        if bit.band(code, lshiftMask[i]) ~= 0 then
            unescaped_code = bit.bor(unescaped_code, lshiftMask[l]);
            i = i + 1; -- skip the bit that follows a set bit
        end
        i = i + 1;
        l = l + 1;
    end
    return unescaped_code, l;
end

-- Scratch buffers for the decoder, shared between calls.
local Huffman_uncompressed = {};
local Huffman_large_uncompressed = {};
-- Lookup filled while decoding the symbol map: map[code length][code] = symbol.
local map = {};
-- Shared empty table handed out for code lengths that have no map entries.
local mt = {};

-- Resets all scratch state used by DecompressHuffman, including the cached
-- shift-mask tables (their metatables are re-attached on the next call).
local function cleanDecompress()
    freeTables();
    clearTables(Huffman_uncompressed, Huffman_large_uncompressed, map, lshiftMask, lshiftMinusOneMask);
end

-- While the symbol map is being read: create a (recycled) sub-table for a
-- code length the first time it is indexed.
local map_metatable1 = {
    __index = function(t, k)
        local v = getNewTable();
        rawset(t, k, v);
        return v;
    end
};

-- While data is being decoded: do not create sub-tables for code lengths
-- that are not in the map, but return an empty table instead of nil so the
-- lookup does not raise an error.
local map_metatable2 = {
    __index = function(t, k)
        return mt;
    end
};

-- Number of decoded symbols collected before they are joined into a string.
local DECODE_CHUNK = 200;

-- Decodes a string produced by the Huffman compressor.
--   * first byte 1: the rest of the string is returned as-is;
--   * first byte 3 followed by a full header: Huffman data is decoded;
--   * anything else (including a "\003" string too short to hold the 5-byte
--     header): the input is returned unchanged.
-- Returns the decoded string, or nil plus an error message when the input is
-- not a string or the stream cannot be decoded.
local function DecompressHuffman(compressed)
    setmetatable(lshiftMask, lshiftMask_metamap);
    setmetatable(lshiftMinusOneMask, lshiftMinusOneMask_metamap);

    if type(compressed) ~= "string" then
        cleanDecompress();
        return nil, "Can only uncompress strings";
    end

    local compressed_size = string.len(compressed);

    -- decode header
    local info_byte = string.byte(compressed);
    -- is data compressed
    if info_byte == 1 then
        cleanDecompress();
        return compressed:sub(2); -- return uncompressed data
    end
    -- Not Huffman data, or too short to contain the header: pass through.
    if info_byte ~= 3 or compressed_size < 5 then
        cleanDecompress();
        return compressed;
    end

    local num_symbols, c0, c1, c2 = string.byte(compressed, 2, 5);
    num_symbols = num_symbols + 1;
    local orig_size = c2 * 65536 + c1 * 256 + c0;
    if orig_size == 0 then
        cleanDecompress();
        return "";
    end

    -- decode code->symbol map
    local bitfield = 0;
    local bitfield_len = 0;
    setmetatable(map, map_metatable1);

    local i = 6; -- byte 1-5 are header bytes
    local c, cl;
    local minCodeLen = 1000;
    local maxCodeLen = 0;
    local symbol, code, code_len, _bitfield, _bitfield_len;
    local n = 0;
    local state = 0; -- 0 = get symbol (8 bits), 1 = get code (varying bits, ends with 2 bits set)
    while n < num_symbols do
        if i > compressed_size then
            cleanDecompress();
            return nil, "Cannot decode map";
        end

        c = string.byte(compressed, i);
        bitfield = bit.bor(bitfield, bit.lshift(c, bitfield_len));
        bitfield_len = bitfield_len + 8;

        if state == 0 then
            symbol = bit.band(bitfield, 255);
            bitfield = bit.rshift(bitfield, 8);
            bitfield_len = bitfield_len - 8;
            state = 1; -- search for code now
        else
            code, code_len, _bitfield, _bitfield_len = getCode(bitfield, bitfield_len);
            if code then
                bitfield, bitfield_len = _bitfield, _bitfield_len;
                c, cl = unescape_code(code, code_len);
                map[cl][c] = string.char(symbol);
                minCodeLen = math.min(minCodeLen, cl);
                maxCodeLen = math.max(maxCodeLen, cl);
                n = n + 1;
                state = 0; -- search for next symbol (if any)
            end
        end
        i = i + 1;
    end

    -- dont create new subtables for entries not in the map. Waste of space.
    -- But do return an empty table to prevent runtime errors. (instead of returning nil)
    setmetatable(map, map_metatable2);

    local uncompressed = Huffman_uncompressed;
    local large_uncompressed = Huffman_large_uncompressed;
    local uncompressed_size = 0;
    local large_uncompressed_size = 0;
    local test_code;
    local test_code_len = minCodeLen;
    local dec_size = 0;
    -- One byte of slack: a single zero byte may be read past the end of the
    -- input before the stream is considered exhausted.
    compressed_size = compressed_size + 1;
    local temp_limit = DECODE_CHUNK; -- decoded-symbol count at which the buffer is flushed next

    while true do
        if test_code_len <= bitfield_len then
            -- Try the lowest test_code_len bits as a code.
            test_code = bit.band(bitfield, lshiftMinusOneMask[test_code_len]);
            symbol = map[test_code_len][test_code];
            if symbol then
                uncompressed_size = uncompressed_size + 1;
                uncompressed[uncompressed_size] = symbol;
                dec_size = dec_size + 1;
                if dec_size >= orig_size then -- checked here for speed reasons
                    break;
                end
                if dec_size >= temp_limit then
                    -- join decoded symbols in smaller chunks
                    large_uncompressed_size = large_uncompressed_size + 1;
                    large_uncompressed[large_uncompressed_size] = table.concat(uncompressed, "", 1, uncompressed_size);
                    uncompressed_size = 0;
                    temp_limit = temp_limit + DECODE_CHUNK;
                end
                bitfield = bit.rshift(bitfield, test_code_len);
                bitfield_len = bitfield_len - test_code_len;
                test_code_len = minCodeLen;
            else
                -- No symbol with this length; try a longer code.
                test_code_len = test_code_len + 1;
                if test_code_len > maxCodeLen then
                    cleanDecompress();
                    return nil, "Decompression error at "..tostring(i).."/"..tostring(#compressed);
                end
            end
        else
            -- More bits are needed. Running out of input before orig_size
            -- symbols were produced means the stream is truncated or corrupt.
            if i > compressed_size then
                cleanDecompress();
                return nil, "Decompression error at "..tostring(i).."/"..tostring(#compressed);
            end
            c = string.byte(compressed, i);
            bitfield = bitfield + bit.lshift(c or 0, bitfield_len);
            bitfield_len = bitfield_len + 8;
            i = i + 1;
        end
    end

    local out = table.concat(large_uncompressed, "", 1, large_uncompressed_size)
        ..table.concat(uncompressed, "", 1, uncompressed_size);
    cleanDecompress();
    return out;
end

-- Compresses a string and escapes the result for transmission.
-- A nil/false argument is returned unchanged. When the compressor rejects
-- the input, returns nil plus its error message.
function Compress(uncompressed)
    if not uncompressed then
        return uncompressed;
    end
    local str, err = CompressHuffman(uncompressed);
    if not str then
        return nil, err;
    end
    return huff_encode(str);
end

-- Unescapes and decompresses a string produced by Compress.
-- Returns the original string, or nil plus an error message.
function Decompress(compressed)
    -- huff_decode indexes its argument, so reject non-strings up front.
    if type(compressed) ~= "string" then
        return nil, "Can only uncompress strings";
    end
    return DecompressHuffman(huff_decode(compressed));
end