Lempel-Ziv-Stac is a simple (and a bit exotic) compression algorithm,
used on embedded devices, for example for config files, for example on routers,
for example on those that expose the config file on the public internet. Just sayin’…
There is not a Python implementation of it, so here is my Lempel-Ziv-Stac decompression routine.
#!/usr/bin/env python
#-*- coding:utf-8 -*-
##############################################################
# Lempel-Ziv-Stac decompression
# BitReader and RingList classes
#
# Copyright (C) 2011 Filippo Valsorda - FiloSottile
# filosottile.wiki gmail.com - www.pytux.it
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
##############################################################
import collections
class BitReader:
"""
Gets a string or a iterable of chars (also mmap)
representing bytes (ord) and permits to extract
bits one by one like a stream
"""
def __init__(self, bytes):
self._bits = collections.deque()
for byte in bytes:
byte = ord(byte)
for n in xrange(8):
self._bits.append(bool((byte >> (7-n)) & 1))
def getBit(self):
return self._bits.popleft()
def getBits(self, num):
res = 0
for i in xrange(num):
res += self.getBit() << num-1-i
return res
def getByte(self):
return self.getBits(8)
def __len__(self):
return len(self._bits)
class RingList:
"""
When the list is full, for every item appended
the older is removed
"""
def __init__(self, length):
self.__data__ = collections.deque()
self.__full__ = False
self.__max__ = length
def append(self, x):
if self.__full__:
self.__data__.popleft()
self.__data__.append(x)
if self.size() == self.__max__:
self.__full__ = True
def get(self):
return self.__data__
def size(self):
return len(self.__data__)
def maxsize(self):
return self.__max__
def __getitem__(self, n):
if n >= self.size():
return None
return self.__data__[n]
def LZSDecompress(data, window = RingList(2048)):
"""
Gets a string or a iterable of chars (also mmap)
representing bytes (ord) and an optional
pre-populated dictionary; return the decompressed
string and the final dictionary
"""
reader = BitReader(data)
result = ''
while True:
bit = reader.getBit()
if not bit:
char = reader.getByte()
result += chr(char)
window.append(char)
else:
bit = reader.getBit()
if bit:
offset = reader.getBits(7)
if offset == 0:
# EOF
break
else:
offset = reader.getBits(11)
lenField = reader.getBits(2)
if lenField < 3:
lenght = lenField + 2
else:
lenField <<= 2
lenField += reader.getBits(2)
if lenField < 15:
lenght = (lenField & 0x0f) + 5
else:
lenCounter = 0
lenField = reader.getBits(4)
while lenField == 15:
lenField = reader.getBits(4)
lenCounter += 1
lenght = 15*lenCounter + 8 + lenField
for i in xrange(lenght):
char = window[-offset]
result += chr(char)
window.append(char)
return result, window