# PaCkAgE DaTaStReAm
xz 1 3766
# end of header
07070100099671000081a40000000000000000000000015174a54c000000ef000000b600010002ffffffffffffffff0000000b00000000xz/pkginfo PKG=xz
NAME=xz 5.0.4 i86pc Solaris 11
VERSION=5.0.4
PSTAMP=22nd April 2013
VENDOR=XZ Project
EMAIL=http://tukaani.org/xz/
DESC=Data compression software
ARCH=i386
CATEGORY=utility
CLASSES=none
BASEDIR=/
ISTATES=S s 1 2 3
RSTATES=S s 1 2 3
07070100099670000081a40000000000000000000000015174a54c00001ce6000000b600010002ffffffffffffffff0000000a00000000xz/pkgmap : 1 3766
1 d none /usr ? ? ?
1 d none /usr/local ? ? ?
1 d none /usr/local/bin 0755 root root
1 s none /usr/local/bin/lzcat=xz
1 s none /usr/local/bin/lzcmp=xzdiff
1 s none /usr/local/bin/lzdiff=xzdiff
1 s none /usr/local/bin/lzegrep=xzgrep
1 s none /usr/local/bin/lzfgrep=xzgrep
1 s none /usr/local/bin/lzgrep=xzgrep
1 s none /usr/local/bin/lzless=xzless
1 s none /usr/local/bin/lzma=xz
1 f none /usr/local/bin/lzmadec 0755 root root 15860 13554 1366598958
1 f none /usr/local/bin/lzmainfo 0755 root root 15640 64380 1366598959
1 s none /usr/local/bin/lzmore=xzmore
1 s none /usr/local/bin/unlzma=xz
1 s none /usr/local/bin/unxz=xz
1 f none /usr/local/bin/xz 0755 root root 112624 26681 1366598959
1 s none /usr/local/bin/xzcat=xz
1 s none /usr/local/bin/xzcmp=xzdiff
1 f none /usr/local/bin/xzdec 0755 root root 15796 9853 1366598958
1 f none /usr/local/bin/xzdiff 0755 root root 5265 39463 1366598959
1 s none /usr/local/bin/xzegrep=xzgrep
1 s none /usr/local/bin/xzfgrep=xzgrep
1 f none /usr/local/bin/xzgrep 0755 root root 5351 9832 1366598959
1 f none /usr/local/bin/xzless 0755 root root 1800 16509 1366598959
1 f none /usr/local/bin/xzmore 0755 root root 2161 39069 1366598959
1 d none /usr/local/include 0755 root root
1 d none /usr/local/include/lzma 0755 root root
1 f none /usr/local/include/lzma.h 0644 root root 9274 31367 1366598958
1 f none /usr/local/include/lzma/base.h 0644 root root 22174 53754 1366598958
1 f none /usr/local/include/lzma/bcj.h 0644 root root 2630 11762 1366598958
1 f none /usr/local/include/lzma/block.h 0644 root root 20078 61138 1366598958
1 f none /usr/local/include/lzma/check.h 0644 root root 4255 10592 1366598958
1 f none /usr/local/include/lzma/container.h 0644 root root 17396 53084 1366598958
1 f none /usr/local/include/lzma/delta.h 0644 root root 1865 20096 1366598958
1 f none /usr/local/include/lzma/filter.h 0644 root root 16396 61526 1366598958
1 f none /usr/local/include/lzma/hardware.h 0644 root root 2058 41658 1366598958
1 f none /usr/local/include/lzma/index.h 0644 root root 23200 47314 1366598958
1 f none /usr/local/include/lzma/index_hash.h 0644 root root 3902 63647 1366598958
1 f none /usr/local/include/lzma/lzma.h 0644 root root 14741 44256 1366598958
1 f none /usr/local/include/lzma/stream_flags.h 0644 root root 8253 5772 1366598958
1 f none /usr/local/include/lzma/version.h 0644 root root 3497 17096 1366598958
1 f none /usr/local/include/lzma/vli.h 0644 root root 6547 57528 1366598958
1 d none /usr/local/lib 0755 root root
1 f none /usr/local/lib/liblzma.a 0644 root root 529872 37731 1366598958
1 f none /usr/local/lib/liblzma.la 0755 root root 948 17722 1366598958
1 s none /usr/local/lib/liblzma.so=liblzma.so.5.0.4
1 s none /usr/local/lib/liblzma.so.5=liblzma.so.5.0.4
1 f none /usr/local/lib/liblzma.so.5.0.4 0755 root root 467540 15665 1366598958
1 d none /usr/local/lib/pkgconfig 0755 root root
1 f none /usr/local/lib/pkgconfig/liblzma.pc 0644 root root 424 37537 1366598958
1 d none /usr/local/share 0755 root root
1 d none /usr/local/share/doc 0755 root root
1 d none /usr/local/share/doc/xz 0755 root root
1 f none /usr/local/share/doc/xz/AUTHORS 0644 root root 1043 22516 1366598959
1 f none /usr/local/share/doc/xz/COPYING 0644 root root 2774 38083 1366598959
1 f none /usr/local/share/doc/xz/COPYING.GPLv2 0644 root root 17987 30406 1366598959
1 f none /usr/local/share/doc/xz/NEWS 0644 root root 6373 60263 1366598959
1 f none /usr/local/share/doc/xz/README 0644 root root 13527 7858 1366598959
1 f none /usr/local/share/doc/xz/THANKS 0644 root root 1843 19857 1366598959
1 f none /usr/local/share/doc/xz/TODO 0644 root root 2294 64967 1366598959
1 d none /usr/local/share/doc/xz/examples 0755 root root
1 f none /usr/local/share/doc/xz/examples/00_README.txt 0644 root root 851 64011 1366598959
1 f none /usr/local/share/doc/xz/examples/01_compress_easy.c 0644 root root 9534 58430 1366598959
1 f none /usr/local/share/doc/xz/examples/02_decompress.c 0644 root root 8912 62312 1366598959
1 f none /usr/local/share/doc/xz/examples/03_compress_custom.c 0644 root root 5017 8124 1366598959
1 f none /usr/local/share/doc/xz/examples/Makefile 0644 root root 314 23350 1366598959
1 d none /usr/local/share/doc/xz/examples_old 0755 root root
1 f none /usr/local/share/doc/xz/examples_old/xz_pipe_comp.c 0644 root root 3043 43628 1366598959
1 f none /usr/local/share/doc/xz/examples_old/xz_pipe_decomp.c 0644 root root 3130 52378 1366598959
1 f none /usr/local/share/doc/xz/faq.txt 0644 root root 9409 24766 1366598959
1 f none /usr/local/share/doc/xz/history.txt 0644 root root 7427 52939 1366598959
1 f none /usr/local/share/doc/xz/lzma-file-format.txt 0644 root root 5689 39338 1366598959
1 f none /usr/local/share/doc/xz/xz-file-format.txt 0644 root root 43305 16916 1366598959
1 d none /usr/local/share/locale 0755 root root
1 d none /usr/local/share/locale/cs 0755 root root
1 d none /usr/local/share/locale/cs/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/cs/LC_MESSAGES/xz.mo 0644 root root 21945 54807 1366598959
1 d none /usr/local/share/locale/de 0755 root root
1 d none /usr/local/share/locale/de/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/de/LC_MESSAGES/xz.mo 0644 root root 22197 6167 1366598959
1 d none /usr/local/share/locale/fr 0755 root root
1 d none /usr/local/share/locale/fr/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/fr/LC_MESSAGES/xz.mo 0644 root root 22435 44957 1366598959
1 d none /usr/local/share/locale/it 0755 root root
1 d none /usr/local/share/locale/it/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/it/LC_MESSAGES/xz.mo 0644 root root 22174 56372 1366598959
1 d none /usr/local/share/locale/pl 0755 root root
1 d none /usr/local/share/locale/pl/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/pl/LC_MESSAGES/xz.mo 0644 root root 21850 38651 1366598959
1 d none /usr/local/share/man 0755 root root
1 d none /usr/local/share/man/man1 0755 root root
1 s none /usr/local/share/man/man1/lzcat.1=xz.1
1 s none /usr/local/share/man/man1/lzcmp.1=xzdiff.1
1 s none /usr/local/share/man/man1/lzdiff.1=xzdiff.1
1 s none /usr/local/share/man/man1/lzegrep.1=xzgrep.1
1 s none /usr/local/share/man/man1/lzfgrep.1=xzgrep.1
1 s none /usr/local/share/man/man1/lzgrep.1=xzgrep.1
1 s none /usr/local/share/man/man1/lzless.1=xzless.1
1 s none /usr/local/share/man/man1/lzma.1=xz.1
1 s none /usr/local/share/man/man1/lzmadec.1=xzdec.1
1 f none /usr/local/share/man/man1/lzmainfo.1 0644 root root 1250 39717 1366598959
1 s none /usr/local/share/man/man1/lzmore.1=xzmore.1
1 s none /usr/local/share/man/man1/unlzma.1=xz.1
1 s none /usr/local/share/man/man1/unxz.1=xz.1
1 f none /usr/local/share/man/man1/xz.1 0644 root root 59193 50023 1366598959
1 s none /usr/local/share/man/man1/xzcat.1=xz.1
1 s none /usr/local/share/man/man1/xzcmp.1=xzdiff.1
1 f none /usr/local/share/man/man1/xzdec.1 0644 root root 2837 43521 1366598959
1 f none /usr/local/share/man/man1/xzdiff.1 0644 root root 1441 47726 1366598959
1 s none /usr/local/share/man/man1/xzegrep.1=xzgrep.1
1 s none /usr/local/share/man/man1/xzfgrep.1=xzgrep.1
1 f none /usr/local/share/man/man1/xzgrep.1 0644 root root 1446 46514 1366598959
1 f none /usr/local/share/man/man1/xzless.1 0644 root root 1360 46087 1366598959
1 f none /usr/local/share/man/man1/xzmore.1 0644 root root 1167 30313 1366598959
1 i checkinstall 790 2505 1366599166
1 i pkginfo 239 18016 1366598988
07070100000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000b00000000TRAILER!!! 07070100099671000081a40000000000000000000000015174a54c000000ef000000b600010002ffffffffffffffff0000000800000000pkginfo PKG=xz
NAME=xz 5.0.4 i86pc Solaris 11
VERSION=5.0.4
PSTAMP=22nd April 2013
VENDOR=XZ Project
EMAIL=http://tukaani.org/xz/
DESC=Data compression software
ARCH=i386
CATEGORY=utility
CLASSES=none
BASEDIR=/
ISTATES=S s 1 2 3
RSTATES=S s 1 2 3
07070100099670000081a40000000000000000000000015174a54c00001ce6000000b600010002ffffffffffffffff0000000700000000pkgmap : 1 3766
1 d none /usr ? ? ?
1 d none /usr/local ? ? ?
1 d none /usr/local/bin 0755 root root
1 s none /usr/local/bin/lzcat=xz
1 s none /usr/local/bin/lzcmp=xzdiff
1 s none /usr/local/bin/lzdiff=xzdiff
1 s none /usr/local/bin/lzegrep=xzgrep
1 s none /usr/local/bin/lzfgrep=xzgrep
1 s none /usr/local/bin/lzgrep=xzgrep
1 s none /usr/local/bin/lzless=xzless
1 s none /usr/local/bin/lzma=xz
1 f none /usr/local/bin/lzmadec 0755 root root 15860 13554 1366598958
1 f none /usr/local/bin/lzmainfo 0755 root root 15640 64380 1366598959
1 s none /usr/local/bin/lzmore=xzmore
1 s none /usr/local/bin/unlzma=xz
1 s none /usr/local/bin/unxz=xz
1 f none /usr/local/bin/xz 0755 root root 112624 26681 1366598959
1 s none /usr/local/bin/xzcat=xz
1 s none /usr/local/bin/xzcmp=xzdiff
1 f none /usr/local/bin/xzdec 0755 root root 15796 9853 1366598958
1 f none /usr/local/bin/xzdiff 0755 root root 5265 39463 1366598959
1 s none /usr/local/bin/xzegrep=xzgrep
1 s none /usr/local/bin/xzfgrep=xzgrep
1 f none /usr/local/bin/xzgrep 0755 root root 5351 9832 1366598959
1 f none /usr/local/bin/xzless 0755 root root 1800 16509 1366598959
1 f none /usr/local/bin/xzmore 0755 root root 2161 39069 1366598959
1 d none /usr/local/include 0755 root root
1 d none /usr/local/include/lzma 0755 root root
1 f none /usr/local/include/lzma.h 0644 root root 9274 31367 1366598958
1 f none /usr/local/include/lzma/base.h 0644 root root 22174 53754 1366598958
1 f none /usr/local/include/lzma/bcj.h 0644 root root 2630 11762 1366598958
1 f none /usr/local/include/lzma/block.h 0644 root root 20078 61138 1366598958
1 f none /usr/local/include/lzma/check.h 0644 root root 4255 10592 1366598958
1 f none /usr/local/include/lzma/container.h 0644 root root 17396 53084 1366598958
1 f none /usr/local/include/lzma/delta.h 0644 root root 1865 20096 1366598958
1 f none /usr/local/include/lzma/filter.h 0644 root root 16396 61526 1366598958
1 f none /usr/local/include/lzma/hardware.h 0644 root root 2058 41658 1366598958
1 f none /usr/local/include/lzma/index.h 0644 root root 23200 47314 1366598958
1 f none /usr/local/include/lzma/index_hash.h 0644 root root 3902 63647 1366598958
1 f none /usr/local/include/lzma/lzma.h 0644 root root 14741 44256 1366598958
1 f none /usr/local/include/lzma/stream_flags.h 0644 root root 8253 5772 1366598958
1 f none /usr/local/include/lzma/version.h 0644 root root 3497 17096 1366598958
1 f none /usr/local/include/lzma/vli.h 0644 root root 6547 57528 1366598958
1 d none /usr/local/lib 0755 root root
1 f none /usr/local/lib/liblzma.a 0644 root root 529872 37731 1366598958
1 f none /usr/local/lib/liblzma.la 0755 root root 948 17722 1366598958
1 s none /usr/local/lib/liblzma.so=liblzma.so.5.0.4
1 s none /usr/local/lib/liblzma.so.5=liblzma.so.5.0.4
1 f none /usr/local/lib/liblzma.so.5.0.4 0755 root root 467540 15665 1366598958
1 d none /usr/local/lib/pkgconfig 0755 root root
1 f none /usr/local/lib/pkgconfig/liblzma.pc 0644 root root 424 37537 1366598958
1 d none /usr/local/share 0755 root root
1 d none /usr/local/share/doc 0755 root root
1 d none /usr/local/share/doc/xz 0755 root root
1 f none /usr/local/share/doc/xz/AUTHORS 0644 root root 1043 22516 1366598959
1 f none /usr/local/share/doc/xz/COPYING 0644 root root 2774 38083 1366598959
1 f none /usr/local/share/doc/xz/COPYING.GPLv2 0644 root root 17987 30406 1366598959
1 f none /usr/local/share/doc/xz/NEWS 0644 root root 6373 60263 1366598959
1 f none /usr/local/share/doc/xz/README 0644 root root 13527 7858 1366598959
1 f none /usr/local/share/doc/xz/THANKS 0644 root root 1843 19857 1366598959
1 f none /usr/local/share/doc/xz/TODO 0644 root root 2294 64967 1366598959
1 d none /usr/local/share/doc/xz/examples 0755 root root
1 f none /usr/local/share/doc/xz/examples/00_README.txt 0644 root root 851 64011 1366598959
1 f none /usr/local/share/doc/xz/examples/01_compress_easy.c 0644 root root 9534 58430 1366598959
1 f none /usr/local/share/doc/xz/examples/02_decompress.c 0644 root root 8912 62312 1366598959
1 f none /usr/local/share/doc/xz/examples/03_compress_custom.c 0644 root root 5017 8124 1366598959
1 f none /usr/local/share/doc/xz/examples/Makefile 0644 root root 314 23350 1366598959
1 d none /usr/local/share/doc/xz/examples_old 0755 root root
1 f none /usr/local/share/doc/xz/examples_old/xz_pipe_comp.c 0644 root root 3043 43628 1366598959
1 f none /usr/local/share/doc/xz/examples_old/xz_pipe_decomp.c 0644 root root 3130 52378 1366598959
1 f none /usr/local/share/doc/xz/faq.txt 0644 root root 9409 24766 1366598959
1 f none /usr/local/share/doc/xz/history.txt 0644 root root 7427 52939 1366598959
1 f none /usr/local/share/doc/xz/lzma-file-format.txt 0644 root root 5689 39338 1366598959
1 f none /usr/local/share/doc/xz/xz-file-format.txt 0644 root root 43305 16916 1366598959
1 d none /usr/local/share/locale 0755 root root
1 d none /usr/local/share/locale/cs 0755 root root
1 d none /usr/local/share/locale/cs/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/cs/LC_MESSAGES/xz.mo 0644 root root 21945 54807 1366598959
1 d none /usr/local/share/locale/de 0755 root root
1 d none /usr/local/share/locale/de/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/de/LC_MESSAGES/xz.mo 0644 root root 22197 6167 1366598959
1 d none /usr/local/share/locale/fr 0755 root root
1 d none /usr/local/share/locale/fr/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/fr/LC_MESSAGES/xz.mo 0644 root root 22435 44957 1366598959
1 d none /usr/local/share/locale/it 0755 root root
1 d none /usr/local/share/locale/it/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/it/LC_MESSAGES/xz.mo 0644 root root 22174 56372 1366598959
1 d none /usr/local/share/locale/pl 0755 root root
1 d none /usr/local/share/locale/pl/LC_MESSAGES 0755 root root
1 f none /usr/local/share/locale/pl/LC_MESSAGES/xz.mo 0644 root root 21850 38651 1366598959
1 d none /usr/local/share/man 0755 root root
1 d none /usr/local/share/man/man1 0755 root root
1 s none /usr/local/share/man/man1/lzcat.1=xz.1
1 s none /usr/local/share/man/man1/lzcmp.1=xzdiff.1
1 s none /usr/local/share/man/man1/lzdiff.1=xzdiff.1
1 s none /usr/local/share/man/man1/lzegrep.1=xzgrep.1
1 s none /usr/local/share/man/man1/lzfgrep.1=xzgrep.1
1 s none /usr/local/share/man/man1/lzgrep.1=xzgrep.1
1 s none /usr/local/share/man/man1/lzless.1=xzless.1
1 s none /usr/local/share/man/man1/lzma.1=xz.1
1 s none /usr/local/share/man/man1/lzmadec.1=xzdec.1
1 f none /usr/local/share/man/man1/lzmainfo.1 0644 root root 1250 39717 1366598959
1 s none /usr/local/share/man/man1/lzmore.1=xzmore.1
1 s none /usr/local/share/man/man1/unlzma.1=xz.1
1 s none /usr/local/share/man/man1/unxz.1=xz.1
1 f none /usr/local/share/man/man1/xz.1 0644 root root 59193 50023 1366598959
1 s none /usr/local/share/man/man1/xzcat.1=xz.1
1 s none /usr/local/share/man/man1/xzcmp.1=xzdiff.1
1 f none /usr/local/share/man/man1/xzdec.1 0644 root root 2837 43521 1366598959
1 f none /usr/local/share/man/man1/xzdiff.1 0644 root root 1441 47726 1366598959
1 s none /usr/local/share/man/man1/xzegrep.1=xzgrep.1
1 s none /usr/local/share/man/man1/xzfgrep.1=xzgrep.1
1 f none /usr/local/share/man/man1/xzgrep.1 0644 root root 1446 46514 1366598959
1 f none /usr/local/share/man/man1/xzless.1 0644 root root 1360 46087 1366598959
1 f none /usr/local/share/man/man1/xzmore.1 0644 root root 1167 30313 1366598959
1 i checkinstall 790 2505 1366599166
1 i pkginfo 239 18016 1366598988
070701000996c5000041ed0000000000000000000000025174a54c00000000000000b600010002ffffffffffffffff0000000800000000install 070701000996c6000081ed0000000000000000000000015174a5fe00000316000000b600010002ffffffffffffffff0000001500000000install/checkinstall #!/bin/sh
#
expected_bits="64"
expected_release="5.11"
expected_platform="i386"
#
release=`uname -r`
platform=`uname -p`
bits=`isainfo -b`
#
if [ ${platform} != ${expected_platform} ]; then
echo "\n\n\n\tThis package must be installed on a ${expected_platform} architecture\n"
echo "\tAborting installation.\n\n\n"
exit 1
fi
if [ ${release} != ${expected_release} ]; then
echo "\n\n\n\tThis package must be installed on a ${expected_release} machine\n"
echo "\tAborting installation.\n\n\n"
exit 1
fi
#if [ ${bits} != ${expected_bits} ]; then
# echo "\n\n\n\tThis package must be installed on a ${expected_bits} bit machine\n"
# echo "\tYour machine is running a ${bits} bit O.S. currently\n"
# echo "\tAborting installation.\n\n\n"
# exit 1
#fi
exit 0
07070100099672000041ed0000000000000000000000035174a54c00000000000000b600010002ffffffffffffffff0000000500000000root 07070100099673000041ed0000000000000000000000035174a54c00000000000000b600010002ffffffffffffffff0000000900000000root/usr 07070100099674000041ed0000000000000000000000065174a54c00000000000000b600010002ffffffffffffffff0000000f00000000root/usr/local 07070100099695000041ed0000000000000000000000055174a54c00000000000000b600010002ffffffffffffffff0000001500000000root/usr/local/share 070701000996bc000041ed0000000000000000000000035174a54c00000000000000b600010002ffffffffffffffff0000001900000000root/usr/local/share/man 070701000996bd000041ed0000000000000000000000025174a54c00000000000000b600010002ffffffffffffffff0000001e00000000root/usr/local/share/man/man1 070701000996be000081a40000000000000000000000015174a52f000004e2000000b600010002ffffffffffffffff0000002900000000root/usr/local/share/man/man1/lzmainfo.1 .\"
.\" Author: Lasse Collin
.\"
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
.TH LZMAINFO 1 "2010-09-27" "Tukaani" "XZ Utils"
.SH NAME
lzmainfo \- show information stored in the .lzma file header
.SH SYNOPSIS
.B lzmainfo
.RB [ \-\-help ]
.RB [ \-\-version ]
.RI [ file ]...
.SH DESCRIPTION
.B lzmainfo
shows information stored in the
.B .lzma
file header.
It reads the first 13 bytes from the specified
.IR file ,
decodes the header, and prints it to standard output in human
readable format.
If no
.I files
are given or
.I file
is
.BR \- ,
standard input is read.
.PP
Usually the most interesting information is
the uncompressed size and the dictionary size.
Uncompressed size can be shown only if
the file is in the non-streamed
.B .lzma
format variant.
The amount of memory required to decompress the file is
a few dozen kilobytes plus the dictionary size.
.PP
.B lzmainfo
is included in XZ Utils primarily for
backward compatibility with LZMA Utils.
.SH "EXIT STATUS"
.TP
.B 0
All is good.
.TP
.B 1
An error occurred.
.SH BUGS
.B lzmainfo
uses
.B MB
while the correct suffix would be
.B MiB
(2^20 bytes).
This is to keep the output compatible with LZMA Utils.
.SH "SEE ALSO"
.BR xz (1)
070701000996c3000081a40000000000000000000000015174a52f00000550000000b600010002ffffffffffffffff0000002700000000root/usr/local/share/man/man1/xzless.1 .\"
.\" Authors: Andrew Dudman
.\" Lasse Collin
.\"
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
.\" (Note that this file is not based on gzip's zless.1.)
.\"
.TH XZLESS 1 "2010-09-27" "Tukaani" "XZ Utils"
.SH NAME
xzless, lzless \- view xz or lzma compressed (text) files
.SH SYNOPSIS
.B xzless
.RI [ file ...]
.br
.B lzless
.RI [ file ...]
.SH DESCRIPTION
.B xzless
is a filter that displays text from compressed files to a terminal.
It works on files compressed with
.BR xz (1)
or
.BR lzma (1).
If no
.I files
are given,
.B xzless
reads from standard input.
.PP
.B xzless
uses
.BR less (1)
to present its output.
Unlike
.BR xzmore ,
its choice of pager cannot be altered by
setting an environment variable.
Commands are based on both
.BR more (1)
and
.BR vi (1)
and allow back and forth movement and searching.
See the
.BR less (1)
manual for more information.
.PP
The command named
.B lzless
is provided for backward compatibility with LZMA Utils.
.SH ENVIRONMENT
.TP
.B LESSMETACHARS
A list of characters special to the shell.
Set by
.B xzless
unless it is already set in the environment.
.TP
.B LESSOPEN
Set to a command line to invoke the
.BR xz (1)
decompressor for preprocessing the input files to
.BR less (1).
.SH "SEE ALSO"
.BR less (1),
.BR xz (1),
.BR xzmore (1),
.BR zless (1)
070701000996bf000081a40000000000000000000000015174a52f0000e739000000b600010002ffffffffffffffff0000002300000000root/usr/local/share/man/man1/xz.1 '\" t
.\"
.\" Author: Lasse Collin
.\"
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
.TH XZ 1 "2012-05-27" "Tukaani" "XZ Utils"
.
.SH NAME
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
.
.SH SYNOPSIS
.B xz
.RI [ option ]...
.RI [ file ]...
.PP
.B unxz
is equivalent to
.BR "xz \-\-decompress" .
.br
.B xzcat
is equivalent to
.BR "xz \-\-decompress \-\-stdout" .
.br
.B lzma
is equivalent to
.BR "xz \-\-format=lzma" .
.br
.B unlzma
is equivalent to
.BR "xz \-\-format=lzma \-\-decompress" .
.br
.B lzcat
is equivalent to
.BR "xz \-\-format=lzma \-\-decompress \-\-stdout" .
.PP
When writing scripts that need to decompress files,
it is recommended to always use the name
.B xz
with appropriate arguments
.RB ( "xz \-d"
or
.BR "xz \-dc" )
instead of the names
.B unxz
and
.BR xzcat .
.
.SH DESCRIPTION
.B xz
is a general-purpose data compression tool with
command line syntax similar to
.BR gzip (1)
and
.BR bzip2 (1).
The native file format is the
.B .xz
format, but the legacy
.B .lzma
format used by LZMA Utils and
raw compressed streams with no container format headers
are also supported.
.PP
.B xz
compresses or decompresses each
.I file
according to the selected operation mode.
If no
.I files
are given or
.I file
is
.BR \- ,
.B xz
reads from standard input and writes the processed data
to standard output.
.B xz
will refuse (display an error and skip the
.IR file )
to write compressed data to standard output if it is a terminal.
Similarly,
.B xz
will refuse to read compressed data
from standard input if it is a terminal.
.PP
Unless
.B \-\-stdout
is specified,
.I files
other than
.B \-
are written to a new file whose name is derived from the source
.I file
name:
.IP \(bu 3
When compressing, the suffix of the target file format
.RB ( .xz
or
.BR .lzma )
is appended to the source filename to get the target filename.
.IP \(bu 3
When decompressing, the
.B .xz
or
.B .lzma
suffix is removed from the filename to get the target filename.
.B xz
also recognizes the suffixes
.B .txz
and
.BR .tlz ,
and replaces them with the
.B .tar
suffix.
.PP
If the target file already exists, an error is displayed and the
.I file
is skipped.
.PP
Unless writing to standard output,
.B xz
will display a warning and skip the
.I file
if any of the following applies:
.IP \(bu 3
.I File
is not a regular file.
Symbolic links are not followed,
and thus they are not considered to be regular files.
.IP \(bu 3
.I File
has more than one hard link.
.IP \(bu 3
.I File
has setuid, setgid, or sticky bit set.
.IP \(bu 3
The operation mode is set to compress and the
.I file
already has a suffix of the target file format
.RB ( .xz
or
.B .txz
when compressing to the
.B .xz
format, and
.B .lzma
or
.B .tlz
when compressing to the
.B .lzma
format).
.IP \(bu 3
The operation mode is set to decompress and the
.I file
doesn't have a suffix of any of the supported file formats
.RB ( .xz ,
.BR .txz ,
.BR .lzma ,
or
.BR .tlz ).
.PP
After successfully compressing or decompressing the
.IR file ,
.B xz
copies the owner, group, permissions, access time,
and modification time from the source
.I file
to the target file.
If copying the group fails, the permissions are modified
so that the target file doesn't become accessible to users
who didn't have permission to access the source
.IR file .
.B xz
doesn't support copying other metadata like access control lists
or extended attributes yet.
.PP
Once the target file has been successfully closed, the source
.I file
is removed unless
.B \-\-keep
was specified.
The source
.I file
is never removed if the output is written to standard output.
.PP
Sending
.B SIGINFO
or
.B SIGUSR1
to the
.B xz
process makes it print progress information to standard error.
This has only limited use since when standard error
is a terminal, using
.B \-\-verbose
will display an automatically updating progress indicator.
.
.SS "Memory usage"
The memory usage of
.B xz
varies from a few hundred kilobytes to several gigabytes
depending on the compression settings.
The settings used when compressing a file determine
the memory requirements of the decompressor.
Typically the decompressor needs 5\ % to 20\ % of
the amount of memory that the compressor needed when
creating the file.
For example, decompressing a file created with
.B xz \-9
currently requires 65\ MiB of memory.
Still, it is possible to have
.B .xz
files that require several gigabytes of memory to decompress.
.PP
Especially users of older systems may find
the possibility of very large memory usage annoying.
To prevent uncomfortable surprises,
.B xz
has a built-in memory usage limiter, which is disabled by default.
While some operating systems provide ways to limit
the memory usage of processes, relying on it
wasn't deemed to be flexible enough (e.g. using
.BR ulimit (1)
to limit virtual memory tends to cripple
.BR mmap (2)).
.PP
The memory usage limiter can be enabled with
the command line option \fB\-\-memlimit=\fIlimit\fR.
Often it is more convenient to enable the limiter
by default by setting the environment variable
.BR XZ_DEFAULTS ,
e.g.\&
.BR XZ_DEFAULTS=\-\-memlimit=150MiB .
It is possible to set the limits separately
for compression and decompression
by using \fB\-\-memlimit\-compress=\fIlimit\fR and
\fB\-\-memlimit\-decompress=\fIlimit\fR.
Using these two options outside
.B XZ_DEFAULTS
is rarely useful because a single run of
.B xz
cannot do both compression and decompression and
.BI \-\-memlimit= limit
(or \fB\-M\fR \fIlimit\fR)
is shorter to type on the command line.
.PP
If the specified memory usage limit is exceeded when decompressing,
.B xz
will display an error and decompressing the file will fail.
If the limit is exceeded when compressing,
.B xz
will try to scale the settings down so that the limit
is no longer exceeded (except when using \fB\-\-format=raw\fR
or \fB\-\-no\-adjust\fR).
This way the operation won't fail unless the limit is very small.
The scaling of the settings is done in steps that don't
match the compression level presets, e.g. if the limit is
only slightly less than the amount required for
.BR "xz \-9" ,
the settings will be scaled down only a little,
not all the way down to
.BR "xz \-8" .
.
.SS "Concatenation and padding with .xz files"
It is possible to concatenate
.B .xz
files as is.
.B xz
will decompress such files as if they were a single
.B .xz
file.
.PP
It is possible to insert padding between the concatenated parts
or after the last part.
The padding must consist of null bytes and the size
of the padding must be a multiple of four bytes.
This can be useful e.g. if the
.B .xz
file is stored on a medium that measures file sizes
in 512-byte blocks.
.PP
Concatenation and padding are not allowed with
.B .lzma
files or raw streams.
.
.SH OPTIONS
.
.SS "Integer suffixes and special values"
In most places where an integer argument is expected,
an optional suffix is supported to easily indicate large integers.
There must be no space between the integer and the suffix.
.TP
.B KiB
Multiply the integer by 1,024 (2^10).
.BR Ki ,
.BR k ,
.BR kB ,
.BR K ,
and
.B KB
are accepted as synonyms for
.BR KiB .
.TP
.B MiB
Multiply the integer by 1,048,576 (2^20).
.BR Mi ,
.BR m ,
.BR M ,
and
.B MB
are accepted as synonyms for
.BR MiB .
.TP
.B GiB
Multiply the integer by 1,073,741,824 (2^30).
.BR Gi ,
.BR g ,
.BR G ,
and
.B GB
are accepted as synonyms for
.BR GiB .
.PP
The special value
.B max
can be used to indicate the maximum integer value
supported by the option.
.
.SS "Operation mode"
If multiple operation mode options are given,
the last one takes effect.
.TP
.BR \-z ", " \-\-compress
Compress.
This is the default operation mode when no operation mode option
is specified and no other operation mode is implied from
the command name (for example,
.B unxz
implies
.BR \-\-decompress ).
.TP
.BR \-d ", " \-\-decompress ", " \-\-uncompress
Decompress.
.TP
.BR \-t ", " \-\-test
Test the integrity of compressed
.IR files .
This option is equivalent to
.B "\-\-decompress \-\-stdout"
except that the decompressed data is discarded instead of being
written to standard output.
No files are created or removed.
.TP
.BR \-l ", " \-\-list
Print information about compressed
.IR files .
No uncompressed output is produced,
and no files are created or removed.
In list mode, the program cannot read
the compressed data from standard
input or from other unseekable sources.
.IP ""
The default listing shows basic information about
.IR files ,
one file per line.
To get more detailed information, use also the
.B \-\-verbose
option.
For even more information, use
.B \-\-verbose
twice, but note that this may be slow, because getting all the extra
information requires many seeks.
The width of verbose output exceeds
80 characters, so piping the output to e.g.\&
.B "less\ \-S"
may be convenient if the terminal isn't wide enough.
.IP ""
The exact output may vary between
.B xz
versions and different locales.
For machine-readable output,
.B \-\-robot \-\-list
should be used.
.
.SS "Operation modifiers"
.TP
.BR \-k ", " \-\-keep
Don't delete the input files.
.TP
.BR \-f ", " \-\-force
This option has several effects:
.RS
.IP \(bu 3
If the target file already exists,
delete it before compressing or decompressing.
.IP \(bu 3
Compress or decompress even if the input is
a symbolic link to a regular file,
has more than one hard link,
or has the setuid, setgid, or sticky bit set.
The setuid, setgid, and sticky bits are not copied
to the target file.
.IP \(bu 3
When used with
.B \-\-decompress
.BR \-\-stdout
and
.B xz
cannot recognize the type of the source file,
copy the source file as is to standard output.
This allows
.B xzcat
.B \-\-force
to be used like
.BR cat (1)
for files that have not been compressed with
.BR xz .
Note that in future,
.B xz
might support new compressed file formats, which may make
.B xz
decompress more types of files instead of copying them as is to
standard output.
.BI \-\-format= format
can be used to restrict
.B xz
to decompress only a single file format.
.RE
.TP
.BR \-c ", " \-\-stdout ", " \-\-to\-stdout
Write the compressed or decompressed data to
standard output instead of a file.
This implies
.BR \-\-keep .
.TP
.B \-\-no\-sparse
Disable creation of sparse files.
By default, if decompressing into a regular file,
.B xz
tries to make the file sparse if the decompressed data contains
long sequences of binary zeros.
It also works when writing to standard output
as long as standard output is connected to a regular file
and certain additional conditions are met to make it safe.
Creating sparse files may save disk space and speed up
the decompression by reducing the amount of disk I/O.
.TP
\fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf
When compressing, use
.I .suf
as the suffix for the target file instead of
.B .xz
or
.BR .lzma .
If not writing to standard output and
the source file already has the suffix
.IR .suf ,
a warning is displayed and the file is skipped.
.IP ""
When decompressing, recognize files with the suffix
.I .suf
in addition to files with the
.BR .xz ,
.BR .txz ,
.BR .lzma ,
or
.B .tlz
suffix.
If the source file has the suffix
.IR .suf ,
the suffix is removed to get the target filename.
.IP ""
When compressing or decompressing raw streams
.RB ( \-\-format=raw ),
the suffix must always be specified unless
writing to standard output,
because there is no default suffix for raw streams.
.TP
\fB\-\-files\fR[\fB=\fIfile\fR]
Read the filenames to process from
.IR file ;
if
.I file
is omitted, filenames are read from standard input.
Filenames must be terminated with the newline character.
A dash
.RB ( \- )
is taken as a regular filename; it doesn't mean standard input.
If filenames are given also as command line arguments, they are
processed before the filenames read from
.IR file .
.TP
\fB\-\-files0\fR[\fB=\fIfile\fR]
This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except
that each filename must be terminated with the null character.
.
.SS "Basic file format and compression options"
.TP
\fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat
Specify the file
.I format
to compress or decompress:
.RS
.TP
.B auto
This is the default.
When compressing,
.B auto
is equivalent to
.BR xz .
When decompressing,
the format of the input file is automatically detected.
Note that raw streams (created with
.BR \-\-format=raw )
cannot be auto-detected.
.TP
.B xz
Compress to the
.B .xz
file format, or accept only
.B .xz
files when decompressing.
.TP
.BR lzma ", " alone
Compress to the legacy
.B .lzma
file format, or accept only
.B .lzma
files when decompressing.
The alternative name
.B alone
is provided for backwards compatibility with LZMA Utils.
.TP
.B raw
Compress or uncompress a raw stream (no headers).
This is meant for advanced users only.
To decode raw streams, you need to use
.B \-\-format=raw
and explicitly specify the filter chain,
which normally would have been stored in the container headers.
.RE
.TP
\fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck
Specify the type of the integrity check.
The check is calculated from the uncompressed data and
stored in the
.B .xz
file.
This option has an effect only when compressing into the
.B .xz
format; the
.B .lzma
format doesn't support integrity checks.
The integrity check (if any) is verified when the
.B .xz
file is decompressed.
.IP ""
Supported
.I check
types:
.RS
.TP
.B none
Don't calculate an integrity check at all.
This is usually a bad idea.
This can be useful when integrity of the data is verified
by other means anyway.
.TP
.B crc32
Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet).
.TP
.B crc64
Calculate CRC64 using the polynomial from ECMA-182.
This is the default, since it is slightly better than CRC32
at detecting damaged files and the speed difference is negligible.
.TP
.B sha256
Calculate SHA-256.
This is somewhat slower than CRC32 and CRC64.
.RE
.IP ""
Integrity of the
.B .xz
headers is always verified with CRC32.
It is not possible to change or disable it.
.TP
.BR \-0 " ... " \-9
Select a compression preset level.
The default is
.BR \-6 .
If multiple preset levels are specified,
the last one takes effect.
If a custom filter chain was already specified, setting
a compression preset level clears the custom filter chain.
.IP ""
The differences between the presets are more significant than with
.BR gzip (1)
and
.BR bzip2 (1).
The selected compression settings determine
the memory requirements of the decompressor,
thus using a too high preset level might make it painful
to decompress the file on an old system with little RAM.
Specifically,
.B "it's not a good idea to blindly use \-9 for everything"
like it often is with
.BR gzip (1)
and
.BR bzip2 (1).
.RS
.TP
.BR "\-0" " ... " "\-3"
These are somewhat fast presets.
.B \-0
is sometimes faster than
.B "gzip \-9"
while compressing much better.
The higher ones often have speed comparable to
.BR bzip2 (1)
with comparable or better compression ratio,
although the results
depend a lot on the type of data being compressed.
.TP
.BR "\-4" " ... " "\-6"
Good to very good compression while keeping
decompressor memory usage reasonable even for old systems.
.B \-6
is the default, which is usually a good choice
e.g. for distributing files that need to be decompressible
even on systems with only 16\ MiB RAM.
.RB ( \-5e
or
.B \-6e
may be worth considering too.
See
.BR \-\-extreme .)
.TP
.B "\-7 ... \-9"
These are like
.B \-6
but with higher compressor and decompressor memory requirements.
These are useful only when compressing files bigger than
8\ MiB, 16\ MiB, and 32\ MiB, respectively.
.RE
.IP ""
On the same hardware, the decompression speed is approximately
a constant number of bytes of compressed data per second.
In other words, the better the compression,
the faster the decompression will usually be.
This also means that the amount of uncompressed output
produced per second can vary a lot.
.IP ""
The following table summarises the features of the presets:
.RS
.RS
.PP
.TS
tab(;);
c c c c c
n n n n n.
Preset;DictSize;CompCPU;CompMem;DecMem
\-0;256 KiB;0;3 MiB;1 MiB
\-1;1 MiB;1;9 MiB;2 MiB
\-2;2 MiB;2;17 MiB;3 MiB
\-3;4 MiB;3;32 MiB;5 MiB
\-4;4 MiB;4;48 MiB;5 MiB
\-5;8 MiB;5;94 MiB;9 MiB
\-6;8 MiB;6;94 MiB;9 MiB
\-7;16 MiB;6;186 MiB;17 MiB
\-8;32 MiB;6;370 MiB;33 MiB
\-9;64 MiB;6;674 MiB;65 MiB
.TE
.RE
.RE
.IP ""
Column descriptions:
.RS
.IP \(bu 3
DictSize is the LZMA2 dictionary size.
It is a waste of memory to use a dictionary bigger than
the size of the uncompressed file.
This is why it is good to avoid using the presets
.BR \-7 " ... " \-9
when there's no real need for them.
At
.B \-6
and lower, the amount of memory wasted is
usually low enough to not matter.
.IP \(bu 3
CompCPU is a simplified representation of the LZMA2 settings
that affect compression speed.
The dictionary size affects speed too,
so while CompCPU is the same for levels
.BR \-6 " ... " \-9 ,
higher levels still tend to be a little slower.
To get even slower and thus possibly better compression, see
.BR \-\-extreme .
.IP \(bu 3
CompMem contains the compressor memory requirements
in the single-threaded mode.
It may vary slightly between
.B xz
versions.
Memory requirements of some of the future multithreaded modes may
be dramatically higher than that of the single-threaded mode.
.IP \(bu 3
DecMem contains the decompressor memory requirements.
That is, the compression settings determine
the memory requirements of the decompressor.
The exact decompressor memory usage is slightly more than
the LZMA2 dictionary size, but the values in the table
have been rounded up to the next full MiB.
.RE
.TP
.BR \-e ", " \-\-extreme
Use a slower variant of the selected compression preset level
.RB ( \-0 " ... " \-9 )
to hopefully get a little bit better compression ratio,
but with bad luck this can also make it worse.
Decompressor memory usage is not affected,
but compressor memory usage increases a little at preset levels
.BR \-0 " ... " \-3 .
.IP ""
Since there are two presets with dictionary sizes
4\ MiB and 8\ MiB, the presets
.B \-3e
and
.B \-5e
use slightly faster settings (lower CompCPU) than
.B \-4e
and
.BR \-6e ,
respectively.
That way no two presets are identical.
.RS
.RS
.PP
.TS
tab(;);
c c c c c
n n n n n.
Preset;DictSize;CompCPU;CompMem;DecMem
\-0e;256 KiB;8;4 MiB;1 MiB
\-1e;1 MiB;8;13 MiB;2 MiB
\-2e;2 MiB;8;25 MiB;3 MiB
\-3e;4 MiB;7;48 MiB;5 MiB
\-4e;4 MiB;8;48 MiB;5 MiB
\-5e;8 MiB;7;94 MiB;9 MiB
\-6e;8 MiB;8;94 MiB;9 MiB
\-7e;16 MiB;8;186 MiB;17 MiB
\-8e;32 MiB;8;370 MiB;33 MiB
\-9e;64 MiB;8;674 MiB;65 MiB
.TE
.RE
.RE
.IP ""
For example, there are a total of four presets that use
8\ MiB dictionary, whose order from the fastest to the slowest is
.BR \-5 ,
.BR \-6 ,
.BR \-5e ,
and
.BR \-6e .
.TP
.B \-\-fast
.PD 0
.TP
.B \-\-best
.PD
These are somewhat misleading aliases for
.B \-0
and
.BR \-9 ,
respectively.
These are provided only for backwards compatibility
with LZMA Utils.
Avoid using these options.
.TP
.BI \-\-memlimit\-compress= limit
Set a memory usage limit for compression.
If this option is specified multiple times,
the last one takes effect.
.IP ""
If the compression settings exceed the
.IR limit ,
.B xz
will adjust the settings downwards so that
the limit is no longer exceeded and display a notice that
automatic adjustment was done.
Such adjustments are not made when compressing with
.B \-\-format=raw
or if
.B \-\-no\-adjust
has been specified.
In those cases, an error is displayed and
.B xz
will exit with exit status 1.
.IP ""
The
.I limit
can be specified in multiple ways:
.RS
.IP \(bu 3
The
.I limit
can be an absolute value in bytes.
Using an integer suffix like
.B MiB
can be useful.
Example:
.B "\-\-memlimit\-compress=80MiB"
.IP \(bu 3
The
.I limit
can be specified as a percentage of total physical memory (RAM).
This can be useful especially when setting the
.B XZ_DEFAULTS
environment variable in a shell initialization script
that is shared between different computers.
That way the limit is automatically bigger
on systems with more memory.
Example:
.B "\-\-memlimit\-compress=70%"
.IP \(bu 3
The
.I limit
can be reset back to its default value by setting it to
.BR 0 .
This is currently equivalent to setting the
.I limit
to
.B max
(no memory usage limit).
Once multithreading support has been implemented,
there may be a difference between
.B 0
and
.B max
for the multithreaded case, so it is recommended to use
.B 0
instead of
.B max
until the details have been decided.
.RE
.IP ""
See also the section
.BR "Memory usage" .
.TP
.BI \-\-memlimit\-decompress= limit
Set a memory usage limit for decompression.
This also affects the
.B \-\-list
mode.
If the operation is not possible without exceeding the
.IR limit ,
.B xz
will display an error and decompressing the file will fail.
See
.BI \-\-memlimit\-compress= limit
for possible ways to specify the
.IR limit .
.TP
\fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit
This is equivalent to specifying \fB\-\-memlimit\-compress=\fIlimit
\fB\-\-memlimit\-decompress=\fIlimit\fR.
.TP
.B \-\-no\-adjust
Display an error and exit if the compression settings exceed
the memory usage limit.
The default is to adjust the settings downwards so
that the memory usage limit is not exceeded.
Automatic adjusting is always disabled when creating raw streams
.RB ( \-\-format=raw ).
.TP
\fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads
Specify the number of worker threads to use.
The actual number of threads can be less than
.I threads
if using more threads would exceed the memory usage limit.
.IP ""
.B "Multithreaded compression and decompression are not"
.B "implemented yet, so this option has no effect for now."
.IP ""
.B "As of writing (2010-09-27), it hasn't been decided"
.B "if threads will be used by default on multicore systems"
.B "once support for threading has been implemented."
.B "Comments are welcome."
The complicating factor is that using many threads
will increase the memory usage dramatically.
Note that if multithreading will be the default,
it will probably be done so that single-threaded and
multithreaded modes produce the same output,
so compression ratio won't be significantly affected
if threading will be enabled by default.
.
.SS "Custom compressor filter chains"
A custom filter chain allows specifying
the compression settings in detail instead of relying on
the settings associated to the preset levels.
When a custom filter chain is specified,
the compression preset level options
(\fB\-0\fR ... \fB\-9\fR and \fB\-\-extreme\fR) are
silently ignored.
.PP
A filter chain is comparable to piping on the command line.
When compressing, the uncompressed input goes to the first filter,
whose output goes to the next filter (if any).
The output of the last filter gets written to the compressed file.
The maximum number of filters in the chain is four,
but typically a filter chain has only one or two filters.
.PP
Many filters have limitations on where they can be
in the filter chain:
some filters can work only as the last filter in the chain,
some only as a non-last filter, and some work in any position
in the chain.
Depending on the filter, this limitation is either inherent to
the filter design or exists to prevent security issues.
.PP
A custom filter chain is specified by using one or more
filter options in the order they are wanted in the filter chain.
That is, the order of filter options is significant!
When decoding raw streams
.RB ( \-\-format=raw ),
the filter chain is specified in the same order as
it was specified when compressing.
.PP
Filters take filter-specific
.I options
as a comma-separated list.
Extra commas in
.I options
are ignored.
Every option has a default value, so you need to
specify only those you want to change.
.TP
\fB\-\-lzma1\fR[\fB=\fIoptions\fR]
.PD 0
.TP
\fB\-\-lzma2\fR[\fB=\fIoptions\fR]
.PD
Add LZMA1 or LZMA2 filter to the filter chain.
These filters can be used only as the last filter in the chain.
.IP ""
LZMA1 is a legacy filter,
which is supported almost solely due to the legacy
.B .lzma
file format, which supports only LZMA1.
LZMA2 is an updated
version of LZMA1 to fix some practical issues of LZMA1.
The
.B .xz
format uses LZMA2 and doesn't support LZMA1 at all.
Compression speed and ratios of LZMA1 and LZMA2
are practically the same.
.IP ""
LZMA1 and LZMA2 share the same set of
.IR options :
.RS
.TP
.BI preset= preset
Reset all LZMA1 or LZMA2
.I options
to
.IR preset .
.I Preset
consists of an integer, which may be followed by single-letter
preset modifiers.
The integer can be from
.B 0
to
.BR 9 ,
matching the command line options \fB\-0\fR ... \fB\-9\fR.
The only supported modifier is currently
.BR e ,
which matches
.BR \-\-extreme .
The default
.I preset
is
.BR 6 ,
from which the default values for the rest of the LZMA1 or LZMA2
.I options
are taken.
.TP
.BI dict= size
Dictionary (history buffer)
.I size
indicates how many bytes of the recently processed
uncompressed data is kept in memory.
The algorithm tries to find repeating byte sequences (matches) in
the uncompressed data, and replace them with references
to the data currently in the dictionary.
The bigger the dictionary, the higher is the chance
to find a match.
Thus, increasing dictionary
.I size
usually improves compression ratio, but
a dictionary bigger than the uncompressed file is a waste of memory.
.IP ""
Typical dictionary
.I size
is from 64\ KiB to 64\ MiB.
The minimum is 4\ KiB.
The maximum for compression is currently 1.5\ GiB (1536\ MiB).
The decompressor already supports dictionaries up to
one byte less than 4\ GiB, which is the maximum for
the LZMA1 and LZMA2 stream formats.
.IP ""
Dictionary
.I size
and match finder
.RI ( mf )
together determine the memory usage of the LZMA1 or LZMA2 encoder.
The same (or bigger) dictionary
.I size
that was used when compressing is required for decompressing,
thus the memory usage of the decoder is determined
by the dictionary size used when compressing.
The
.B .xz
headers store the dictionary
.I size
either as
.RI "2^" n
or
.RI "2^" n " + 2^(" n "\-1),"
so these
.I sizes
are somewhat preferred for compression.
Other
.I sizes
will get rounded up when stored in the
.B .xz
headers.
.TP
.BI lc= lc
Specify the number of literal context bits.
The minimum is 0 and the maximum is 4; the default is 3.
In addition, the sum of
.I lc
and
.I lp
must not exceed 4.
.IP ""
All bytes that cannot be encoded as matches
are encoded as literals.
That is, literals are simply 8-bit bytes
that are encoded one at a time.
.IP ""
The literal coding makes an assumption that the highest
.I lc
bits of the previous uncompressed byte correlate
with the next byte.
E.g. in typical English text, an upper-case letter is
often followed by a lower-case letter, and a lower-case
letter is usually followed by another lower-case letter.
In the US-ASCII character set, the highest three bits are 010
for upper-case letters and 011 for lower-case letters.
When
.I lc
is at least 3, the literal coding can take advantage of
this property in the uncompressed data.
.IP ""
The default value (3) is usually good.
If you want maximum compression, test
.BR lc=4 .
Sometimes it helps a little, and
sometimes it makes compression worse.
If it makes it worse, test e.g.\&
.B lc=2
too.
.TP
.BI lp= lp
Specify the number of literal position bits.
The minimum is 0 and the maximum is 4; the default is 0.
.IP ""
.I Lp
affects what kind of alignment in the uncompressed data is
assumed when encoding literals.
See
.I pb
below for more information about alignment.
.TP
.BI pb= pb
Specify the number of position bits.
The minimum is 0 and the maximum is 4; the default is 2.
.IP ""
.I Pb
affects what kind of alignment in the uncompressed data is
assumed in general.
The default means four-byte alignment
.RI (2^ pb =2^2=4),
which is often a good choice when there's no better guess.
.IP ""
When the alignment is known, setting
.I pb
accordingly may reduce the file size a little.
E.g. with text files having one-byte
alignment (US-ASCII, ISO-8859-*, UTF-8), setting
.B pb=0
can improve compression slightly.
For UTF-16 text,
.B pb=1
is a good choice.
If the alignment is an odd number like 3 bytes,
.B pb=0
might be the best choice.
.IP ""
Even though the assumed alignment can be adjusted with
.I pb
and
.IR lp ,
LZMA1 and LZMA2 still slightly favor 16-byte alignment.
It might be worth taking into account when designing file formats
that are likely to be often compressed with LZMA1 or LZMA2.
.TP
.BI mf= mf
Match finder has a major effect on encoder speed,
memory usage, and compression ratio.
Usually Hash Chain match finders are faster than Binary Tree
match finders.
The default depends on the
.IR preset :
0 uses
.BR hc3 ,
1\-3
use
.BR hc4 ,
and the rest use
.BR bt4 .
.IP ""
The following match finders are supported.
The memory usage formulas below are rough approximations,
which are closest to the reality when
.I dict
is a power of two.
.RS
.TP
.B hc3
Hash Chain with 2- and 3-byte hashing
.br
Minimum value for
.IR nice :
3
.br
Memory usage:
.br
.I dict
* 7.5 (if
.I dict
<= 16 MiB);
.br
.I dict
* 5.5 + 64 MiB (if
.I dict
> 16 MiB)
.TP
.B hc4
Hash Chain with 2-, 3-, and 4-byte hashing
.br
Minimum value for
.IR nice :
4
.br
Memory usage:
.br
.I dict
* 7.5 (if
.I dict
<= 32 MiB);
.br
.I dict
* 6.5 (if
.I dict
> 32 MiB)
.TP
.B bt2
Binary Tree with 2-byte hashing
.br
Minimum value for
.IR nice :
2
.br
Memory usage:
.I dict
* 9.5
.TP
.B bt3
Binary Tree with 2- and 3-byte hashing
.br
Minimum value for
.IR nice :
3
.br
Memory usage:
.br
.I dict
* 11.5 (if
.I dict
<= 16 MiB);
.br
.I dict
* 9.5 + 64 MiB (if
.I dict
> 16 MiB)
.TP
.B bt4
Binary Tree with 2-, 3-, and 4-byte hashing
.br
Minimum value for
.IR nice :
4
.br
Memory usage:
.br
.I dict
* 11.5 (if
.I dict
<= 32 MiB);
.br
.I dict
* 10.5 (if
.I dict
> 32 MiB)
.RE
.TP
.BI mode= mode
Compression
.I mode
specifies the method to analyze
the data produced by the match finder.
Supported
.I modes
are
.B fast
and
.BR normal .
The default is
.B fast
for
.I presets
0\-3 and
.B normal
for
.I presets
4\-9.
.IP ""
Usually
.B fast
is used with Hash Chain match finders and
.B normal
with Binary Tree match finders.
This is also what the
.I presets
do.
.TP
.BI nice= nice
Specify what is considered to be a nice length for a match.
Once a match of at least
.I nice
bytes is found, the algorithm stops
looking for possibly better matches.
.IP ""
.I Nice
can be 2\-273 bytes.
Higher values tend to give better compression ratio
at the expense of speed.
The default depends on the
.IR preset .
.TP
.BI depth= depth
Specify the maximum search depth in the match finder.
The default is the special value of 0,
which makes the compressor determine a reasonable
.I depth
from
.I mf
and
.IR nice .
.IP ""
Reasonable
.I depth
for Hash Chains is 4\-100 and 16\-1000 for Binary Trees.
Using very high values for
.I depth
can make the encoder extremely slow with some files.
Avoid setting the
.I depth
over 1000 unless you are prepared to interrupt
the compression in case it is taking far too long.
.RE
.IP ""
When decoding raw streams
.RB ( \-\-format=raw ),
LZMA2 needs only the dictionary
.IR size .
LZMA1 needs also
.IR lc ,
.IR lp ,
and
.IR pb .
.TP
\fB\-\-x86\fR[\fB=\fIoptions\fR]
.PD 0
.TP
\fB\-\-powerpc\fR[\fB=\fIoptions\fR]
.TP
\fB\-\-ia64\fR[\fB=\fIoptions\fR]
.TP
\fB\-\-arm\fR[\fB=\fIoptions\fR]
.TP
\fB\-\-armthumb\fR[\fB=\fIoptions\fR]
.TP
\fB\-\-sparc\fR[\fB=\fIoptions\fR]
.PD
Add a branch/call/jump (BCJ) filter to the filter chain.
These filters can be used only as a non-last filter
in the filter chain.
.IP ""
A BCJ filter converts relative addresses in
the machine code to their absolute counterparts.
This doesn't change the size of the data,
but it increases redundancy,
which can help LZMA2 to produce 0\-15\ % smaller
.B .xz
file.
The BCJ filters are always reversible,
so using a BCJ filter for the wrong type of data
doesn't cause any data loss, although it may make
the compression ratio slightly worse.
.IP ""
It is fine to apply a BCJ filter on a whole executable;
there's no need to apply it only on the executable section.
Applying a BCJ filter on an archive that contains both executable
and non-executable files may or may not give good results,
so it generally isn't good to blindly apply a BCJ filter when
compressing binary packages for distribution.
.IP ""
These BCJ filters are very fast and
use an insignificant amount of memory.
If a BCJ filter improves compression ratio of a file,
it can improve decompression speed at the same time.
This is because, on the same hardware,
the decompression speed of LZMA2 is roughly
a fixed number of bytes of compressed data per second.
.IP ""
These BCJ filters have known problems related to
the compression ratio:
.RS
.IP \(bu 3
Some types of files containing executable code
(e.g. object files, static libraries, and Linux kernel modules)
have the addresses in the instructions filled with filler values.
These BCJ filters will still do the address conversion,
which will make the compression worse with these files.
.IP \(bu 3
Applying a BCJ filter on an archive containing multiple similar
executables can make the compression ratio worse than not using
a BCJ filter.
This is because the BCJ filter doesn't detect the boundaries
of the executable files, and doesn't reset
the address conversion counter for each executable.
.RE
.IP ""
Both of the above problems will be fixed
in the future in a new filter.
The old BCJ filters will still be useful in embedded systems,
because the decoder of the new filter will be bigger
and use more memory.
.IP ""
Different instruction sets have different alignment:
.RS
.RS
.PP
.TS
tab(;);
l n l
l n l.
Filter;Alignment;Notes
x86;1;32-bit or 64-bit x86
PowerPC;4;Big endian only
ARM;4;Little endian only
ARM-Thumb;2;Little endian only
IA-64;16;Big or little endian
SPARC;4;Big or little endian
.TE
.RE
.RE
.IP ""
Since the BCJ-filtered data is usually compressed with LZMA2,
the compression ratio may be improved slightly if
the LZMA2 options are set to match the
alignment of the selected BCJ filter.
For example, with the IA-64 filter, it's good to set
.B pb=4
with LZMA2 (2^4=16).
The x86 filter is an exception;
it's usually good to stick to LZMA2's default
four-byte alignment when compressing x86 executables.
.IP ""
All BCJ filters support the same
.IR options :
.RS
.TP
.BI start= offset
Specify the start
.I offset
that is used when converting between relative
and absolute addresses.
The
.I offset
must be a multiple of the alignment of the filter
(see the table above).
The default is zero.
In practice, the default is good; specifying a custom
.I offset
is almost never useful.
.RE
.TP
\fB\-\-delta\fR[\fB=\fIoptions\fR]
Add the Delta filter to the filter chain.
The Delta filter can be only used as a non-last filter
in the filter chain.
.IP ""
Currently only simple byte-wise delta calculation is supported.
It can be useful when compressing e.g. uncompressed bitmap images
or uncompressed PCM audio.
However, special purpose algorithms may give significantly better
results than Delta + LZMA2.
This is true especially with audio,
which compresses faster and better e.g. with
.BR flac (1).
.IP ""
Supported
.IR options :
.RS
.TP
.BI dist= distance
Specify the
.I distance
of the delta calculation in bytes.
.I distance
must be 1\-256.
The default is 1.
.IP ""
For example, with
.B dist=2
and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be
A1 B1 01 02 01 02 01 02.
.RE
.
.SS "Other options"
.TP
.BR \-q ", " \-\-quiet
Suppress warnings and notices.
Specify this twice to suppress errors too.
This option has no effect on the exit status.
That is, even if a warning was suppressed,
the exit status to indicate a warning is still used.
.TP
.BR \-v ", " \-\-verbose
Be verbose.
If standard error is connected to a terminal,
.B xz
will display a progress indicator.
Specifying
.B \-\-verbose
twice will give even more verbose output.
.IP ""
The progress indicator shows the following information:
.RS
.IP \(bu 3
Completion percentage is shown
if the size of the input file is known.
That is, the percentage cannot be shown in pipes.
.IP \(bu 3
Amount of compressed data produced (compressing)
or consumed (decompressing).
.IP \(bu 3
Amount of uncompressed data consumed (compressing)
or produced (decompressing).
.IP \(bu 3
Compression ratio, which is calculated by dividing
the amount of compressed data processed so far by
the amount of uncompressed data processed so far.
.IP \(bu 3
Compression or decompression speed.
This is measured as the amount of uncompressed data consumed
(compression) or produced (decompression) per second.
It is shown after a few seconds have passed since
.B xz
started processing the file.
.IP \(bu 3
Elapsed time in the format M:SS or H:MM:SS.
.IP \(bu 3
Estimated remaining time is shown
only when the size of the input file is
known and a couple of seconds have already passed since
.B xz
started processing the file.
The time is shown in a less precise format which
never has any colons, e.g. 2 min 30 s.
.RE
.IP ""
When standard error is not a terminal,
.B \-\-verbose
will make
.B xz
print the filename, compressed size, uncompressed size,
compression ratio, and possibly also the speed and elapsed time
on a single line to standard error after compressing or
decompressing the file.
The speed and elapsed time are included only when
the operation took at least a few seconds.
If the operation didn't finish, e.g. due to user interruption,
also the completion percentage is printed
if the size of the input file is known.
.TP
.BR \-Q ", " \-\-no\-warn
Don't set the exit status to 2
even if a condition worth a warning was detected.
This option doesn't affect the verbosity level, thus both
.B \-\-quiet
and
.B \-\-no\-warn
have to be used to not display warnings and
to not alter the exit status.
.TP
.B \-\-robot
Print messages in a machine-parsable format.
This is intended to ease writing frontends that want to use
.B xz
instead of liblzma, which may be the case with various scripts.
The output with this option enabled is meant to be stable across
.B xz
releases.
See the section
.B "ROBOT MODE"
for details.
.TP
.BR \-\-info\-memory
Display, in human-readable format, how much physical memory (RAM)
.B xz
thinks the system has and the memory usage limits for compression
and decompression, and exit successfully.
.TP
.BR \-h ", " \-\-help
Display a help message describing the most commonly used options,
and exit successfully.
.TP
.BR \-H ", " \-\-long\-help
Display a help message describing all features of
.BR xz ,
and exit successfully.
.TP
.BR \-V ", " \-\-version
Display the version number of
.B xz
and liblzma in human-readable format.
To get machine-parsable output, specify
.B \-\-robot
before
.BR \-\-version .
.
.SH "ROBOT MODE"
The robot mode is activated with the
.B \-\-robot
option.
It makes the output of
.B xz
easier to parse by other programs.
Currently
.B \-\-robot
is supported only together with
.BR \-\-version ,
.BR \-\-info\-memory ,
and
.BR \-\-list .
It will be supported for normal compression and
decompression in the future.
.
.SS Version
.B "xz \-\-robot \-\-version"
will print the version number of
.B xz
and liblzma in the following format:
.PP
.BI XZ_VERSION= XYYYZZZS
.br
.BI LIBLZMA_VERSION= XYYYZZZS
.TP
.I X
Major version.
.TP
.I YYY
Minor version.
Even numbers are stable.
Odd numbers are alpha or beta versions.
.TP
.I ZZZ
Patch level for stable releases or
just a counter for development releases.
.TP
.I S
Stability.
0 is alpha, 1 is beta, and 2 is stable.
.I S
should be always 2 when
.I YYY
is even.
.PP
.I XYYYZZZS
are the same on both lines if
.B xz
and liblzma are from the same XZ Utils release.
.PP
Examples: 4.999.9beta is
.B 49990091
and
5.0.0 is
.BR 50000002 .
.
.SS "Memory limit information"
.B "xz \-\-robot \-\-info\-memory"
prints a single line with three tab-separated columns:
.IP 1. 4
Total amount of physical memory (RAM) in bytes
.IP 2. 4
Memory usage limit for compression in bytes.
A special value of zero indicates the default setting,
which for single-threaded mode is the same as no limit.
.IP 3. 4
Memory usage limit for decompression in bytes.
A special value of zero indicates the default setting,
which for single-threaded mode is the same as no limit.
.PP
In the future, the output of
.B "xz \-\-robot \-\-info\-memory"
may have more columns, but never more than a single line.
.
.SS "List mode"
.B "xz \-\-robot \-\-list"
uses tab-separated output.
The first column of every line has a string
that indicates the type of the information found on that line:
.TP
.B name
This is always the first line when starting to list a file.
The second column on the line is the filename.
.TP
.B file
This line contains overall information about the
.B .xz
file.
This line is always printed after the
.B name
line.
.TP
.B stream
This line type is used only when
.B \-\-verbose
was specified.
There are as many
.B stream
lines as there are streams in the
.B .xz
file.
.TP
.B block
This line type is used only when
.B \-\-verbose
was specified.
There are as many
.B block
lines as there are blocks in the
.B .xz
file.
The
.B block
lines are shown after all the
.B stream
lines; different line types are not interleaved.
.TP
.B summary
This line type is used only when
.B \-\-verbose
was specified twice.
This line is printed after all
.B block
lines.
Like the
.B file
line, the
.B summary
line contains overall information about the
.B .xz
file.
.TP
.B totals
This line is always the very last line of the list output.
It shows the total counts and sizes.
.PP
The columns of the
.B file
lines:
.PD 0
.RS
.IP 2. 4
Number of streams in the file
.IP 3. 4
Total number of blocks in the stream(s)
.IP 4. 4
Compressed size of the file
.IP 5. 4
Uncompressed size of the file
.IP 6. 4
Compression ratio, for example
.BR 0.123 .
If the ratio is over 9.999, three dashes
.RB ( \-\-\- )
are displayed instead of the ratio.
.IP 7. 4
Comma-separated list of integrity check names.
The following strings are used for the known check types:
.BR None ,
.BR CRC32 ,
.BR CRC64 ,
and
.BR SHA\-256 .
For unknown check types,
.BI Unknown\- N
is used, where
.I N
is the Check ID as a decimal number (one or two digits).
.IP 8. 4
Total size of stream padding in the file
.RE
.PD
.PP
The columns of the
.B stream
lines:
.PD 0
.RS
.IP 2. 4
Stream number (the first stream is 1)
.IP 3. 4
Number of blocks in the stream
.IP 4. 4
Compressed start offset
.IP 5. 4
Uncompressed start offset
.IP 6. 4
Compressed size (does not include stream padding)
.IP 7. 4
Uncompressed size
.IP 8. 4
Compression ratio
.IP 9. 4
Name of the integrity check
.IP 10. 4
Size of stream padding
.RE
.PD
.PP
The columns of the
.B block
lines:
.PD 0
.RS
.IP 2. 4
Number of the stream containing this block
.IP 3. 4
Block number relative to the beginning of the stream
(the first block is 1)
.IP 4. 4
Block number relative to the beginning of the file
.IP 5. 4
Compressed start offset relative to the beginning of the file
.IP 6. 4
Uncompressed start offset relative to the beginning of the file
.IP 7. 4
Total compressed size of the block (includes headers)
.IP 8. 4
Uncompressed size
.IP 9. 4
Compression ratio
.IP 10. 4
Name of the integrity check
.RE
.PD
.PP
If
.B \-\-verbose
was specified twice, additional columns are included on the
.B block
lines.
These are not displayed with a single
.BR \-\-verbose ,
because getting this information requires many seeks
and can thus be slow:
.PD 0
.RS
.IP 11. 4
Value of the integrity check in hexadecimal
.IP 12. 4
Block header size
.IP 13. 4
Block flags:
.B c
indicates that compressed size is present, and
.B u
indicates that uncompressed size is present.
If the flag is not set, a dash
.RB ( \- )
is shown instead to keep the string length fixed.
New flags may be added to the end of the string in the future.
.IP 14. 4
Size of the actual compressed data in the block (this excludes
the block header, block padding, and check fields)
.IP 15. 4
Amount of memory (in bytes) required to decompress
this block with this
.B xz
version
.IP 16. 4
Filter chain.
Note that most of the options used at compression time
cannot be known, because only the options
that are needed for decompression are stored in the
.B .xz
headers.
.RE
.PD
.PP
The columns of the
.B summary
lines:
.PD 0
.RS
.IP 2. 4
Amount of memory (in bytes) required to decompress
this file with this
.B xz
version
.IP 3. 4
.B yes
or
.B no
indicating if all block headers have both compressed size and
uncompressed size stored in them
.RE
.PD
.PP
The columns of the
.B totals
line:
.PD 0
.RS
.IP 2. 4
Number of streams
.IP 3. 4
Number of blocks
.IP 4. 4
Compressed size
.IP 5. 4
Uncompressed size
.IP 6. 4
Average compression ratio
.IP 7. 4
Comma-separated list of integrity check names
that were present in the files
.IP 8. 4
Stream padding size
.IP 9. 4
Number of files.
This is here to
keep the order of the earlier columns the same as on
.B file
lines.
.PD
.RE
.PP
If
.B \-\-verbose
was specified twice, additional columns are included on the
.B totals
line:
.PD 0
.RS
.IP 10. 4
Maximum amount of memory (in bytes) required to decompress
the files with this
.B xz
version
.IP 11. 4
.B yes
or
.B no
indicating if all block headers have both compressed size and
uncompressed size stored in them
.RE
.PD
.PP
Future versions may add new line types and
new columns can be added to the existing line types,
but the existing columns won't be changed.
.
.SH "EXIT STATUS"
.TP
.B 0
All is good.
.TP
.B 1
An error occurred.
.TP
.B 2
Something worth a warning occurred,
but no actual errors occurred.
.PP
Notices (not warnings or errors) printed on standard error
don't affect the exit status.
.
.SH ENVIRONMENT
.B xz
parses space-separated lists of options
from the environment variables
.B XZ_DEFAULTS
and
.BR XZ_OPT ,
in this order, before parsing the options from the command line.
Note that only options are parsed from the environment variables;
all non-options are silently ignored.
Parsing is done with
.BR getopt_long (3)
which is used also for the command line arguments.
.TP
.B XZ_DEFAULTS
User-specific or system-wide default options.
Typically this is set in a shell initialization script to enable
.BR xz 's
memory usage limiter by default.
Excluding shell initialization scripts
and similar special cases, scripts must never set or unset
.BR XZ_DEFAULTS .
.TP
.B XZ_OPT
This is for passing options to
.B xz
when it is not possible to set the options directly on the
.B xz
command line.
This is the case e.g. when
.B xz
is run by a script or tool, e.g. GNU
.BR tar (1):
.RS
.RS
.PP
.nf
.ft CW
XZ_OPT=\-2v tar caf foo.tar.xz foo
.ft R
.fi
.RE
.RE
.IP ""
Scripts may use
.B XZ_OPT
e.g. to set script-specific default compression options.
It is still recommended to allow users to override
.B XZ_OPT
if that is reasonable, e.g. in
.BR sh (1)
scripts one may use something like this:
.RS
.RS
.PP
.nf
.ft CW
XZ_OPT=${XZ_OPT\-"\-7e"}
export XZ_OPT
.ft R
.fi
.RE
.RE
.
.SH "LZMA UTILS COMPATIBILITY"
The command line syntax of
.B xz
is practically a superset of
.BR lzma ,
.BR unlzma ,
and
.BR lzcat
as found from LZMA Utils 4.32.x.
In most cases, it is possible to replace
LZMA Utils with XZ Utils without breaking existing scripts.
There are some incompatibilities though,
which may sometimes cause problems.
.
.SS "Compression preset levels"
The numbering of the compression level presets is not identical in
.B xz
and LZMA Utils.
The most important difference is how dictionary sizes
are mapped to different presets.
Dictionary size is roughly equal to the decompressor memory usage.
.RS
.PP
.TS
tab(;);
c c c
c n n.
Level;xz;LZMA Utils
\-0;256 KiB;N/A
\-1;1 MiB;64 KiB
\-2;2 MiB;1 MiB
\-3;4 MiB;512 KiB
\-4;4 MiB;1 MiB
\-5;8 MiB;2 MiB
\-6;8 MiB;4 MiB
\-7;16 MiB;8 MiB
\-8;32 MiB;16 MiB
\-9;64 MiB;32 MiB
.TE
.RE
.PP
The dictionary size differences affect
the compressor memory usage too,
but there are some other differences between
LZMA Utils and XZ Utils, which
make the difference even bigger:
.RS
.PP
.TS
tab(;);
c c c
c n n.
Level;xz;LZMA Utils 4.32.x
\-0;3 MiB;N/A
\-1;9 MiB;2 MiB
\-2;17 MiB;12 MiB
\-3;32 MiB;12 MiB
\-4;48 MiB;16 MiB
\-5;94 MiB;26 MiB
\-6;94 MiB;45 MiB
\-7;186 MiB;83 MiB
\-8;370 MiB;159 MiB
\-9;674 MiB;311 MiB
.TE
.RE
.PP
The default preset level in LZMA Utils is
.B \-7
while in XZ Utils it is
.BR \-6 ,
so both use an 8 MiB dictionary by default.
.
.SS "Streamed vs. non-streamed .lzma files"
The uncompressed size of the file can be stored in the
.B .lzma
header.
LZMA Utils does that when compressing regular files.
The alternative is to mark the uncompressed size as unknown
and use an end-of-payload marker to indicate
where the decompressor should stop.
LZMA Utils uses this method when uncompressed size isn't known,
which is the case for example in pipes.
.PP
.B xz
supports decompressing
.B .lzma
files with or without end-of-payload marker, but all
.B .lzma
files created by
.B xz
will use end-of-payload marker and have uncompressed size
marked as unknown in the
.B .lzma
header.
This may be a problem in some uncommon situations.
For example, a
.B .lzma
decompressor in an embedded device might work
only with files that have known uncompressed size.
If you hit this problem, you need to use LZMA Utils
or LZMA SDK to create
.B .lzma
files with known uncompressed size.
.
.SS "Unsupported .lzma files"
The
.B .lzma
format allows
.I lc
values up to 8, and
.I lp
values up to 4.
LZMA Utils can decompress files with any
.I lc
and
.IR lp ,
but always creates files with
.B lc=3
and
.BR lp=0 .
Creating files with other
.I lc
and
.I lp
is possible with
.B xz
and with LZMA SDK.
.PP
The implementation of the LZMA1 filter in liblzma
requires that the sum of
.I lc
and
.I lp
must not exceed 4.
Thus,
.B .lzma
files, which exceed this limitation, cannot be decompressed with
.BR xz .
.PP
LZMA Utils creates only
.B .lzma
files which have a dictionary size of
.RI "2^" n
(a power of 2) but accepts files with any dictionary size.
liblzma accepts only
.B .lzma
files which have a dictionary size of
.RI "2^" n
or
.RI "2^" n " + 2^(" n "\-1)."
This is to decrease false positives when detecting
.B .lzma
files.
.PP
These limitations shouldn't be a problem in practice,
since practically all
.B .lzma
files have been compressed with settings that liblzma will accept.
.
.SS "Trailing garbage"
When decompressing,
LZMA Utils silently ignores everything after the first
.B .lzma
stream.
In most situations, this is a bug.
This also means that LZMA Utils
doesn't support decompressing concatenated
.B .lzma
files.
.PP
If there is data left after the first
.B .lzma
stream,
.B xz
considers the file to be corrupt.
This may break obscure scripts which have
assumed that trailing garbage is ignored.
.
.SH NOTES
.
.SS "Compressed output may vary"
The exact compressed output produced from
the same uncompressed input file
may vary between XZ Utils versions even if
compression options are identical.
This is because the encoder can be improved
(faster or better compression)
without affecting the file format.
The output can vary even between different
builds of the same XZ Utils version,
if different build options are used.
.PP
The above means that implementing
.B \-\-rsyncable
to create rsyncable
.B .xz
files is not going to happen without
freezing a part of the encoder
implementation, which can then be used with
.BR \-\-rsyncable .
.
.SS "Embedded .xz decompressors"
Embedded
.B .xz
decompressor implementations like XZ Embedded don't necessarily
support files created with integrity
.I check
types other than
.B none
and
.BR crc32 .
Since the default is
.BR \-\-check=crc64 ,
you must use
.B \-\-check=none
or
.B \-\-check=crc32
when creating files for embedded systems.
.PP
Outside embedded systems, all
.B .xz
format decompressors support all the
.I check
types, or at least are able to decompress
the file without verifying the
integrity check if the particular
.I check
is not supported.
.PP
XZ Embedded supports BCJ filters,
but only with the default start offset.
.
.SH EXAMPLES
.
.SS Basics
Compress the file
.I foo
into
.I foo.xz
using the default compression level
.RB ( \-6 ),
and remove
.I foo
if compression is successful:
.RS
.PP
.nf
.ft CW
xz foo
.ft R
.fi
.RE
.PP
Decompress
.I bar.xz
into
.I bar
and don't remove
.I bar.xz
even if decompression is successful:
.RS
.PP
.nf
.ft CW
xz \-dk bar.xz
.ft R
.fi
.RE
.PP
Create
.I baz.tar.xz
with the preset
.B \-4e
.RB ( "\-4 \-\-extreme" ),
which is slower than e.g. the default
.BR \-6 ,
but needs less memory for compression and decompression (48\ MiB
and 5\ MiB, respectively):
.RS
.PP
.nf
.ft CW
tar cf \- baz | xz \-4e > baz.tar.xz
.ft R
.fi
.RE
.PP
A mix of compressed and uncompressed files can be decompressed
to standard output with a single command:
.RS
.PP
.nf
.ft CW
xz \-dcf a.txt b.txt.xz c.txt d.txt.lzma > abcd.txt
.ft R
.fi
.RE
.
.SS "Parallel compression of many files"
On GNU and *BSD,
.BR find (1)
and
.BR xargs (1)
can be used to parallelize compression of many files:
.RS
.PP
.nf
.ft CW
find . \-type f \e! \-name '*.xz' \-print0 \e
| xargs \-0r \-P4 \-n16 xz \-T1
.ft R
.fi
.RE
.PP
The
.B \-P
option to
.BR xargs (1)
sets the number of parallel
.B xz
processes.
The best value for the
.B \-n
option depends on how many files there are to be compressed.
If there are only a couple of files,
the value should probably be 1;
with tens of thousands of files,
100 or even more may be appropriate to reduce the number of
.B xz
processes that
.BR xargs (1)
will eventually create.
.PP
The option
.B \-T1
for
.B xz
is there to force it to single-threaded mode, because
.BR xargs (1)
is used to control the amount of parallelization.
.
.SS "Robot mode"
Calculate how many bytes have been saved in total
after compressing multiple files:
.RS
.PP
.nf
.ft CW
xz \-\-robot \-\-list *.xz | awk '/^totals/{print $5\-$4}'
.ft R
.fi
.RE
.PP
A script may want to know that it is using new enough
.BR xz .
The following
.BR sh (1)
script checks that the version number of the
.B xz
tool is at least 5.0.0.
This method is compatible with old beta versions,
which didn't support the
.B \-\-robot
option:
.RS
.PP
.nf
.ft CW
if ! eval "$(xz \-\-robot \-\-version 2> /dev/null)" ||
[ "$XZ_VERSION" \-lt 50000002 ]; then
echo "Your xz is too old."
fi
unset XZ_VERSION LIBLZMA_VERSION
.ft R
.fi
.RE
.PP
Set a memory usage limit for decompression using
.BR XZ_OPT ,
but if a limit has already been set, don't increase it:
.RS
.PP
.nf
.ft CW
NEWLIM=$((123 << 20)) # 123 MiB
OLDLIM=$(xz \-\-robot \-\-info\-memory | cut \-f3)
if [ $OLDLIM \-eq 0 \-o $OLDLIM \-gt $NEWLIM ]; then
XZ_OPT="$XZ_OPT \-\-memlimit\-decompress=$NEWLIM"
export XZ_OPT
fi
.ft R
.fi
.RE
.
.SS "Custom compressor filter chains"
The simplest use for custom filter chains is
customizing an LZMA2 preset.
This can be useful,
because the presets cover only a subset of the
potentially useful combinations of compression settings.
.PP
The CompCPU columns of the tables
from the descriptions of the options
.BR "\-0" " ... " "\-9"
and
.B \-\-extreme
are useful when customizing LZMA2 presets.
Here are the relevant parts collected from those two tables:
.RS
.PP
.TS
tab(;);
c c
n n.
Preset;CompCPU
\-0;0
\-1;1
\-2;2
\-3;3
\-4;4
\-5;5
\-6;6
\-5e;7
\-6e;8
.TE
.RE
.PP
If you know that a file requires
a somewhat big dictionary (e.g. 32 MiB) to compress well,
but you want to compress it quicker than
.B "xz \-8"
would do, a preset with a low CompCPU value (e.g. 1)
can be modified to use a bigger dictionary:
.RS
.PP
.nf
.ft CW
xz \-\-lzma2=preset=1,dict=32MiB foo.tar
.ft R
.fi
.RE
.PP
With certain files, the above command may be faster than
.B "xz \-6"
while compressing significantly better.
However, it must be emphasized that only some files benefit from
a big dictionary while keeping the CompCPU value low.
The most obvious situation,
where a big dictionary can help a lot,
is an archive containing very similar files
of at least a few megabytes each.
The dictionary size has to be significantly bigger
than any individual file to allow LZMA2 to take
full advantage of the similarities between consecutive files.
.PP
If very high compressor and decompressor memory usage is fine,
and the file being compressed is
at least several hundred megabytes, it may be useful
to use an even bigger dictionary than the 64 MiB that
.B "xz \-9"
would use:
.RS
.PP
.nf
.ft CW
xz \-vv \-\-lzma2=dict=192MiB big_foo.tar
.ft R
.fi
.RE
.PP
Using
.B \-vv
.RB ( "\-\-verbose \-\-verbose" )
like in the above example can be useful
to see the memory requirements
of the compressor and decompressor.
Remember that using a dictionary bigger than
the size of the uncompressed file is a waste of memory,
so the above command isn't useful for small files.
.PP
Sometimes the compression time doesn't matter,
but the decompressor memory usage has to be kept low
e.g. to make it possible to decompress the file on
an embedded system.
The following command uses
.B \-6e
.RB ( "\-6 \-\-extreme" )
as a base and sets the dictionary to only 64\ KiB.
The resulting file can be decompressed with XZ Embedded
(that's why there is
.BR \-\-check=crc32 )
using about 100\ KiB of memory.
.RS
.PP
.nf
.ft CW
xz \-\-check=crc32 \-\-lzma2=preset=6e,dict=64KiB foo
.ft R
.fi
.RE
.PP
If you want to squeeze out as many bytes as possible,
adjusting the number of literal context bits
.RI ( lc )
and number of position bits
.RI ( pb )
can sometimes help.
Adjusting the number of literal position bits
.RI ( lp )
might help too, but usually
.I lc
and
.I pb
are more important.
E.g. a source code archive contains mostly US-ASCII text,
so something like the following might give
a slightly (like 0.1\ %) smaller file than
.B "xz \-6e"
(try also without
.BR lc=4 ):
.RS
.PP
.nf
.ft CW
xz \-\-lzma2=preset=6e,pb=0,lc=4 source_code.tar
.ft R
.fi
.RE
.PP
Using another filter together with LZMA2 can improve
compression with certain file types.
E.g. to compress an x86-32 or x86-64 shared library
using the x86 BCJ filter:
.RS
.PP
.nf
.ft CW
xz \-\-x86 \-\-lzma2 libfoo.so
.ft R
.fi
.RE
.PP
Note that the order of the filter options is significant.
If
.B \-\-x86
is specified after
.BR \-\-lzma2 ,
.B xz
will give an error,
because there cannot be any filter after LZMA2,
and also because the x86 BCJ filter cannot be used
as the last filter in the chain.
.PP
The Delta filter together with LZMA2
can give good results with bitmap images.
It should usually beat PNG,
which has a few more advanced filters than simple
delta but uses Deflate for the actual compression.
.PP
The image has to be saved in uncompressed format,
e.g. as uncompressed TIFF.
The distance parameter of the Delta filter is set
to match the number of bytes per pixel in the image.
E.g. a 24-bit RGB bitmap needs
.BR dist=3 ,
and it is also good to pass
.B pb=0
to LZMA2 to accommodate the three-byte alignment:
.RS
.PP
.nf
.ft CW
xz \-\-delta=dist=3 \-\-lzma2=pb=0 foo.tiff
.ft R
.fi
.RE
.PP
If multiple images have been put into a single archive (e.g.\&
.BR .tar ),
the Delta filter will work on that too as long as all images
have the same number of bytes per pixel.
.
.SH "SEE ALSO"
.BR xzdec (1),
.BR xzdiff (1),
.BR xzgrep (1),
.BR xzless (1),
.BR xzmore (1),
.BR gzip (1),
.BR bzip2 (1),
.BR 7z (1)
.PP
XZ Utils: http://tukaani.org/xz/
.br
XZ Embedded: http://tukaani.org/xz/embedded.html
.br
LZMA SDK: http://7-zip.org/sdk.html
070701000996c2000081a40000000000000000000000015174a52f000005a6000000b600010002ffffffffffffffff0000002700000000root/usr/local/share/man/man1/xzgrep.1 .\"
.\" Original zgrep.1 for gzip: Jean-loup Gailly
.\" Charles Levert
.\"
.\" Modifications for XZ Utils: Lasse Collin
.\"
.\" License: GNU GPLv2+
.\"
.TH XZGREP 1 "2010-09-27" "Tukaani" "XZ Utils"
.SH NAME
xzgrep \- search compressed files for a regular expression
.SH SYNOPSIS
.B xzgrep
.RI [ grep_options ]
.RB [ \-e ]
.I pattern
.IR file "..."
.br
.B xzegrep
.RB ...
.br
.B xzfgrep
.RB ...
.br
.B lzgrep
.RB ...
.br
.B lzegrep
.RB ...
.br
.B lzfgrep
.RB ...
.SH DESCRIPTION
.B xzgrep
invokes
.BR grep (1)
on
.I files
which may be either uncompressed or compressed with
.BR xz (1),
.BR lzma (1),
.BR gzip (1),
or
.BR bzip2 (1).
All options specified are passed directly to
.BR grep (1).
.PP
If no
.I file
is specified, then standard input is decompressed if necessary
and fed to
.BR grep (1).
When reading from standard input,
.BR gzip (1)
and
.BR bzip2 (1)
compressed files are not supported.
.PP
If
.B xzgrep
is invoked as
.B xzegrep
or
.B xzfgrep
then
.BR egrep (1)
or
.BR fgrep (1)
is used instead of
.BR grep (1).
The same applies to names
.BR lzgrep ,
.BR lzegrep ,
and
.BR lzfgrep ,
which are provided for backward compatibility with LZMA Utils.
.PP
.SH ENVIRONMENT
.TP
.B GREP
If the
.B GREP
environment variable is set,
.B xzgrep
uses it instead of
.BR grep (1),
.BR egrep (1),
or
.BR fgrep (1).
.SH "SEE ALSO"
.BR grep (1),
.BR xz (1),
.BR gzip (1),
.BR bzip2 (1),
.BR zgrep (1)
070701000996c0000081a40000000000000000000000015174a52f00000b15000000b600010002ffffffffffffffff0000002600000000root/usr/local/share/man/man1/xzdec.1 .\"
.\" Author: Lasse Collin
.\"
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
.TH XZDEC 1 "2010-09-27" "Tukaani" "XZ Utils"
.SH NAME
xzdec, lzmadec \- Small .xz and .lzma decompressors
.SH SYNOPSIS
.B xzdec
.RI [ option ]...
.RI [ file ]...
.br
.B lzmadec
.RI [ option ]...
.RI [ file ]...
.SH DESCRIPTION
.B xzdec
is a liblzma-based decompression-only tool for
.B .xz
(and only
.BR .xz )
files.
.B xzdec
is intended to work as a drop-in replacement for
.BR xz (1)
in the most common situations where a script
has been written to use
.B "xz \-\-decompress \-\-stdout"
(and possibly a few other commonly used options) to decompress
.B .xz
files.
.B lzmadec
is identical to
.B xzdec
except that
.B lzmadec
supports
.B .lzma
files instead of
.B .xz
files.
.PP
To reduce the size of the executable,
.B xzdec
doesn't support multithreading or localization,
and doesn't read options from
.B XZ_DEFAULTS
and
.B XZ_OPT
environment variables.
.B xzdec
doesn't support displaying intermediate progress information: sending
.B SIGINFO
to
.B xzdec
does nothing, but sending
.B SIGUSR1
terminates the process instead of displaying progress information.
.SH OPTIONS
.TP
.BR \-d ", " \-\-decompress ", " \-\-uncompress
Ignored for
.BR xz (1)
compatibility.
.B xzdec
supports only decompression.
.TP
.BR \-k ", " \-\-keep
Ignored for
.BR xz (1)
compatibility.
.B xzdec
never creates or removes any files.
.TP
.BR \-c ", " \-\-stdout ", " \-\-to-stdout
Ignored for
.BR xz (1)
compatibility.
.B xzdec
always writes the decompressed data to standard output.
.TP
.BR \-q ", " \-\-quiet
Specifying this once does nothing since
.B xzdec
never displays any warnings or notices.
Specify this twice to suppress errors.
.TP
.BR \-Q ", " \-\-no-warn
Ignored for
.BR xz (1)
compatibility.
.B xzdec
never uses the exit status 2.
.TP
.BR \-h ", " \-\-help
Display a help message and exit successfully.
.TP
.BR \-V ", " \-\-version
Display the version number of
.B xzdec
and liblzma.
.SH "EXIT STATUS"
.TP
.B 0
All was good.
.TP
.B 1
An error occurred.
.PP
.B xzdec
doesn't have any warning messages like
.BR xz (1)
has, thus the exit status 2 is not used by
.BR xzdec .
.SH NOTES
Use
.BR xz (1)
instead of
.B xzdec
or
.B lzmadec
for normal everyday use.
.B xzdec
or
.B lzmadec
are meant only for situations where it is important to have
a smaller decompressor than the full-featured
.BR xz (1).
.PP
.B xzdec
and
.B lzmadec
are not really that small.
The size can be reduced further by dropping
features from liblzma at compile time,
but that shouldn't usually be done for executables distributed
in typical non-embedded operating system distributions.
If you need a truly small
.B .xz
decompressor, consider using XZ Embedded.
.SH "SEE ALSO"
.BR xz (1)
.PP
XZ Embedded: http://tukaani.org/xz/embedded.html
070701000996c1000081a40000000000000000000000015174a52f000005a1000000b600010002ffffffffffffffff0000002700000000root/usr/local/share/man/man1/xzdiff.1 .\"
.\" Original zdiff.1 for gzip: Jean-loup Gailly
.\"
.\" Modifications for XZ Utils: Lasse Collin
.\" Andrew Dudman
.\"
.\" License: GNU GPLv2+
.\"
.TH XZDIFF 1 "2010-09-27" "Tukaani" "XZ Utils"
.SH NAME
xzcmp, xzdiff, lzcmp, lzdiff \- compare compressed files
.SH SYNOPSIS
.B xzcmp
.RI [ cmp_options "] " file1 " [" file2 ]
.br
.B xzdiff
.RI [ diff_options "] " file1 " [" file2 ]
.br
.B lzcmp
.RI [ cmp_options "] " file1 " [" file2 ]
.br
.B lzdiff
.RI [ diff_options "] " file1 " [" file2 ]
.SH DESCRIPTION
.B xzcmp
and
.B xzdiff
invoke
.BR cmp (1)
or
.BR diff (1)
on files compressed with
.BR xz (1),
.BR lzma (1),
.BR gzip (1),
or
.BR bzip2 (1).
All options specified are passed directly to
.BR cmp (1)
or
.BR diff (1).
If only one file is specified, then the files compared are
.I file1
(which must have a suffix of a supported compression format) and
.I file1
from which the compression format suffix has been stripped.
If two files are specified,
then they are uncompressed if necessary and fed to
.BR cmp (1)
or
.BR diff (1).
The exit status from
.BR cmp (1)
or
.BR diff (1)
is preserved.
.PP
The names
.B lzcmp
and
.B lzdiff
are provided for backward compatibility with LZMA Utils.
.SH "SEE ALSO"
.BR cmp (1),
.BR diff (1),
.BR xz (1),
.BR gzip (1),
.BR bzip2 (1),
.BR zdiff (1)
.SH BUGS
Messages from the
.BR cmp (1)
or
.BR diff (1)
programs refer to temporary filenames instead of those specified.
070701000996c4000081a40000000000000000000000015174a52f0000048f000000b600010002ffffffffffffffff0000002700000000root/usr/local/share/man/man1/xzmore.1 .\"
.\" Original zmore.1 for gzip: Jean-loup Gailly
.\" Modifications for XZ Utils: Lasse Collin
.\"
.\" License: GNU GPLv2+
.\"
.TH XZMORE 1 "2010-09-27" "Tukaani" "XZ Utils"
.SH NAME
xzmore, lzmore \- view xz or lzma compressed (text) files
.SH SYNOPSIS
.B xzmore
.RI [ "filename ..." ]
.br
.B lzmore
.RI [ "filename ..." ]
.SH DESCRIPTION
.B xzmore
is a filter which allows examination of
.BR xz (1)
or
.BR lzma (1)
compressed text files one screenful at a time
on a soft-copy terminal.
.PP
To use a pager other than the default
.BR more ,
set environment variable
.B PAGER
to the name of the desired program.
The name
.B lzmore
is provided for backward compatibility with LZMA Utils.
.TP
.BR e " or " q
When the prompt \-\-More\-\-(Next file:
.IR file )
is printed, this command causes
.B xzmore
to exit.
.TP
.B s
When the prompt \-\-More\-\-(Next file:
.IR file )
is printed, this command causes
.B xzmore
to skip the next file and continue.
.PP
For a list of keyboard commands supported while actually viewing the
content of a file, refer to the manual of the pager you use, usually
.BR more (1).
.SH "SEE ALSO"
.BR more (1),
.BR xz (1),
.BR xzless (1),
.BR zmore (1)
07070100099696000041ed0000000000000000000000035174a54c00000000000000b600010002ffffffffffffffff0000001900000000root/usr/local/share/doc 07070100099697000041ed0000000000000000000000045174a54c00000000000000b600010002ffffffffffffffff0000001c00000000root/usr/local/share/doc/xz 0707010009969c000081a40000000000000000000000015174a52f000034d7000000b600010002ffffffffffffffff0000002300000000root/usr/local/share/doc/xz/README
XZ Utils
========
0. Overview
1. Documentation
1.1. Overall documentation
1.2. Documentation for command-line tools
1.3. Documentation for liblzma
2. Version numbering
3. Reporting bugs
4. Translating the xz tool
5. Other implementations of the .xz format
6. Contact information
0. Overview
-----------
XZ Utils provide a general-purpose data-compression library plus
command-line tools. The native file format is the .xz format, but
also the legacy .lzma format is supported. The .xz format supports
multiple compression algorithms, which are called "filters" in the
context of XZ Utils. The primary filter is currently LZMA2. With
typical files, XZ Utils create about 30 % smaller files than gzip.
To ease adapting support for the .xz format into existing applications
and scripts, the API of liblzma is somewhat similar to the API of the
popular zlib library. For the same reason, the command-line tool xz
has a command-line syntax similar to that of gzip.
When aiming for the highest compression ratio, the LZMA2 encoder uses
a lot of CPU time and may use, depending on the settings, even
hundreds of megabytes of RAM. However, in fast modes, the LZMA2 encoder
competes with bzip2 in compression speed, RAM usage, and compression
ratio.
LZMA2 is reasonably fast to decompress. It is a little slower than
gzip, but a lot faster than bzip2. Being fast to decompress means
that the .xz format is especially nice when the same file will be
decompressed very many times (usually on different computers), which
is the case e.g. when distributing software packages. In such
situations, it's not too bad if the compression takes some time,
since that needs to be done only once to benefit many people.
With some file types, combining (or "chaining") LZMA2 with an
additional filter can improve the compression ratio. A filter chain may
contain up to four filters, although usually only one or two are used.
For example, putting a BCJ (Branch/Call/Jump) filter before LZMA2
in the filter chain can improve compression ratio of executable files.
Since the .xz format allows adding new filter IDs, it is possible that
some day there will be a filter that is, for example, much faster to
compress than LZMA2 (but probably with worse compression ratio).
Similarly, it is possible that some day there is a filter that will
compress better than LZMA2.
XZ Utils doesn't support multithreaded compression or decompression
yet. It has been planned though and taken into account when designing
the .xz file format.
1. Documentation
----------------
1.1. Overall documentation
README This file
INSTALL.generic Generic install instructions for those not familiar
with packages using GNU Autotools
INSTALL Installation instructions specific to XZ Utils
PACKAGERS Information to packagers of XZ Utils
COPYING XZ Utils copyright and license information
COPYING.GPLv2 GNU General Public License version 2
COPYING.GPLv3 GNU General Public License version 3
COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1
AUTHORS The main authors of XZ Utils
THANKS Incomplete list of people who have helped make
this software
NEWS User-visible changes between XZ Utils releases
ChangeLog Detailed list of changes (commit log)
TODO Known bugs and some sort of to-do list
Note that only some of the above files are included in binary
packages.
1.2. Documentation for command-line tools
The command-line tools are documented as man pages. In source code
releases (and possibly also in some binary packages), the man pages
are also provided in plain text (ASCII only) and PDF formats in the
directory "doc/man" to make the man pages more accessible to those
whose operating system doesn't provide an easy way to view man pages.
1.3. Documentation for liblzma
The liblzma API headers include short docs about each function
and data type as Doxygen tags. These docs should be quite OK as
a quick reference.
I have planned to write a bunch of very well documented example
programs, which (due to comments) should work as a tutorial to
various features of liblzma. No such example programs have been
written yet.
For now, if you have never used liblzma, libbzip2, or zlib, I
recommend learning the *basics* of the zlib API. Once you know that,
it should be easier to learn liblzma.
http://zlib.net/manual.html
http://zlib.net/zlib_how.html
2. Version numbering
--------------------
The version number format of XZ Utils is X.Y.ZS:
- X is the major version. When this is incremented, the library
API and ABI break.
- Y is the minor version. It is incremented when new features
are added without breaking the existing API or ABI. An even Y
indicates a stable release and an odd Y indicates unstable
(alpha or beta version).
- Z is the revision. This has a different meaning for stable and
unstable releases:
* Stable: Z is incremented when bugs get fixed without adding
any new features. This is intended to be convenient for
downstream distributors that want bug fixes but don't want
any new features to minimize the risk of introducing new bugs.
* Unstable: Z is just a counter. API or ABI of features added
in earlier unstable releases having the same X.Y may break.
- S indicates stability of the release. It is missing from the
stable releases, where Y is an even number. When Y is odd, S
is either "alpha" or "beta" to make it very clear that such
versions are not stable releases. The same X.Y.Z combination is
not used for more than one stability level, i.e. after X.Y.Zalpha,
the next version can be X.Y.(Z+1)beta but not X.Y.Zbeta.
3. Reporting bugs
-----------------
Naturally it is easiest for me if you already know what causes the
unexpected behavior. Even better if you have a patch to propose.
However, quite often the reason for unexpected behavior is unknown,
so here are a few things to do before sending a bug report:
1. Try to create a small example of how to reproduce the issue.
2. Compile XZ Utils with debugging code using configure switches
--enable-debug and, if possible, --disable-shared. If you are
using GCC, use CFLAGS='-O0 -ggdb3'. Don't strip the resulting
binaries.
3. Turn on core dumps. The exact command depends on your shell;
for example in GNU bash it is done with "ulimit -c unlimited",
and in tcsh with "limit coredumpsize unlimited".
4. Try to reproduce the suspected bug. If you get "assertion failed"
message, be sure to include the complete message in your bug
report. If the application leaves a coredump, get a backtrace
using gdb:
$ gdb /path/to/app-binary # Load the app to the debugger.
(gdb) core core # Open the coredump.
(gdb) bt # Print the backtrace. Copy & paste to bug report.
(gdb) quit # Quit gdb.
Report your bug via email or IRC (see Contact information below).
Don't send core dump files or any executables. If you have a small
example file(s) (total size less than 256 KiB), please include
it/them as an attachment. If you have bigger test files, put them
online somewhere and include a URL to the file(s) in the bug report.
Always include the exact version number of XZ Utils in the bug report.
If you are using a snapshot from the git repository, use "git describe"
to get the exact snapshot version. If you are using XZ Utils shipped
in an operating system distribution, mention the distribution name,
distribution version, and exact xz package version; if you cannot
repeat the bug with the code compiled from unpatched source code,
you probably need to report a bug to your distribution's bug tracking
system.
4. Translating the xz tool
--------------------------
The messages from the xz tool have been translated into a few
languages. Before starting to translate into a new language, ask
the author whether someone else has already started working on it.
Test your translation. Testing includes comparing the translated
output to the original English version by running the same commands
in both your target locale and with LC_ALL=C. Ask someone to
proof-read and test the translation.
Testing can be done e.g. by installing xz into a temporary directory:
./configure --disable-shared --prefix=/tmp/xz-test
#
make -C po update-po
make install
bash debug/translations.bash | less
bash debug/translations.bash | less -S # For --list outputs
Repeat the above as needed (no need to re-run configure though).
Note especially the following:
- The output of --help and --long-help must look nice on
an 80-column terminal. It's OK to add extra lines if needed.
- In contrast, don't add extra lines to error messages and such.
They are often preceded with e.g. a filename on the same line,
so you have no way to predict where to put a \n. Let the terminal
do the wrapping even if it looks ugly. Adding new lines will be
even uglier in the generic case even if it looks nice in a few
limited examples.
- Be careful with column alignment in tables and table-like output
(--list, --list --verbose --verbose, --info-memory, --help, and
--long-help):
* All descriptions of options in --help should start in the
same column (but it doesn't need to be the same column as
in the English messages; just be consistent if you change it).
Check that both --help and --long-help look OK, since they
share several strings.
* --list --verbose and --info-memory print lines that have
the format "Description: %s". If you need a longer
description, you can put extra space between the colon
and %s. Then you may need to add extra space to other
strings too so that the result as a whole looks good (all
values start at the same column).
* The columns of the actual tables in --list --verbose --verbose
should be aligned properly. Abbreviate if necessary. It might
be good to keep at least 2 or 3 spaces between column headings
and avoid spaces in the headings so that the columns stand out
better, but this is a matter of opinion. Do what you think
looks best.
- Be careful to put a period at the end of a sentence when the
original version has it, and don't put it when the original
doesn't have it. Similarly, be careful with \n characters
at the beginning and end of the strings.
- Read the TRANSLATORS comments that have been extracted from the
source code and included in xz.pot. If they suggest testing the
translation with some type of command, do it. If testing needs
input files, use e.g. tests/files/good-*.xz.
- When updating the translation, read the fuzzy (modified) strings
carefully, and don't mark them as updated before you actually
have updated them. Reading through the unchanged messages can be
good too; sometimes you may find a better wording for them.
- If you find language problems in the original English strings,
feel free to suggest improvements. Ask if something is unclear.
- The translated messages should be understandable (sometimes this
may be a problem with the original English messages too). Don't
make a direct word-by-word translation from English especially if
the result doesn't sound good in your language.
In short, take your time and pay attention to the details. Making
a good translation is not a quick and trivial thing to do. The
translated xz should look as polished as the English version.
5. Other implementations of the .xz format
------------------------------------------
7-Zip and the p7zip port of 7-Zip support the .xz format starting
from the version 9.00alpha.
http://7-zip.org/
http://p7zip.sourceforge.net/
XZ Embedded is a limited implementation written for use in the Linux
kernel, but it is also suitable for other embedded use.
http://tukaani.org/xz/embedded.html
6. Contact information
----------------------
If you have questions, bug reports, patches etc. related to XZ Utils,
contact Lasse Collin (in Finnish or English).
I'm sometimes slow at replying. If you haven't got a reply within two
weeks, assume that your email has got lost and resend it or use IRC.
You can also find me on #tukaani on Freenode; my nick is Larhzu.
The channel tends to be pretty quiet, so just ask your question and
someone may wake up.
0707010009969b000081a40000000000000000000000015174a52f000018e5000000b600010002ffffffffffffffff0000002100000000root/usr/local/share/doc/xz/NEWS
XZ Utils Release Notes
======================
5.0.4 (2012-06-22)
* liblzma:
- Fix lzma_index_init(). It could crash if memory allocation
failed.
- Fix the possibility of an incorrect LZMA_BUF_ERROR when a BCJ
filter is used and the application only provides exactly as
much output space as is the uncompressed size of the file.
- Fix a bug in doc/examples_old/xz_pipe_decompress.c. It didn't
check if the last call to lzma_code() really returned
LZMA_STREAM_END, which made the program think that truncated
files are valid.
- New example programs in doc/examples (old programs are now in
doc/examples_old). These have more comments and more detailed
error handling.
* Fix "xz -lvv foo.xz". It could crash on some corrupted files.
* Fix output of "xz --robot -lv" and "xz --robot -lvv" which
incorrectly printed the filename also in the "foo (x/x)" format.
* Fix exit status of "xzdiff foo.xz bar.xz".
* Fix exit status of "xzgrep foo binary_file".
* Fix portability to EBCDIC systems.
* Fix a configure issue on AIX with the XL C compiler. See INSTALL
for details.
* Update French, German, Italian, and Polish translations.
5.0.3 (2011-05-21)
* liblzma fixes:
- A memory leak was fixed.
- lzma_stream_buffer_encode() no longer creates an empty .xz
Block if encoding an empty buffer. Such an empty Block with
LZMA2 data would trigger a bug in 5.0.1 and older (see the
first bullet point in 5.0.2 notes). When releasing 5.0.2,
I thought that no encoder creates this kind of files but
I was wrong.
- Validate function arguments better in a few functions. Most
importantly, specifying an unsupported integrity check to
lzma_stream_buffer_encode() no longer creates a corrupt .xz
file. Probably no application tries to do that, so this
shouldn't be a big problem in practice.
- Document that lzma_block_buffer_encode(),
lzma_easy_buffer_encode(), lzma_stream_encoder(), and
lzma_stream_buffer_encode() may return LZMA_UNSUPPORTED_CHECK.
- The return values of the _memusage() functions are now
documented better.
* Fix command name detection in xzgrep. xzegrep and xzfgrep now
correctly use egrep and fgrep instead of grep.
* French translation was added.
5.0.2 (2011-04-01)
* LZMA2 decompressor now correctly accepts LZMA2 streams with no
uncompressed data. Previously it considered them corrupt. The
bug can affect applications that use raw LZMA2 streams. It is
very unlikely to affect .xz files because no compressor creates
.xz files with empty LZMA2 streams. (Empty .xz files are a
different thing than empty LZMA2 streams.)
* "xz --suffix=.foo filename.foo" now refuses to compress the
file due to it already having the suffix .foo. It was already
documented on the man page, but the code lacked the test.
* "xzgrep -l foo bar.xz" works now.
* Polish translation was added.
5.0.1 (2011-01-29)
* xz --force now (de)compresses files that have setuid, setgid,
or sticky bit set and files that have multiple hard links.
The man page had it documented this way already, but the code
had a bug.
* gzip and bzip2 support in xzdiff was fixed.
* Portability fixes
* Minor fix to Czech translation
5.0.0 (2010-10-23)
Only the most important changes compared to 4.999.9beta are listed
here. One change is especially important:
* The memory usage limit is now disabled by default. Some scripts
written before this change may have used --memory=max on xz command
line or in XZ_OPT. THESE USES OF --memory=max SHOULD BE REMOVED
NOW, because they interfere with the user's ability to set the memory
usage limit himself. If a user-specified limit causes problems for
your script, blame the user.
Other significant changes:
* Added support for XZ_DEFAULTS environment variable. This variable
allows users to set default options for xz, e.g. default memory
usage limit or default compression level. Scripts that use xz
must never set or unset XZ_DEFAULTS. Scripts should use XZ_OPT
instead if they need a way to pass options to xz via an
environment variable.
* The compression settings associated with the preset levels
-0 ... -9 have been changed. --extreme was changed a little too.
It is now less likely to make compression worse, but with some
files the new --extreme may compress slightly worse than the old
--extreme.
* If a preset level (-0 ... -9) is specified after custom filter
chain options have been used (e.g. --lzma2), the custom filter
chain will be forgotten. Earlier the preset options were
completely ignored after custom filter chain options had been
seen.
* xz will create sparse files when decompressing if the uncompressed
data contains long sequences of binary zeros. This is done even
when writing to standard output that is connected to a regular
file and certain additional conditions are met to make it safe.
* Support for "xz --list" was added. Combine with --verbose or
--verbose --verbose (-vv) for detailed output.
* I had hoped that liblzma API would have been stable after
4.999.9beta, but there have been a couple of changes in the
advanced features, which don't affect most applications:
- Index handling code was revised. If you were using the old
API, you will get a compiler error (so it's easy to notice).
- A subtle but important change was made to the Block handling
API. lzma_block.version has to be initialized even for
lzma_block_header_decode(). Code that doesn't do it will work
for now, but might break in the future, which makes this API
change easy to miss.
* The major soname has been bumped to 5.0.0. liblzma API and ABI
are now stable, so the need to recompile programs linking against
liblzma shouldn't arise soon.
070701000996a9000081a40000000000000000000000015174a52f00001d03000000b600010002ffffffffffffffff0000002800000000root/usr/local/share/doc/xz/history.txt
History of LZMA Utils and XZ Utils
==================================
Tukaani distribution
In 2005, there was a small group working on the Tukaani distribution,
which was a Slackware fork. One of the project's goals was to fit the
distro on a single 700 MiB ISO-9660 image. Using LZMA instead of gzip
helped a lot. Roughly speaking, one could fit data that took 1000 MiB
in gzipped form into 700 MiB with LZMA. Naturally, the compression
ratio varied across packages, but this was what we got on average.
Slackware packages have traditionally had .tgz as the filename suffix,
which is an abbreviation of .tar.gz. A logical naming for LZMA
compressed packages was .tlz, being an abbreviation of .tar.lzma.
At the end of the year 2007, there was no distribution under the
Tukaani project anymore, but development of LZMA Utils was kept going.
Still, there were .tlz packages around, because at least Vector Linux
(a Slackware based distribution) used LZMA for its packages.
First versions of the modified pkgtools used the LZMA_Alone tool from
Igor Pavlov's LZMA SDK as is. It was fine, because users wouldn't need
to interact with LZMA_Alone directly. But people soon wanted to use
LZMA for other files too, and the interface of LZMA_Alone wasn't
comfortable for those used to gzip and bzip2.
First steps of LZMA Utils
The first version of LZMA Utils (4.22.0) included a shell script called
lzmash. It was a wrapper that had a gzip-like command-line interface. It
used the LZMA_Alone tool from LZMA SDK to do all the real work. zgrep,
zdiff, and related scripts from gzip were adapted to work with LZMA and
were part of the first LZMA Utils release too.
LZMA Utils 4.22.0 included also lzmadec, which was a small (less than
10 KiB) decoder-only command-line tool. It was written on top of the
decoder-only C code found from the LZMA SDK. lzmadec was convenient in
situations where LZMA_Alone (a few hundred KiB) would be too big.
lzmash and lzmadec were written by Lasse Collin.
Second generation
The lzmash script was an ugly and not very secure hack. The last
version of LZMA Utils to use lzmash was 4.27.1.
LZMA Utils 4.32.0beta1 introduced a new lzma command-line tool written
by Ville Koskinen. It was written in C++, and used the encoder and
decoder from C++ LZMA SDK with some little modifications. This tool
replaced both the lzmash script and the LZMA_Alone command-line tool
in LZMA Utils.
Introducing this new tool caused some temporary incompatibilities,
because the LZMA_Alone executable was simply named lzma like the new
command-line tool, but they had a completely different command-line
interface. The file format was still the same.
Lasse wrote liblzmadec, which was a small decoder-only library based
on the C code found from LZMA SDK. liblzmadec had an API similar to
zlib, although there were some significant differences, which made it
non-trivial to use it in some applications designed for zlib and
libbzip2.
The lzmadec command-line tool was converted to use liblzmadec.
Alexandre Sauvé helped convert the build system to use GNU
Autotools. This made it easier to test for certain less portable
features needed by the new command-line tool.
Since the new command-line tool never got completely finished (for
example, it didn't support the LZMA_OPT environment variable), the
intent was to not call 4.32.x stable. Similarly, liblzmadec wasn't
polished, but appeared to work well enough, so some people started
using it too.
Because the development of the third generation of LZMA Utils was
delayed considerably (3-4 years), the 4.32.x branch had to be kept
maintained. It got some bug fixes now and then, and finally it was
decided to call it stable, although most of the missing features were
never added.
File format problems
The file format used by LZMA_Alone was primitive. It was designed with
embedded systems in mind, and thus provided only a minimal set of
features. The two biggest problems for non-embedded use were the lack
of magic bytes and an integrity check.
Igor and Lasse started developing a new file format with some help
from Ville Koskinen. Also Mark Adler, Mikko Pouru, H. Peter Anvin,
and Lars Wirzenius helped with some minor things at some point of the
development. Designing the new format took quite a long time (actually,
too long a time would be a more appropriate expression). It was mostly
because Lasse was quite slow at getting things done due to personal
reasons.
Originally the new format was supposed to use the same .lzma suffix
that was already used by the old file format. Switching to the new
format wouldn't have caused much trouble when the old format wasn't
used by many people. But since the development of the new format took
such a long time, the old format got quite popular, and it was decided
that the new file format must use a different suffix.
It was decided to use .xz as the suffix of the new file format. The
first stable .xz file format specification was finally released in
December 2008. In addition to fixing the most obvious problems of
the old .lzma format, the .xz format added some new features like
support for multiple filters (compression algorithms), filter chaining
(like piping on the command line), and limited random-access reading.
Currently the primary compression algorithm used in .xz is LZMA2.
It is an extension on top of the original LZMA to fix some practical
problems: LZMA2 adds support for flushing the encoder, uncompressed
chunks, eases stateful decoder implementations, and improves support
for multithreading. Since LZMA2 is better than the original LZMA, the
original LZMA is not supported in .xz.
Transition to XZ Utils
The early versions of XZ Utils were called LZMA Utils. The first
releases were 4.42.0alphas. They dropped the rest of the C++ LZMA SDK.
The code was still directly based on LZMA SDK but ported to C and
converted from a callback API to a stateful API. Later, Igor Pavlov
made a C version of the LZMA encoder too; these ports from C++ to C
were independent in LZMA SDK and LZMA Utils.
The core of the new LZMA Utils was liblzma, a compression library with
a zlib-like API. liblzma supported both the old and new file format.
The gzip-like lzma command-line tool was rewritten to use liblzma.
The new LZMA Utils code base was renamed to XZ Utils when the name
of the new file format had been decided. The liblzma compression
library retained its name though, because changing it would have
caused unnecessary breakage in applications already using the early
liblzma snapshots.
The xz command-line tool can emulate the gzip-like lzma tool by
creating appropriate symlinks (e.g. lzma -> xz). Thus, practically
all scripts using the lzma tool from LZMA Utils will work as is with
XZ Utils (and will keep using the old .lzma format). Still, the .lzma
format is more or less deprecated. XZ Utils will keep supporting it,
but new applications should use the .xz format, and migrating old
applications to .xz is often a good idea too.
070701000996aa000081a40000000000000000000000015174a52f00001639000000b600010002ffffffffffffffff0000003100000000root/usr/local/share/doc/xz/lzma-file-format.txt
The .lzma File Format
=====================
0. Preface
0.1. Notices and Acknowledgements
0.2. Changes
1. File Format
1.1. Header
1.1.1. Properties
1.1.2. Dictionary Size
1.1.3. Uncompressed Size
1.2. LZMA Compressed Data
2. References
0. Preface
This document describes the .lzma file format, which is
sometimes also called LZMA_Alone format. It is a legacy file
format, which is being or has been replaced by the .xz format.
The MIME type of the .lzma format is `application/x-lzma'.
The most commonly used software packages for handling .lzma files are
LZMA SDK, LZMA Utils, 7-Zip, and XZ Utils. This document
describes some of the differences between these implementations
and gives hints about which subset of the .lzma format is the most
portable.
0.1. Notices and Acknowledgements
This file format was designed by Igor Pavlov for use in
LZMA SDK. This document was written by Lasse Collin
using the documentation found
from the LZMA SDK.
This document has been put into the public domain.
0.2. Changes
Last modified: 2011-04-12 11:55+0300
1. File Format
+-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+
| Header | LZMA Compressed Data |
+-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+
A .lzma format file consists of a 13-byte Header followed by
the LZMA Compressed Data.
Unlike the .gz, .bz2, and .xz formats, it is not possible to
concatenate multiple .lzma files as is and expect the
decompression tool to decode the resulting file as if it were
a single .lzma file.
For example, the command line tools from LZMA Utils and
LZMA SDK silently ignore all the data after the first .lzma
stream. In contrast, the command line tool from XZ Utils
considers the .lzma file to be corrupt if there is data after
the first .lzma stream.
1.1. Header
+------------+----+----+----+----+--+--+--+--+--+--+--+--+
| Properties | Dictionary Size | Uncompressed Size |
+------------+----+----+----+----+--+--+--+--+--+--+--+--+
1.1.1. Properties
The Properties field contains three properties. An abbreviation
is given in parentheses, followed by the value range of the
property. The field consists of
1) the number of literal context bits (lc, [0, 8]);
2) the number of literal position bits (lp, [0, 4]); and
3) the number of position bits (pb, [0, 4]).
The properties are encoded using the following formula:
Properties = (pb * 5 + lp) * 9 + lc
The following C code illustrates a straightforward way to
decode the Properties field:
uint8_t lc, lp, pb;
uint8_t prop = get_lzma_properties();
if (prop > (4 * 5 + 4) * 9 + 8)
return LZMA_PROPERTIES_ERROR;
pb = prop / (9 * 5);
prop -= pb * 9 * 5;
lp = prop / 9;
lc = prop - lp * 9;
XZ Utils has an additional requirement: lc + lp <= 4. Files
which don't follow this requirement cannot be decompressed
with XZ Utils. Usually this isn't a problem since the most
common lc/lp/pb values are 3/0/2. It is the only lc/lp/pb
combination that the files created by LZMA Utils can have,
but LZMA Utils can decompress files with any lc/lp/pb.
1.1.2. Dictionary Size
Dictionary Size is stored as an unsigned 32-bit little endian
integer. Any 32-bit value is possible, but for maximum
portability, only sizes of 2^n and 2^n + 2^(n-1) should be
used.
LZMA Utils creates only files with dictionary size 2^n,
16 <= n <= 25. LZMA Utils can decompress files with any
dictionary size.
XZ Utils creates and decompresses .lzma files only with
dictionary sizes 2^n and 2^n + 2^(n-1). If some other
dictionary size is specified when compressing, the value
stored in the Dictionary Size field is rounded up, but the
specified value is still used in the actual compression code.
1.1.3. Uncompressed Size
Uncompressed Size is stored as an unsigned 64-bit little endian
integer. A special value of 0xFFFF_FFFF_FFFF_FFFF indicates
that Uncompressed Size is unknown. End of Payload Marker (*)
is used if and only if Uncompressed Size is unknown.
XZ Utils rejects files whose Uncompressed Size field specifies
a known size that is 256 GiB or more. This is to reject false
positives when trying to guess if the input file is in the
.lzma format. When Uncompressed Size is unknown, there is no
limit for the uncompressed size of the file.
(*) Some tools use the term End of Stream (EOS) marker
instead of End of Payload Marker.
1.2. LZMA Compressed Data
Detailed description of the format of this field is out of
scope of this document.
2. References
LZMA SDK - The original LZMA implementation
http://7-zip.org/sdk.html
7-Zip
http://7-zip.org/
LZMA Utils - LZMA adapted to POSIX-like systems
http://tukaani.org/lzma/
XZ Utils - The next generation of LZMA Utils
http://tukaani.org/xz/
The .xz file format - The successor of the .lzma format
http://tukaani.org/xz/xz-file-format.txt
0707010009969f000041ed0000000000000000000000025174a54c00000000000000b600010002ffffffffffffffff0000002500000000root/usr/local/share/doc/xz/examples 070701000996a4000081a40000000000000000000000015174a52f0000013a000000b600010002ffffffffffffffff0000002e00000000root/usr/local/share/doc/xz/examples/Makefile #
# Author: Lasse Collin
#
# This file has been put into the public domain.
# You can do whatever you want with this file.
#
# Toolchain settings. The library flag is kept in LDFLAGS, which the
# inference rule below places after the source file on the command line.
CC = c99
CFLAGS = -g
LDFLAGS = -llzma

# Example programs; each one is built from the .c file of the same name.
PROGS = 01_compress_easy 02_decompress 03_compress_custom

# Default target: build every example.
all: $(PROGS)

# Suffix rule: produce an executable directly from a single .c file.
.c:
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

# Remove the built programs. The leading '-' tells make to carry on
# even if rm reports a failure.
clean:
	-rm -f $(PROGS)
070701000996a3000081a40000000000000000000000015174a52f00001399000000b600010002ffffffffffffffff0000003a00000000root/usr/local/share/doc/xz/examples/03_compress_custom.c ///////////////////////////////////////////////////////////////////////////////
//
/// \file 03_compress_custom.c
/// \brief Compress in multi-call mode using x86 BCJ and LZMA2
///
/// Usage: ./03_compress_custom < INFILE > OUTFILE
///
/// Example: ./03_compress_custom < foo > foo.xz
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include
#include
#include
#include
#include
#include
// Initialize strm as a .xz Stream encoder that uses a custom filter
// chain: the x86 BCJ filter followed by LZMA2 with CRC64 as the
// integrity check.
//
// Returns true when the encoder is ready for lzma_code(). On failure
// a message is printed to stderr and false is returned.
static bool
init_encoder(lzma_stream *strm)
{
	// Start the LZMA2 options from the default preset (6).
	//
	// lzma_options_lzma and lzma_lzma_preset() are declared in
	// lzma/lzma.h (src/liblzma/api/lzma/lzma.h in the source package
	// or e.g. /usr/include/lzma/lzma.h depending on the install
	// prefix).
	lzma_options_lzma lzma2_opts;
	if (lzma_lzma_preset(&lzma2_opts, LZMA_PRESET_DEFAULT)) {
		// This is not expected to happen: the default preset
		// (and presets 0-9, optionally with LZMA_PRESET_EXTREME)
		// are supported by all stable liblzma versions.
		//
		// (Initializing the encoder further below may still fail
		// because of an unsupported preset *if* the features the
		// preset needs were disabled at build time, but no-one
		// does that except on embedded systems.)
		fprintf(stderr, "Unsupported preset, possibly a bug\n");
		return false;
	}

	// This is the place to customize the LZMA2 options if desired.
	// For example, the dictionary size (lzma2_opts.dict_size) could
	// be set to something other than the 8 MiB that the default
	// preset uses. See lzma/lzma.h for all the LZMA2 options.
	//
	// The x86 BCJ filter tries to modify the x86 instruction stream
	// so that LZMA2 can compress it better. It needs no options,
	// hence the NULL below.
	//
	// Build the filter chain. Uncompressed data enters the first
	// array element (here the x86 BCJ filter) and flows onward.
	// An element with .id = LZMA_VLI_UNKNOWN always terminates
	// the array.
	//
	// See lzma/filter.h for more about the lzma_filter structure.
	lzma_filter chain[] = {
		{ .id = LZMA_FILTER_X86, .options = NULL },
		{ .id = LZMA_FILTER_LZMA2, .options = &lzma2_opts },
		{ .id = LZMA_VLI_UNKNOWN, .options = NULL },
	};

	// Hand the custom chain to the Stream encoder.
	lzma_ret rc = lzma_stream_encoder(strm, chain, LZMA_CHECK_CRC64);
	if (rc == LZMA_OK)
		return true;

	// Pick a human-readable explanation for the failure.
	const char *reason;
	if (rc == LZMA_MEM_ERROR) {
		reason = "Memory allocation failed";
	} else if (rc == LZMA_OPTIONS_ERROR) {
		// A plain preset is no longer in use here, so this
		// message differs from the one in 01_compress_easy.c.
		reason = "Specified filter chain is not supported";
	} else if (rc == LZMA_UNSUPPORTED_CHECK) {
		reason = "Specified integrity check is not supported";
	} else {
		reason = "Unknown error, possibly a bug";
	}

	fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
			reason, rc);
	return false;
}
// Behaves the same as compress() in 01_compress_easy.c; see that file
// for a detailed walk-through of each step.
//
// Reads infile until end of file, feeds the data through the encoder
// in strm, and writes the compressed output to outfile. Returns true
// on success; on failure prints a message to stderr and returns false.
static bool
compress(lzma_stream *strm, FILE *infile, FILE *outfile)
{
	uint8_t in_chunk[BUFSIZ];
	uint8_t out_chunk[BUFSIZ];

	// Stays LZMA_RUN until the end of the input has been seen.
	lzma_action mode = LZMA_RUN;

	strm->next_in = NULL;
	strm->avail_in = 0;
	strm->next_out = out_chunk;
	strm->avail_out = sizeof(out_chunk);

	for (;;) {
		// Refill the input buffer once the encoder has consumed it.
		if (strm->avail_in == 0 && !feof(infile)) {
			strm->next_in = in_chunk;
			strm->avail_in = fread(in_chunk, 1, sizeof(in_chunk),
					infile);

			if (ferror(infile)) {
				fprintf(stderr, "Read error: %s\n",
						strerror(errno));
				return false;
			}

			// No more input will come: ask the encoder to finish.
			if (feof(infile))
				mode = LZMA_FINISH;
		}

		const lzma_ret rc = lzma_code(strm, mode);
		const bool finished = (rc == LZMA_STREAM_END);

		// Flush the output buffer when it is full or when the
		// encoder has produced its final bytes.
		if (finished || strm->avail_out == 0) {
			const size_t pending
					= sizeof(out_chunk) - strm->avail_out;

			if (fwrite(out_chunk, 1, pending, outfile)
					!= pending) {
				fprintf(stderr, "Write error: %s\n",
						strerror(errno));
				return false;
			}

			strm->next_out = out_chunk;
			strm->avail_out = sizeof(out_chunk);
		}

		if (finished)
			return true;

		if (rc == LZMA_OK)
			continue;

		// Anything else is an error code.
		const char *reason;
		if (rc == LZMA_MEM_ERROR)
			reason = "Memory allocation failed";
		else if (rc == LZMA_DATA_ERROR)
			reason = "File size limits exceeded";
		else
			reason = "Unknown error, possibly a bug";

		fprintf(stderr, "Encoder error: %s (error code %u)\n",
				reason, rc);
		return false;
	}
}
// Program entry point: compress stdin to stdout using the custom
// filter chain set up by init_encoder(). Exits with EXIT_SUCCESS only
// if initialization, compression, and closing stdout all succeeded.
extern int
main(void)
{
	lzma_stream strm = LZMA_STREAM_INIT;

	// compress() runs only when the encoder was initialized.
	bool ok = init_encoder(&strm) && compress(&strm, stdin, stdout);

	// Release the encoder's memory whether or not the above succeeded.
	lzma_end(&strm);

	// Closing stdout flushes the stdio buffers, which can reveal
	// write errors that were still pending.
	if (fclose(stdout) != 0) {
		fprintf(stderr, "Write error: %s\n", strerror(errno));
		ok = false;
	}

	if (!ok)
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
070701000996a0000081a40000000000000000000000015174a52f00000353000000b600010002ffffffffffffffff0000003300000000root/usr/local/share/doc/xz/examples/00_README.txt
liblzma example programs
========================
Introduction
The examples are written so that the same comments aren't
repeated (much) in later files.
On POSIX systems, the examples should build by just typing "make".
The examples that use stdin or stdout don't set stdin and stdout
to binary mode. On systems where it matters (e.g. Windows) it is
possible that the examples won't work without modification.
List of examples
01_compress_easy.c Multi-call compression using
a compression preset
02_decompress.c Multi-call decompression
03_compress_custom.c Like 01_compress_easy.c but using
a custom filter chain
(x86 BCJ + LZMA2)
070701000996a1000081a40000000000000000000000015174a52f0000253e000000b600010002ffffffffffffffff0000003800000000root/usr/local/share/doc/xz/examples/01_compress_easy.c ///////////////////////////////////////////////////////////////////////////////
//
/// \file 01_compress_easy.c
/// \brief Compress from stdin to stdout in multi-call mode
///
/// Usage: ./01_compress_easy PRESET < INFILE > OUTFILE
///
/// Example: ./01_compress_easy 6 < foo > foo.xz
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include
#include
#include
#include
#include
#include
// Print a short usage message to stderr and terminate the program
// with EXIT_FAILURE. argv0 is the program name shown in the message.
// This function does not return.
static void
show_usage_and_exit(const char *argv0)
{
	fprintf(stderr,
			"Usage: %s PRESET < INFILE > OUTFILE\n"
			"PRESET is a number 0-9 and can optionally be followed by "
			"`e' to indicate extreme preset\n",
			argv0);

	exit(EXIT_FAILURE);
}
// Parse the compression preset from the command line.
//
// Exactly one argument is accepted: a digit 0-9, optionally followed
// by 'e' to request the extreme variant of the preset. Any other
// input prints the usage message and exits.
//
// Returns the preset level, with LZMA_PRESET_EXTREME set in it when
// the 'e' suffix was given.
static uint32_t
get_preset(int argc, char **argv)
{
	// There must be exactly one argument...
	if (argc != 2)
		show_usage_and_exit(argv[0]);

	// ...and it must begin with a digit.
	const char *arg = argv[1];
	if (arg[0] < '0' || arg[0] > '9')
		show_usage_and_exit(argv[0]);

	// Convert the digit to the preset level 0-9.
	const uint32_t level = arg[0] - '0';

	// A lone digit selects the plain preset.
	if (arg[1] == '\0')
		return level;

	// Otherwise the only thing allowed after the digit is a single
	// 'e', which selects the extreme variant.
	if (arg[1] != 'e' || arg[2] != '\0')
		show_usage_and_exit(argv[0]);

	return level | LZMA_PRESET_EXTREME;
}
// Initialize strm as a .xz Stream encoder using the given preset.
//
// Returns true when the encoder is ready for lzma_code(). On failure
// a message is printed to stderr and false is returned.
static bool
init_encoder(lzma_stream *strm, uint32_t preset)
{
	// Set up the encoder from the preset. The integrity check is
	// CRC64, which is the default in the xz command line tool. If
	// the .xz file needs to be decompressed with XZ Embedded, use
	// LZMA_CHECK_CRC32 instead.
	lzma_ret rc = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64);

	// Nothing more to do when the initialization went fine.
	if (rc == LZMA_OK)
		return true;

	// Initialization failed. The possible errors are documented in
	// lzma/container.h (src/liblzma/api/lzma/container.h in the
	// source package or e.g. /usr/include/lzma/container.h depending
	// on the install prefix).
	const char *reason;
	if (rc == LZMA_MEM_ERROR) {
		reason = "Memory allocation failed";
	} else if (rc == LZMA_OPTIONS_ERROR) {
		reason = "Specified preset is not supported";
	} else if (rc == LZMA_UNSUPPORTED_CHECK) {
		reason = "Specified integrity check is not supported";
	} else {
		// Most likely LZMA_PROG_ERROR, which indicates a bug in
		// this program or in liblzma. A dedicated message for an
		// error that should be impossible is inconvenient to
		// have, but the error code matters for debugging, so it
		// is printed at least when no good message is available.
		reason = "Unknown error, possibly a bug";
	}

	fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
			reason, rc);
	return false;
}
// Compress everything readable from infile and write the resulting
// .xz data to outfile, using the encoder already initialized in strm.
//
// Returns true once the encoder reports LZMA_STREAM_END and all output
// has been written. On a read, write, or encoder error a message is
// printed to stderr and false is returned.
static bool
compress(lzma_stream *strm, FILE *infile, FILE *outfile)
{
	// Temporary storage for uncompressed input and compressed output.
	uint8_t in_chunk[BUFSIZ];
	uint8_t out_chunk[BUFSIZ];

	// Remains LZMA_RUN until the end of the input file is reached;
	// this is how lzma_code() learns that no more input will follow.
	lzma_action mode = LZMA_RUN;

	// Set up the input and output pointers. Setting next_in and
	// avail_in isn't strictly needed when only one file is encoded,
	// because LZMA_STREAM_INIT has already initialized them. It costs
	// little, though, and becomes necessary when handling more than
	// one file, as 02_decompress.c does.
	//
	// strm->total_in and strm->total_out are not used in this
	// example, but note that initializing the encoder always resets
	// both to zero. Encoder initialization does not touch next_in,
	// avail_in, next_out, or avail_out.
	strm->next_in = NULL;
	strm->avail_in = 0;
	strm->next_out = out_chunk;
	strm->avail_out = sizeof(out_chunk);

	// Keep going until the file has been compressed successfully or
	// an error occurs.
	for (;;) {
		// Refill the input buffer when the encoder has used it up.
		if (strm->avail_in == 0 && !feof(infile)) {
			strm->next_in = in_chunk;
			strm->avail_in = fread(in_chunk, 1, sizeof(in_chunk),
					infile);

			if (ferror(infile)) {
				fprintf(stderr, "Read error: %s\n",
						strerror(errno));
				return false;
			}

			// At the end of the input file, lzma_code() has to
			// be told that no more input is coming and that it
			// should finish the encoding.
			if (feof(infile))
				mode = LZMA_FINISH;
		}

		// Let liblzma do the actual encoding.
		//
		// Up to strm->avail_in bytes of input are read starting
		// at strm->next_in. avail_in is decremented and next_in
		// incremented by the number of input bytes consumed.
		//
		// Up to strm->avail_out bytes of compressed output are
		// written starting at strm->next_out. avail_out is
		// decremented and next_out incremented by the number of
		// output bytes written.
		//
		// The encoder buffers data internally, so it may consume
		// quite a bit of input before the same data shows up in
		// compressed form in the output buffer.
		const lzma_ret rc = lzma_code(strm, mode);
		const bool finished = (rc == LZMA_STREAM_END);

		// Write the output buffer to the output file when it is
		// full or when the compression finished successfully.
		if (finished || strm->avail_out == 0) {
			// After LZMA_STREAM_END the output buffer is likely
			// to be only partially full, so work out how much
			// new data it actually holds.
			const size_t pending
					= sizeof(out_chunk) - strm->avail_out;

			if (fwrite(out_chunk, 1, pending, outfile)
					!= pending) {
				fprintf(stderr, "Write error: %s\n",
						strerror(errno));
				return false;
			}

			// Make the whole output buffer available again.
			strm->next_out = out_chunk;
			strm->avail_out = sizeof(out_chunk);
		}

		// LZMA_STREAM_END is returned once everything has been
		// encoded successfully.
		//
		// Checking for LZMA_STREAM_END explicitly is important:
		// a return value other than LZMA_OK does not by itself
		// mean that everything went well.
		if (finished)
			return true;

		// LZMA_OK is the normal return value until everything
		// has been encoded, so simply continue.
		if (rc == LZMA_OK)
			continue;

		// Neither LZMA_OK nor LZMA_STREAM_END, so this is an
		// error code. See lzma/base.h (src/liblzma/api/lzma/base.h
		// in the source package or e.g. /usr/include/lzma/base.h
		// depending on the install prefix) for the list and
		// documentation of possible values. Most values listed in
		// the lzma_ret enumeration aren't possible in this example.
		const char *reason;
		if (rc == LZMA_MEM_ERROR) {
			reason = "Memory allocation failed";
		} else if (rc == LZMA_DATA_ERROR) {
			// Returned if the compressed or uncompressed size
			// gets near 8 EiB (2^63 bytes), which is where the
			// .xz file format size limits currently are. The
			// possibility of this error is thus mostly
			// theoretical unless you are doing something very
			// unusual.
			//
			// strm->total_in and strm->total_out have nothing
			// to do with this error; changing them neither
			// increases nor decreases the chance of getting it.
			reason = "File size limits exceeded";
		} else {
			// Most likely LZMA_PROG_ERROR, but if this program
			// is buggy (or liblzma has a bug), it may be e.g.
			// LZMA_BUF_ERROR or LZMA_OPTIONS_ERROR too.
			//
			// A dedicated message for an error that should be
			// impossible is inconvenient to have, but the error
			// code matters for debugging, so it is printed at
			// least when no good message is available.
			reason = "Unknown error, possibly a bug";
		}

		fprintf(stderr, "Encoder error: %s (error code %u)\n",
				reason, rc);
		return false;
	}
}
extern int
main(int argc, char **argv)
{
	// The compression preset is taken from the command line.
	const uint32_t preset = get_preset(argc, argv);

	// A lzma_stream that lives on the stack is most easily prepared
	// with the LZMA_STREAM_INIT macro. For a heap-allocated stream,
	// memset(strmptr, 0, sizeof(*strmptr)) does the same job (provided
	// NULL pointers are all-bits-zero, which holds on practically every
	// computer in use today).
	lzma_stream strm = LZMA_STREAM_INIT;

	// Set up the encoder and, only if that worked, compress stdin to
	// stdout. The && short-circuits, so compress() is never reached
	// after a failed initialization.
	bool ok = init_encoder(&strm, preset)
			&& compress(&strm, stdin, stdout);

	// Release the encoder's memory. With several input files this would
	// be needed only once, after the last file; 02_decompress.c shows
	// how multiple files are handled.
	//
	// Calling lzma_end() more than once, or on a stream that was merely
	// initialized with LZMA_STREAM_INIT, is fine.
	lzma_end(&strm);

	// Closing stdout flushes whatever stdio still has buffered, which
	// is the last chance to notice a write error.
	if (fclose(stdout) != 0) {
		fprintf(stderr, "Write error: %s\n", strerror(errno));
		ok = false;
	}

	if (!ok)
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
070701000996a2000081a40000000000000000000000015174a52f000022d0000000b600010002ffffffffffffffff0000003500000000root/usr/local/share/doc/xz/examples/02_decompress.c ///////////////////////////////////////////////////////////////////////////////
//
/// \file 02_decompress.c
/// \brief Decompress .xz files to stdout
///
/// Usage: ./02_decompress INPUT_FILES... > OUTFILE
///
/// Example: ./02_decompress foo.xz bar.xz > foobar
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <lzma.h>
static bool
init_decoder(lzma_stream *strm)
{
	// Set up a .xz stream decoder on *strm. Two knobs are passed to
	// lzma_stream_decoder(): a memory usage limit and a set of flags.
	//
	// Memory limit: how much memory decompression needs is decided by
	// the settings that were used when the .xz file was created. It
	// ranges from under a megabyte up to a few gigabytes, although in
	// practice (at least for now) it seldom goes above 65 MiB, which is
	// what files made with "xz -9" require. Settings that need more take
	// extra effort to use and, in most cases, don't (at least for now)
	// compress significantly better.
	//
	// A limit is worth having when the decompressor must not be allowed
	// to eat gigabytes of memory; whether that matters depends on the
	// application. This example does no limiting, which is expressed by
	// passing UINT64_MAX.
	//
	// Flags: .xz files may be concatenated as is, for example:
	//
	//     echo foo | xz > foobar.xz
	//     echo bar | xz >> foobar.xz
	//
	// For ordinary standalone .xz files LZMA_CONCATENATED should always
	// be given so that such files decode completely. Without it the
	// decoder stops after the first .xz stream, which can be handy when
	// .xz data is embedded inside some other file format.
	//
	// Other flags exist as well and can be combined with bitwise-or;
	// see lzma/container.h (src/liblzma/api/lzma/container.h in the
	// source package, or e.g. /usr/include/lzma/container.h depending
	// on the install prefix).
	const lzma_ret rc = lzma_stream_decoder(
			strm, UINT64_MAX, LZMA_CONCATENATED);

	// Nothing more to do when the decoder is ready.
	if (rc == LZMA_OK)
		return true;

	// Initialization failed. The possible error codes are documented in
	// lzma/container.h (same locations as mentioned above).
	//
	// LZMA_MEMLIMIT_ERROR can never show up here: even with a very tiny
	// limit, that error is postponed until lzma_code() has parsed the
	// first headers.
	//
	// Anything that is neither LZMA_MEM_ERROR nor LZMA_OPTIONS_ERROR is
	// most likely LZMA_PROG_ERROR, i.e. a bug in this program or in
	// liblzma. A dedicated message for errors that should be impossible
	// would be inconvenient, but the numeric code is valuable when
	// debugging, so it is always printed alongside the text.
	const char *reason = "Unknown error, possibly a bug";

	if (rc == LZMA_MEM_ERROR)
		reason = "Memory allocation failed";
	else if (rc == LZMA_OPTIONS_ERROR)
		reason = "Unsupported decompressor flags";

	fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n",
			reason, rc);
	return false;
}
static bool
decompress(lzma_stream *strm, const char *inname, FILE *infile, FILE *outfile)
{
	// Decode everything readable from infile with the already
	// initialized decoder *strm and write the result to outfile.
	// inname is used only as a prefix in error messages. Returns true
	// once lzma_code() reports LZMA_STREAM_END, false after any read,
	// write or decoder error (a message has been printed to stderr).
	//
	// Because the decoder was initialized with LZMA_CONCATENATED,
	// lzma_code() has to be told when the input ends. Exactly as when
	// encoding, that is done by switching the action from LZMA_RUN to
	// LZMA_FINISH.
	//
	// Without LZMA_CONCATENATED, LZMA_FINISH isn't needed to signal the
	// end of input (using it anyway is OK). In that mode the decoder
	// stops after the first .xz stream and some unused data may remain
	// in strm->next_in.
	uint8_t inbuf[BUFSIZ];
	uint8_t outbuf[BUFSIZ];
	lzma_action action = LZMA_RUN;

	// Start with no input and a completely empty output buffer.
	strm->next_in = NULL;
	strm->avail_in = 0;
	strm->next_out = outbuf;
	strm->avail_out = sizeof(outbuf);

	for (;;) {
		// Refill the input buffer once the decoder has consumed
		// all of it, unless the file has already hit EOF.
		if (strm->avail_in == 0 && !feof(infile)) {
			strm->next_in = inbuf;
			strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
					infile);

			if (ferror(infile)) {
				fprintf(stderr, "%s: Read error: %s\n",
						inname, strerror(errno));
				return false;
			}

			// At end of file, let lzma_code() know that no
			// further input will follow. As noted above, this
			// is required only because LZMA_CONCATENATED is
			// in use.
			if (feof(infile))
				action = LZMA_FINISH;
		}

		const lzma_ret ret = lzma_code(strm, action);

		// Flush the output buffer when it is full, and one final
		// time (possibly partially filled) when decoding is done.
		if (ret == LZMA_STREAM_END || strm->avail_out == 0) {
			const size_t pending
					= sizeof(outbuf) - strm->avail_out;

			if (fwrite(outbuf, 1, pending, outfile) != pending) {
				fprintf(stderr, "Write error: %s\n",
						strerror(errno));
				return false;
			}

			strm->next_out = outbuf;
			strm->avail_out = sizeof(outbuf);
		}

		// LZMA_OK: more work remains, go around again.
		if (ret == LZMA_OK)
			continue;

		// LZMA_STREAM_END is the only code that means success.
		// Never treat "ret != LZMA_OK" by itself as success, and
		// never conclude that decoding finished merely because no
		// more output is being produced.
		if (ret == LZMA_STREAM_END)
			return true;

		// Neither LZMA_OK nor LZMA_STREAM_END, hence an error code.
		// lzma/base.h (src/liblzma/api/lzma/base.h in the source
		// package, or e.g. /usr/include/lzma/base.h depending on
		// the install prefix) lists and documents the possible
		// values. Many values listed in the lzma_ret enumeration
		// cannot occur in this example, but become possible when a
		// memory usage limit is enabled or more flags are passed
		// at decoder initialization.
		const char *msg;

		switch (ret) {
		case LZMA_MEM_ERROR:
			msg = "Memory allocation failed";
			break;

		case LZMA_FORMAT_ERROR:
			// The .xz magic bytes were not found.
			msg = "The input is not in the .xz format";
			break;

		case LZMA_OPTIONS_ERROR:
			// Example: the headers name a filter that this
			// liblzma version doesn't support (or that was
			// disabled at build time, which no-one sane does
			// except for embedded systems). A newer liblzma
			// might help.
			//
			// It is unlikely that accidental corruption is
			// behind this error: the .xz headers are always
			// protected by a CRC32, so unintentionally corrupt
			// files can be told apart from unsupported ones.
			msg = "Unsupported compression options";
			break;

		case LZMA_DATA_ERROR:
			msg = "Compressed file is corrupt";
			break;

		case LZMA_BUF_ERROR:
			// Usually a valid file that got truncated, though
			// a damaged region can also make the decoder think
			// the file is truncated. Reusing the
			// LZMA_DATA_ERROR message here would be acceptable
			// too.
			msg = "Compressed file is truncated or otherwise corrupt";
			break;

		default:
			// Most likely LZMA_PROG_ERROR.
			msg = "Unknown error, possibly a bug";
			break;
		}

		fprintf(stderr, "%s: Decoder error: %s (error code %u)\n",
				inname, msg, ret);
		return false;
	}
}
extern int
main(int argc, char **argv)
{
	// At least one input file name is required.
	if (argc <= 1) {
		fprintf(stderr, "Usage: %s FILES...\n", argv[0]);
		return EXIT_FAILURE;
	}

	lzma_stream strm = LZMA_STREAM_INIT;
	bool success = true;

	// Go through every file named on the command line. A file that
	// cannot be opened or decoded marks the run as failed, but the
	// remaining files are still attempted.
	for (int i = 1; i < argc; ++i) {
		const char *path = argv[i];

		// The decoder is (re)initialized for each file. If that
		// fails there is no point in retrying, so give up entirely.
		if (!init_decoder(&strm)) {
			success = false;
			break;
		}

		FILE *infile = fopen(path, "rb");
		if (infile == NULL) {
			fprintf(stderr, "%s: Error opening the input file: %s\n",
					path, strerror(errno));
			success = false;
			continue;
		}

		if (!decompress(&strm, path, infile, stdout))
			success = false;

		fclose(infile);
	}

	// The decoder's memory needs to be freed only once, after the
	// last file.
	lzma_end(&strm);

	// Closing stdout flushes stdio's buffers; report a failure to do so.
	if (fclose(stdout) != 0) {
		fprintf(stderr, "Write error: %s\n", strerror(errno));
		success = false;
	}

	if (!success)
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
070701000996a5000041ed0000000000000000000000025174a54c00000000000000b600010002ffffffffffffffff0000002900000000root/usr/local/share/doc/xz/examples_old 070701000996a7000081a40000000000000000000000015174a52f00000c3a000000b600010002ffffffffffffffff0000003a00000000root/usr/local/share/doc/xz/examples_old/xz_pipe_decomp.c /*
* xz_pipe_decomp.c
* A simple example of pipe-only xz decompressor implementation.
* version: 2012-06-14 - by Daniel Mealha Cabrita
* Not copyrighted -- provided to the public domain.
*
* Compiling:
* Link with liblzma. GCC example:
* $ gcc -llzma xz_pipe_decomp.c -o xz_pipe_decomp
*
* Usage example:
* $ cat some_file.xz | ./xz_pipe_decomp > some_file
*/
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdbool.h>
#include <lzma.h>
/* read/write buffer sizes, in bytes: IN_BUF_MAX sizes the buffer filled by
   fread() from the input, OUT_BUF_MAX the buffer handed to lzma_code()
   and then passed to fwrite() */
#define IN_BUF_MAX 4096
#define OUT_BUF_MAX 4096
/* error codes returned by xz_decompress() (main returns the same value,
   so they also become the process exit status) */
#define RET_OK 0 /* no error detected */
#define RET_ERROR_INIT 1 /* lzma_stream_decoder() did not return LZMA_OK */
#define RET_ERROR_INPUT 2 /* ferror() reported an error on the input file */
#define RET_ERROR_OUTPUT 3 /* ferror() reported an error on the output file */
#define RET_ERROR_DECOMPRESSION 4 /* lzma_code() failed, or the input ended before LZMA_STREAM_END */
/*
 * xz_decompress: decode .xz data read from in_file and write the
 * decompressed bytes to out_file.
 *
 * note: in_file and out_file must be open already
 *
 * The decoder is created with LZMA_TELL_UNSUPPORTED_CHECK and
 * LZMA_CONCATENATED and without a memory limit (UINT64_MAX).
 *
 * Returns:
 *   RET_OK                   the last lzma_code() call returned LZMA_STREAM_END
 *   RET_ERROR_INIT           lzma_stream_decoder() failed
 *   RET_ERROR_INPUT          reading in_file failed
 *   RET_ERROR_OUTPUT         writing out_file failed
 *   RET_ERROR_DECOMPRESSION  lzma_code() failed, or the input ended
 *                            before LZMA_STREAM_END was reached
 *
 * Processing stops at the first error, so the returned code always
 * names the first failure that was detected.
 */
int xz_decompress (FILE *in_file, FILE *out_file)
{
	lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
	const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED;
	const uint64_t memory_limit = UINT64_MAX; /* no memory limit */
	uint8_t in_buf [IN_BUF_MAX];
	uint8_t out_buf [OUT_BUF_MAX];
	size_t in_len;	/* length of useful data in in_buf */
	size_t out_len;	/* length of useful data in out_buf */
	bool in_finished = false;
	lzma_action action;
	lzma_ret ret_xz;
	int ret = RET_OK;

	/* initialize xz decoder */
	ret_xz = lzma_stream_decoder (&strm, memory_limit, flags);
	if (ret_xz != LZMA_OK) {
		fprintf (stderr, "lzma_stream_decoder error: %d\n", (int) ret_xz);
		return RET_ERROR_INIT;
	}

	/* any error sets ret and thereby ends this loop */
	while ((ret == RET_OK) && (! in_finished)) {
		/* read incoming data */
		in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);

		/* A failed read ends processing right away. Feeding the
		   partial buffer to the decoder with LZMA_FINISH would
		   normally just produce a decoder error that hides the
		   real cause (the input error). */
		if (ferror (in_file)) {
			ret = RET_ERROR_INPUT;
			break;
		}

		if (feof (in_file)) {
			in_finished = true;
		}

		strm.next_in = in_buf;
		strm.avail_in = in_len;

		/* if no more data from in_buf, flushes the
		   internal xz buffers and closes the decompressed data
		   with LZMA_FINISH */
		action = in_finished ? LZMA_FINISH : LZMA_RUN;

		/* loop until there's no pending decompressed output */
		do {
			/* out_buf is clean at this point */
			strm.next_out = out_buf;
			strm.avail_out = OUT_BUF_MAX;

			/* decompress data */
			ret_xz = lzma_code (&strm, action);

			if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) {
				fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz);
				ret = RET_ERROR_DECOMPRESSION;
				break; /* don't call lzma_code() again after a failure */
			}

			/* write decompressed data; a short write or a set
			   error indicator both count as an output error */
			out_len = OUT_BUF_MAX - strm.avail_out;
			if ((fwrite (out_buf, 1, out_len, out_file) != out_len)
					|| ferror (out_file)) {
				ret = RET_ERROR_OUTPUT;
				break; /* no point decoding data that can't be written */
			}
		} while (strm.avail_out == 0);
	}

	/* Bug fix (2012-06-14): If no errors were detected, check
	   that the last lzma_code() call returned LZMA_STREAM_END.
	   If not, the file is probably truncated. */
	if ((ret == RET_OK) && (ret_xz != LZMA_STREAM_END)) {
		fprintf (stderr, "Input truncated or corrupt\n");
		ret = RET_ERROR_DECOMPRESSION;
	}

	lzma_end (&strm);
	return ret;
}
/* Entry point: decompress stdin to stdout. The RET_* code produced by
   xz_decompress() is passed through unchanged as the exit status. */
int main ()
{
	return xz_decompress (stdin, stdout);
}
070701000996a6000081a40000000000000000000000015174a52f00000be3000000b600010002ffffffffffffffff0000003800000000root/usr/local/share/doc/xz/examples_old/xz_pipe_comp.c /*
* xz_pipe_comp.c
* A simple example of pipe-only xz compressor implementation.
* version: 2010-07-12 - by Daniel Mealha Cabrita
* Not copyrighted -- provided to the public domain.
*
* Compiling:
* Link with liblzma. GCC example:
* $ gcc -llzma xz_pipe_comp.c -o xz_pipe_comp
*
* Usage example:
* $ cat some_file | ./xz_pipe_comp > some_file.xz
*/
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdbool.h>
#include <lzma.h>
/* COMPRESSION SETTINGS
   (combined by xz_compress() into the preset and check arguments of
   lzma_easy_encoder()) */
/* preset level, analogous to xz CLI options: -0 to -9 */
#define COMPRESSION_LEVEL 6
/* boolean setting, analogous to xz CLI option: -e
   (when true, LZMA_PRESET_EXTREME is ORed into the preset) */
#define COMPRESSION_EXTREME true
/* integrity check type stored in the .xz stream;
   see: /usr/include/lzma/check.h LZMA_CHECK_* */
#define INTEGRITY_CHECK LZMA_CHECK_CRC64
/* read/write buffer sizes, in bytes: IN_BUF_MAX sizes the buffer filled by
   fread() from the input, OUT_BUF_MAX the buffer handed to lzma_code()
   and then passed to fwrite() */
#define IN_BUF_MAX 4096
#define OUT_BUF_MAX 4096
/* error codes returned by xz_compress() (main returns the same value,
   so they also become the process exit status) */
#define RET_OK 0 /* no error detected */
#define RET_ERROR_INIT 1 /* lzma_easy_encoder() did not return LZMA_OK */
#define RET_ERROR_INPUT 2 /* ferror() reported an error on the input file */
#define RET_ERROR_OUTPUT 3 /* ferror() reported an error on the output file */
#define RET_ERROR_COMPRESSION 4 /* lzma_code() returned an error */
/*
 * xz_compress: compress everything read from in_file into a single .xz
 * stream written to out_file.
 *
 * note: in_file and out_file must be open already
 *
 * The encoder is configured from COMPRESSION_LEVEL, COMPRESSION_EXTREME
 * and INTEGRITY_CHECK.
 *
 * Returns:
 *   RET_OK                 the encoder reported LZMA_STREAM_END, i.e. the
 *                          .xz stream was written out completely
 *   RET_ERROR_INIT         lzma_easy_encoder() failed
 *   RET_ERROR_INPUT        reading in_file failed
 *   RET_ERROR_OUTPUT       writing out_file failed
 *   RET_ERROR_COMPRESSION  lzma_code() returned an error
 *
 * Processing stops at the first error, so the returned code always names
 * the first failure that was detected. After an error the data already
 * written to out_file is an incomplete .xz stream.
 */
int xz_compress (FILE *in_file, FILE *out_file)
{
	const uint32_t preset = COMPRESSION_LEVEL | (COMPRESSION_EXTREME ? LZMA_PRESET_EXTREME : 0);
	const lzma_check check = INTEGRITY_CHECK;
	lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
	uint8_t in_buf [IN_BUF_MAX];
	uint8_t out_buf [OUT_BUF_MAX];
	size_t in_len;	/* length of useful data in in_buf */
	size_t out_len;	/* length of useful data in out_buf */
	bool in_finished = false;
	lzma_action action;
	lzma_ret ret_xz;
	int ret = RET_OK;

	/* initialize xz encoder */
	ret_xz = lzma_easy_encoder (&strm, preset, check);
	if (ret_xz != LZMA_OK) {
		fprintf (stderr, "lzma_easy_encoder error: %d\n", (int) ret_xz);
		return RET_ERROR_INIT;
	}

	/* any error sets ret and thereby ends this loop */
	while ((ret == RET_OK) && (! in_finished)) {
		/* read incoming data */
		in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);

		/* A failed read ends processing right away, so that the
		   input error is what gets reported and truncated input
		   is not wrapped up as a complete .xz stream. */
		if (ferror (in_file)) {
			ret = RET_ERROR_INPUT;
			break;
		}

		if (feof (in_file)) {
			in_finished = true;
		}

		strm.next_in = in_buf;
		strm.avail_in = in_len;

		/* if no more data from in_buf, flushes the
		   internal xz buffers and closes the xz data
		   with LZMA_FINISH */
		action = in_finished ? LZMA_FINISH : LZMA_RUN;

		/* While running: loop until there's no pending compressed
		   output. While finishing: keep calling lzma_code() until it
		   reports LZMA_STREAM_END, because only that return value
		   says that everything has been encoded. */
		do {
			/* out_buf is clean at this point */
			strm.next_out = out_buf;
			strm.avail_out = OUT_BUF_MAX;

			/* compress data */
			ret_xz = lzma_code (&strm, action);

			if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) {
				fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz);
				ret = RET_ERROR_COMPRESSION;
				break; /* don't call lzma_code() again after a failure */
			}

			/* write compressed data; a short write or a set
			   error indicator both count as an output error */
			out_len = OUT_BUF_MAX - strm.avail_out;
			if ((fwrite (out_buf, 1, out_len, out_file) != out_len)
					|| ferror (out_file)) {
				ret = RET_ERROR_OUTPUT;
				break; /* no point encoding data that can't be written */
			}
		} while ((ret_xz != LZMA_STREAM_END)
				&& ((strm.avail_out == 0) || (action == LZMA_FINISH)));
	}

	lzma_end (&strm);
	return ret;
}
/* Entry point: compress stdin to stdout. The RET_* code produced by
   xz_compress() is passed through unchanged as the exit status. */
int main ()
{
	return xz_compress (stdin, stdout);
}
0707010009969a000081a40000000000000000000000015174a52f00004643000000b600010002ffffffffffffffff0000002a00000000root/usr/local/share/doc/xz/COPYING.GPLv2 GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
0707010009969e000081a40000000000000000000000015174a52f000008f6000000b600010002ffffffffffffffff0000002100000000root/usr/local/share/doc/xz/TODO
XZ Utils To-Do List
===================
Known bugs
----------
The test suite is too incomplete.
If the memory usage limit is less than about 13 MiB, xz is unable to
automatically scale down the compression settings enough even though
it would be possible by switching from BT2/BT3/BT4 match finder to
HC3/HC4.
The code to detect number of CPU cores doesn't count hyperthreading
as multiple cores. In context of xz, it probably should.
Hyperthreading is good at least with p7zip.
XZ Utils compress some files significantly worse than LZMA Utils.
This is due to faster compression presets used by XZ Utils, and
can often be worked around by using "xz --extreme". With some files
--extreme isn't enough though: it's most likely with files that
compress extremely well, so going from compression ratio of 0.003
to 0.004 means big relative increase in the compressed file size.
xz doesn't quote unprintable characters when it displays file names
given on the command line.
tuklib_exit() doesn't block signals => EINTR is possible.
SIGTSTP is not handled. If xz is stopped, the estimated remaining
time and calculated (de)compression speed won't make sense in the
progress indicator (xz --verbose).
Missing features
----------------
xz doesn't support copying extended attributes, access control
lists etc. from source to target file.
Multithreaded compression
Multithreaded decompression
Buffer-to-buffer coding could use less RAM (especially when
decompressing LZMA1 or LZMA2).
I/O library is not implemented (similar to gzopen() in zlib).
It will be a separate library that supports uncompressed, .gz,
.bz2, .lzma, and .xz files.
lzma_strerror() to convert lzma_ret to human readable form?
This is tricky, because the same error codes are used with
slightly different meanings, and this cannot be fixed anymore.
Documentation
-------------
Some tutorial is needed for liblzma. I have planned to write some
extremely well commented example programs, which would work as
a tutorial. I suppose the Doxygen tags are quite OK as a quick
reference once one is familiar with the liblzma API.
Document the LZMA1 and LZMA2 algorithms.
070701000996ab000081a40000000000000000000000015174a52f0000a929000000b600010002ffffffffffffffff0000002f00000000root/usr/local/share/doc/xz/xz-file-format.txt
The .xz File Format
===================
Version 1.0.4 (2009-08-27)
0. Preface
0.1. Notices and Acknowledgements
0.2. Getting the Latest Version
0.3. Version History
1. Conventions
1.1. Byte and Its Representation
1.2. Multibyte Integers
2. Overall Structure of .xz File
2.1. Stream
2.1.1. Stream Header
2.1.1.1. Header Magic Bytes
2.1.1.2. Stream Flags
2.1.1.3. CRC32
2.1.2. Stream Footer
2.1.2.1. CRC32
2.1.2.2. Backward Size
2.1.2.3. Stream Flags
2.1.2.4. Footer Magic Bytes
2.2. Stream Padding
3. Block
3.1. Block Header
3.1.1. Block Header Size
3.1.2. Block Flags
3.1.3. Compressed Size
3.1.4. Uncompressed Size
3.1.5. List of Filter Flags
3.1.6. Header Padding
3.1.7. CRC32
3.2. Compressed Data
3.3. Block Padding
3.4. Check
4. Index
4.1. Index Indicator
4.2. Number of Records
4.3. List of Records
4.3.1. Unpadded Size
4.3.2. Uncompressed Size
4.4. Index Padding
4.5. CRC32
5. Filter Chains
5.1. Alignment
5.2. Security
5.3. Filters
5.3.1. LZMA2
5.3.2. Branch/Call/Jump Filters for Executables
5.3.3. Delta
5.3.3.1. Format of the Encoded Output
5.4. Custom Filter IDs
5.4.1. Reserved Custom Filter ID Ranges
6. Cyclic Redundancy Checks
7. References
0. Preface
This document describes the .xz file format (filename suffix
".xz", MIME type "application/x-xz"). It is intended that this
format replace the old .lzma format used by LZMA SDK and
LZMA Utils.
0.1. Notices and Acknowledgements
This file format was designed by Lasse Collin
and Igor Pavlov.
Special thanks for helping with this document goes to
Ville Koskinen. Thanks for helping with this document goes to
Mark Adler, H. Peter Anvin, Mikko Pouru, and Lars Wirzenius.
This document has been put into the public domain.
0.2. Getting the Latest Version
The latest official version of this document can be downloaded
from <http://tukaani.org/xz/xz-file-format.txt>.
Specific versions of this document have a filename
xz-file-format-X.Y.Z.txt where X.Y.Z is the version number.
For example, the version 1.0.0 of this document is available
at <http://tukaani.org/xz/xz-file-format-1.0.0.txt>.
0.3. Version History
Version Date Description
1.0.4 2009-08-27 Language improvements in Sections 1.2,
2.1.1.2, 3.1.1, 3.1.2, and 5.3.1
1.0.3 2009-06-05 Spelling fixes in Sections 5.1 and 5.4
1.0.2 2009-06-04 Typo fixes in Sections 4 and 5.3.1
1.0.1 2009-06-01 Typo fix in Section 0.3 and minor
clarifications to Sections 2, 2.2,
3.3, 4.4, and 5.3.2
1.0.0 2009-01-14 The first official version
1. Conventions
The key words "MUST", "MUST NOT", "REQUIRED", "SHOULD",
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in [RFC-2119].
Indicating a warning means displaying a message, returning
appropriate exit status, or doing something else to let the
user know that something worth warning occurred. The operation
SHOULD still finish if a warning is indicated.
Indicating an error means displaying a message, returning
appropriate exit status, or doing something else to let the
user know that something prevented successfully finishing the
operation. The operation MUST be aborted once an error has
been indicated.
1.1. Byte and Its Representation
In this document, byte is always 8 bits.
A "null byte" has all bits unset. That is, the value of a null
byte is 0x00.
To represent byte blocks, this document uses notation that
is similar to the notation used in [RFC-1952]:
+-------+
| Foo | One byte.
+-------+
+---+---+
| Foo | Two bytes; that is, some of the vertical bars
+---+---+ can be missing.
+=======+
| Foo | Zero or more bytes.
+=======+
In this document, a boxed byte or a byte sequence declared
using this notation is called "a field". The example field
above would be called "the Foo field" or plain "Foo".
If there are many fields, they may be split to multiple lines.
This is indicated with an arrow ("--->"):
+=====+
| Foo |
+=====+
+=====+
---> | Bar |
+=====+
The above is equivalent to this:
+=====+=====+
| Foo | Bar |
+=====+=====+
1.2. Multibyte Integers
Multibyte integers of static length, such as CRC values,
are stored in little endian byte order (least significant
byte first).
When smaller values are more likely than bigger values (for
example file sizes), multibyte integers are encoded in a
variable-length representation:
- Numbers in the range [0, 127] are copied as is, and take
one byte of space.
- Bigger numbers will occupy two or more bytes. All but the
last byte of the multibyte representation have the highest
(eighth) bit set.
For now, the value of the variable-length integers is limited
to 63 bits, which limits the encoded size of the integer to
nine bytes. These limits may be increased in the future if
needed.
The following C code illustrates encoding and decoding of
variable-length integers. The functions return the number of
bytes occupied by the integer (1-9), or zero on error.
#include <stddef.h>
#include <inttypes.h>
/*
 * Write num to buf in the variable-length representation: seven payload
 * bits per byte, least significant group first, with the highest bit set
 * on every byte except the last one.
 *
 * Returns the number of bytes written (1-9), or zero if num needs more
 * than 63 bits.
 */
size_t
encode(uint8_t buf[static 9], uint64_t num)
{
    size_t len = 0;

    /* Values with the 64th bit set cannot be represented. */
    if (num > UINT64_MAX / 2)
        return 0;

    /* Emit continuation bytes while more than seven bits remain. */
    for (; num >= 0x80; num >>= 7)
        buf[len++] = (uint8_t)(num) | 0x80;

    /* Final byte: highest bit clear marks the end of the integer. */
    buf[len++] = (uint8_t)(num);

    return len;
}
/*
 * Read one variable-length integer from buf, looking at no more than
 * size_max bytes (and never more than nine), and store it in *num.
 *
 * Returns the number of bytes consumed (1-9), or zero on error: no input,
 * the integer is not terminated within the allowed bytes, or a
 * continuation byte is 0x00. On error *num may hold a partial value.
 */
size_t
decode(const uint8_t buf[], size_t size_max, uint64_t *num)
{
    size_t pos = 0;

    if (size_max == 0)
        return 0;

    /* A 63-bit value never occupies more than nine bytes. */
    if (size_max > 9)
        size_max = 9;

    *num = buf[0] & 0x7F;

    /* A set highest bit means another byte follows the current one. */
    while (buf[pos] & 0x80) {
        ++pos;

        /* The bound is checked first so buf[pos] is only read when valid. */
        if (pos >= size_max || buf[pos] == 0x00)
            return 0;

        *num |= (uint64_t)(buf[pos] & 0x7F) << (pos * 7);
    }

    return pos + 1;
}
2. Overall Structure of .xz File
A standalone .xz file consists of one or more Streams which may
have Stream Padding between or after them:
+========+================+========+================+
| Stream | Stream Padding | Stream | Stream Padding | ...
+========+================+========+================+
The sizes of Stream and Stream Padding are always multiples
of four bytes, thus the size of every valid .xz file MUST be
a multiple of four bytes.
While a typical file contains only one Stream and no Stream
Padding, a decoder handling standalone .xz files SHOULD support
files that have more than one Stream or Stream Padding.
In contrast to standalone .xz files, when the .xz file format
is used as an internal part of some other file format or
communication protocol, it usually is expected that the decoder
stops after the first Stream, and doesn't look for Stream
Padding or possibly other Streams.
2.1. Stream
+-+-+-+-+-+-+-+-+-+-+-+-+=======+=======+ +=======+
| Stream Header | Block | Block | ... | Block |
+-+-+-+-+-+-+-+-+-+-+-+-+=======+=======+ +=======+
+=======+-+-+-+-+-+-+-+-+-+-+-+-+
---> | Index | Stream Footer |
+=======+-+-+-+-+-+-+-+-+-+-+-+-+
All the above fields have a size that is a multiple of four. If
Stream is used as an internal part of another file format, it
is RECOMMENDED to make the Stream start at an offset that is
a multiple of four bytes.
Stream Header, Index, and Stream Footer are always present in
a Stream. The maximum size of the Index field is 16 GiB (2^34).
There are zero or more Blocks. The maximum number of Blocks is
limited only by the maximum size of the Index field.
Total size of a Stream MUST be less than 8 EiB (2^63 bytes).
The same limit applies to the total amount of uncompressed
data stored in a Stream.
If an implementation supports handling .xz files with multiple
concatenated Streams, it MAY apply the above limits to the file
as a whole instead of limiting per Stream basis.
2.1.1. Stream Header
+---+---+---+---+---+---+-------+------+--+--+--+--+
| Header Magic Bytes | Stream Flags | CRC32 |
+---+---+---+---+---+---+-------+------+--+--+--+--+
2.1.1.1. Header Magic Bytes
The first six (6) bytes of the Stream are so called Header
Magic Bytes. They can be used to identify the file type.
Using a C array and ASCII:
const uint8_t HEADER_MAGIC[6]
= { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
In plain hexadecimal:
FD 37 7A 58 5A 00
Notes:
- The first byte (0xFD) was chosen so that the files cannot
be erroneously detected as being in .lzma format, in which
the first byte is in the range [0x00, 0xE0].
- The sixth byte (0x00) was chosen to prevent applications
from misdetecting the file as a text file.
If the Header Magic Bytes don't match, the decoder MUST
indicate an error.
2.1.1.2. Stream Flags
The first byte of Stream Flags is always a null byte. In the
future, this byte may be used to indicate a new Stream version
or other Stream properties.
The second byte of Stream Flags is a bit field:
Bit(s) Mask Description
0-3 0x0F Type of Check (see Section 3.4):
ID Size Check name
0x00 0 bytes None
0x01 4 bytes CRC32
0x02 4 bytes (Reserved)
0x03 4 bytes (Reserved)
0x04 8 bytes CRC64
0x05 8 bytes (Reserved)
0x06 8 bytes (Reserved)
0x07 16 bytes (Reserved)
0x08 16 bytes (Reserved)
0x09 16 bytes (Reserved)
0x0A 32 bytes SHA-256
0x0B 32 bytes (Reserved)
0x0C 32 bytes (Reserved)
0x0D 64 bytes (Reserved)
0x0E 64 bytes (Reserved)
0x0F 64 bytes (Reserved)
4-7 0xF0 Reserved for future use; MUST be zero for now.
Implementations SHOULD support at least the Check IDs 0x00
(None) and 0x01 (CRC32). Supporting other Check IDs is
OPTIONAL. If an unsupported Check is used, the decoder SHOULD
indicate a warning or error.
If any reserved bit is set, the decoder MUST indicate an error.
It is possible that there is a new field present which the
decoder is not aware of, and can thus parse the Stream Header
incorrectly.
2.1.1.3. CRC32
The CRC32 is calculated from the Stream Flags field. It is
stored as an unsigned 32-bit little endian integer. If the
calculated value does not match the stored one, the decoder
MUST indicate an error.
The idea is that Stream Flags would always be two bytes, even
if new features are needed. This way old decoders will be able
to verify the CRC32 calculated from Stream Flags, and thus
distinguish between corrupt files (CRC32 doesn't match) and
files that the decoder doesn't support (CRC32 matches but
Stream Flags has reserved bits set).
2.1.2. Stream Footer
+-+-+-+-+---+---+---+---+-------+------+----------+---------+
| CRC32 | Backward Size | Stream Flags | Footer Magic Bytes |
+-+-+-+-+---+---+---+---+-------+------+----------+---------+
2.1.2.1. CRC32
The CRC32 is calculated from the Backward Size and Stream Flags
fields. It is stored as an unsigned 32-bit little endian
integer. If the calculated value does not match the stored one,
the decoder MUST indicate an error.
The reason to have the CRC32 field before the Backward Size and
Stream Flags fields is to keep the four-byte fields aligned to
a multiple of four bytes.
2.1.2.2. Backward Size
Backward Size is stored as a 32-bit little endian integer,
which indicates the size of the Index field as multiple of
four bytes, minimum value being four bytes:
real_backward_size = (stored_backward_size + 1) * 4;
If the stored value does not match the real size of the Index
field, the decoder MUST indicate an error.
Using a fixed-size integer to store Backward Size makes
it slightly simpler to parse the Stream Footer when the
application needs to parse the Stream backwards.
2.1.2.3. Stream Flags
This is a copy of the Stream Flags field from the Stream
Header. The information stored to Stream Flags is needed
when parsing the Stream backwards. The decoder MUST compare
the Stream Flags fields in both Stream Header and Stream
Footer, and indicate an error if they are not identical.
2.1.2.4. Footer Magic Bytes
As the last step of the decoding process, the decoder MUST
verify the existence of Footer Magic Bytes. If they don't
match, an error MUST be indicated.
Using a C array and ASCII:
const uint8_t FOOTER_MAGIC[2] = { 'Y', 'Z' };
In hexadecimal:
59 5A
The primary reason to have Footer Magic Bytes is to make
it easier to detect incomplete files quickly, without
uncompressing. If the file does not end with Footer Magic Bytes
(excluding Stream Padding described in Section 2.2), it cannot
be undamaged, unless someone has intentionally appended garbage
after the end of the Stream.
2.2. Stream Padding
Only the decoders that support decoding of concatenated Streams
MUST support Stream Padding.
Stream Padding MUST contain only null bytes. To preserve the
four-byte alignment of consecutive Streams, the size of Stream
Padding MUST be a multiple of four bytes. Empty Stream Padding
is allowed. If these requirements are not met, the decoder MUST
indicate an error.
Note that non-empty Stream Padding is allowed at the end of the
file; there doesn't need to be a new Stream after non-empty
Stream Padding. This can be convenient in certain situations
[GNU-tar].
The possibility of Stream Padding MUST be taken into account
when designing an application that parses Streams backwards,
and the application supports concatenated Streams.
3. Block
+==============+=================+===============+=======+
| Block Header | Compressed Data | Block Padding | Check |
+==============+=================+===============+=======+
3.1. Block Header
+-------------------+-------------+=================+
| Block Header Size | Block Flags | Compressed Size |
+-------------------+-------------+=================+
+===================+======================+
---> | Uncompressed Size | List of Filter Flags |
+===================+======================+
+================+--+--+--+--+
---> | Header Padding | CRC32 |
+================+--+--+--+--+
3.1.1. Block Header Size
This field overlaps with the Index Indicator field (see
Section 4.1).
This field contains the size of the Block Header field,
including the Block Header Size field itself. Valid values are
in the range [0x01, 0xFF], which indicate the size of the Block
Header as multiples of four bytes, minimum size being eight
bytes:
real_header_size = (encoded_header_size + 1) * 4;
If a Block Header bigger than 1024 bytes is needed in the
future, a new field can be added between the Block Header and
Compressed Data fields. The presence of this new field would
be indicated in the Block Header field.
3.1.2. Block Flags
The Block Flags field is a bit field:
Bit(s) Mask Description
0-1 0x03 Number of filters (1-4)
2-5 0x3C Reserved for future use; MUST be zero for now.
6 0x40 The Compressed Size field is present.
7 0x80 The Uncompressed Size field is present.
If any reserved bit is set, the decoder MUST indicate an error.
It is possible that there is a new field present which the
decoder is not aware of, and can thus parse the Block Header
incorrectly.
3.1.3. Compressed Size
This field is present only if the appropriate bit is set in
the Block Flags field (see Section 3.1.2).
The Compressed Size field contains the size of the Compressed
Data field, which MUST be non-zero. Compressed Size is stored
using the encoding described in Section 1.2. If the Compressed
Size doesn't match the size of the Compressed Data field, the
decoder MUST indicate an error.
3.1.4. Uncompressed Size
This field is present only if the appropriate bit is set in
the Block Flags field (see Section 3.1.2).
The Uncompressed Size field contains the size of the Block
after uncompressing. Uncompressed Size is stored using the
encoding described in Section 1.2. If the Uncompressed Size
does not match the real uncompressed size, the decoder MUST
indicate an error.
Storing the Compressed Size and Uncompressed Size fields serves
several purposes:
- The decoder knows how much memory it needs to allocate
for a temporary buffer in multithreaded mode.
- Simple error detection: wrong size indicates a broken file.
- Seeking forwards to a specific location in streamed mode.
It should be noted that the only reliable way to determine
the real uncompressed size is to uncompress the Block,
because the Block Header and Index fields may contain
(intentionally or unintentionally) invalid information.
3.1.5. List of Filter Flags
+================+================+ +================+
| Filter 0 Flags | Filter 1 Flags | ... | Filter n Flags |
+================+================+ +================+
The number of Filter Flags fields is stored in the Block Flags
field (see Section 3.1.2).
The format of each Filter Flags field is as follows:
+===========+====================+===================+
| Filter ID | Size of Properties | Filter Properties |
+===========+====================+===================+
Both Filter ID and Size of Properties are stored using the
encoding described in Section 1.2. Size of Properties indicates
the size of the Filter Properties field as bytes. The list of
officially defined Filter IDs and the formats of their Filter
Properties are described in Section 5.3.
Filter IDs greater than or equal to 0x4000_0000_0000_0000
(2^62) are reserved for implementation-specific internal use.
These Filter IDs MUST never be used in List of Filter Flags.
3.1.6. Header Padding
This field contains as many null bytes as are needed to make
the Block Header have the size specified in Block Header Size.
If any of the bytes are not null bytes, the decoder MUST
indicate an error. It is possible that there is a new field
present which the decoder is not aware of, and can thus parse
the Block Header incorrectly.
3.1.7. CRC32
The CRC32 is calculated over everything in the Block Header
field except the CRC32 field itself. It is stored as an
unsigned 32-bit little endian integer. If the calculated
value does not match the stored one, the decoder MUST indicate
an error.
Verifying the CRC32 of the Block Header before parsing the
actual contents allows the decoder to distinguish between
corrupt and unsupported files.
3.2. Compressed Data
The format of Compressed Data depends on Block Flags and List
of Filter Flags. Excluding the descriptions of the simplest
filters in Section 5.3, the format of the filter-specific
encoded data is out of scope of this document.
3.3. Block Padding
Block Padding MUST contain 0-3 null bytes to make the size of
the Block a multiple of four bytes. This can be needed when
the size of Compressed Data is not a multiple of four. If any
of the bytes in Block Padding are not null bytes, the decoder
MUST indicate an error.
3.4. Check
The type and size of the Check field depend on which bits
are set in the Stream Flags field (see Section 2.1.1.2).
The Check, when used, is calculated from the original
uncompressed data. If the calculated Check does not match the
stored one, the decoder MUST indicate an error. If the selected
type of Check is not supported by the decoder, it SHOULD
indicate a warning or error.
4. Index
+-----------------+===================+
| Index Indicator | Number of Records |
+-----------------+===================+
+=================+===============+-+-+-+-+
---> | List of Records | Index Padding | CRC32 |
+=================+===============+-+-+-+-+
Index serves several purposes. Using it, one can
- verify that all Blocks in a Stream have been processed;
- find out the uncompressed size of a Stream; and
- quickly access the beginning of any Block (random access).
4.1. Index Indicator
This field overlaps with the Block Header Size field (see
Section 3.1.1). The value of Index Indicator is always 0x00.
4.2. Number of Records
This field indicates how many Records there are in the List
of Records field, and thus how many Blocks there are in the
Stream. The value is stored using the encoding described in
Section 1.2. If the decoder has decoded all the Blocks of the
Stream, and then notices that the Number of Records doesn't
match the real number of Blocks, the decoder MUST indicate an
error.
4.3. List of Records
List of Records consists of as many Records as indicated by the
Number of Records field:
+========+========+
| Record | Record | ...
+========+========+
Each Record contains information about one Block:
+===============+===================+
| Unpadded Size | Uncompressed Size |
+===============+===================+
If the decoder has decoded all the Blocks of the Stream, it
MUST verify that the contents of the Records match the real
Unpadded Size and Uncompressed Size of the respective Blocks.
Implementation hint: It is possible to verify the Index with
constant memory usage by calculating for example SHA-256 of
both the real size values and the List of Records, then
comparing the hash values. Implementing this using
non-cryptographic hash like CRC32 SHOULD be avoided unless
small code size is important.
If the decoder supports random-access reading, it MUST verify
that Unpadded Size and Uncompressed Size of every completely
decoded Block match the sizes stored in the Index. If only
partial Block is decoded, the decoder MUST verify that the
processed sizes don't exceed the sizes stored in the Index.
4.3.1. Unpadded Size
This field indicates the size of the Block excluding the Block
Padding field. That is, Unpadded Size is the size of the Block
Header, Compressed Data, and Check fields. Unpadded Size is
stored using the encoding described in Section 1.2. The value
MUST never be zero; with the current structure of Blocks, the
actual minimum value for Unpadded Size is five.
Implementation note: Because the size of the Block Padding
field is not included in Unpadded Size, calculating the total
size of a Stream or doing random-access reading requires
calculating the actual size of the Blocks by rounding Unpadded
Sizes up to the next multiple of four.
The reason to exclude Block Padding from Unpadded Size is to
ease making a raw copy of Compressed Data without Block
Padding. This can be useful, for example, if someone wants
to convert Streams to some other file format quickly.
4.3.2. Uncompressed Size
This field indicates the Uncompressed Size of the respective
Block as bytes. The value is stored using the encoding
described in Section 1.2.
4.4. Index Padding
This field MUST contain 0-3 null bytes to pad the Index to
a multiple of four bytes. If any of the bytes are not null
bytes, the decoder MUST indicate an error.
4.5. CRC32
The CRC32 is calculated over everything in the Index field
except the CRC32 field itself. The CRC32 is stored as an
unsigned 32-bit little endian integer. If the calculated
value does not match the stored one, the decoder MUST indicate
an error.
5. Filter Chains
The Block Flags field defines how many filters are used. When
more than one filter is used, the filters are chained; that is,
the output of one filter is the input of another filter. The
following figure illustrates the direction of data flow.
v Uncompressed Data ^
| Filter 0 |
Encoder | Filter 1 | Decoder
| Filter n |
v Compressed Data ^
5.1. Alignment
Alignment of uncompressed input data is usually the job of
the application producing the data. For example, to get the
best results, an archiver tool should make sure that all
PowerPC executable files in the archive stream start at
offsets that are multiples of four bytes.
Some filters, for example LZMA2, can be configured to take
advantage of specified alignment of input data. Note that
taking advantage of aligned input can be beneficial also when
a filter is not the first filter in the chain. For example,
if you compress PowerPC executables, you may want to use the
PowerPC filter and chain that with the LZMA2 filter. Because
not only the input but also the output alignment of the PowerPC
filter is four bytes, it is now beneficial to set LZMA2
settings so that the LZMA2 encoder can take advantage of its
four-byte-aligned input data.
The output of the last filter in the chain is stored to the
Compressed Data field, which is guaranteed to be aligned
to a multiple of four bytes relative to the beginning of the
Stream. This can increase
- speed, if the filtered data is handled multiple bytes at
a time by the filter-specific encoder and decoder,
because accessing aligned data in computer memory is
usually faster; and
- compression ratio, if the output data is later compressed
with an external compression tool.
5.2. Security
If filters were allowed to be chained freely, it would be
possible to create malicious files that would be very slow to
decode. Such files could be used to create denial of service
attacks.
Slow files could occur when multiple filters are chained:
v Compressed input data
| Filter 1 decoder (last filter)
| Filter 0 decoder (non-last filter)
v Uncompressed output data
The decoder of the last filter in the chain produces a lot of
output from little input. Another filter in the chain takes the
output of the last filter, and produces very little output
while consuming a lot of input. As a result, a lot of data is
moved inside the filter chain, but the filter chain as a whole
gets very little work done.
To prevent this kind of slow files, there are restrictions on
how the filters can be chained. These restrictions MUST be
taken into account when designing new filters.
The maximum number of filters in the chain has been limited to
four, thus there can be a maximum of three non-last filters.
Of these three non-last filters, only two are allowed to change
the size of the data.
The non-last filters that change the size of the data MUST
have a limit on how much the decoder can compress the data: the
decoder SHOULD produce at least n bytes of output when the
filter is given 2n bytes of input. This limit is not
absolute, but significant deviations MUST be avoided.
The above limitations guarantee that if the last filter in the
chain produces 4n bytes of output, the chain as a whole will
produce at least n bytes of output.
5.3. Filters
5.3.1. LZMA2
LZMA (Lempel-Ziv-Markov chain-Algorithm) is a general-purpose
compression algorithm with high compression ratio and fast
decompression. LZMA is based on LZ77 and range coding
algorithms.
LZMA2 is an extension on top of the original LZMA. LZMA2 uses
LZMA internally, but adds support for flushing the encoder,
uncompressed chunks, eases stateful decoder implementations,
and improves support for multithreading. Thus, the plain LZMA
will not be supported in this file format.
Filter ID: 0x21
Size of Filter Properties: 1 byte
Changes size of data: Yes
Allow as a non-last filter: No
Allow as the last filter: Yes
Preferred alignment:
Input data: Adjustable to 1/2/4/8/16 byte(s)
Output data: 1 byte
The format of the one-byte Filter Properties field is as
follows:
Bits Mask Description
0-5 0x3F Dictionary Size
6-7 0xC0 Reserved for future use; MUST be zero for now.
Dictionary Size is encoded with one-bit mantissa and five-bit
exponent. The smallest dictionary size is 4 KiB and the biggest
is 4 GiB.
Raw value Mantissa Exponent Dictionary size
0 2 11 4 KiB
1 3 11 6 KiB
2 2 12 8 KiB
3 3 12 12 KiB
4 2 13 16 KiB
5 3 13 24 KiB
6 2 14 32 KiB
... ... ... ...
35 3 28 768 MiB
36 2 29 1024 MiB
37 3 29 1536 MiB
38 2 30 2048 MiB
39 3 30 3072 MiB
40 2 31 4096 MiB - 1 B
Instead of having a table in the decoder, the dictionary size
can be decoded using the following C code:
const uint8_t bits = get_dictionary_flags() & 0x3F;
if (bits > 40)
return DICTIONARY_TOO_BIG; // Bigger than 4 GiB
uint32_t dictionary_size;
if (bits == 40) {
dictionary_size = UINT32_MAX;
} else {
dictionary_size = 2 | (bits & 1);
dictionary_size <<= bits / 2 + 11;
}
5.3.2. Branch/Call/Jump Filters for Executables
These filters convert relative branch, call, and jump
instructions to their absolute counterparts in executable
files. This conversion increases redundancy and thus
compression ratio.
Size of Filter Properties: 0 or 4 bytes
Changes size of data: No
Allow as a non-last filter: Yes
Allow as the last filter: No
Below is the list of filters in this category. The alignment
is the same for both input and output data.
Filter ID Alignment Description
0x04 1 byte x86 filter (BCJ)
0x05 4 bytes PowerPC (big endian) filter
0x06 16 bytes IA64 filter
0x07 4 bytes ARM (little endian) filter
0x08 2 bytes ARM Thumb (little endian) filter
0x09 4 bytes SPARC filter
If the size of Filter Properties is four bytes, the Filter
Properties field contains the start offset used for address
conversions. It is stored as an unsigned 32-bit little endian
integer. The start offset MUST be a multiple of the alignment
of the filter as listed in the table above; if it isn't, the
decoder MUST indicate an error. If the size of Filter
Properties is zero, the start offset is zero.
Setting the start offset may be useful if an executable has
multiple sections, and there are many cross-section calls.
Taking advantage of this feature usually requires usage of
the Subblock filter, whose design is not complete yet.
5.3.3. Delta
The Delta filter may increase compression ratio when the value
of the next byte correlates with the value of an earlier byte
at specified distance.
Filter ID: 0x03
Size of Filter Properties: 1 byte
Changes size of data: No
Allow as a non-last filter: Yes
Allow as the last filter: No
Preferred alignment:
Input data: 1 byte
Output data: Same as the original input data
The Properties byte indicates the delta distance, which can be
1-256 bytes backwards from the current byte: 0x00 indicates
distance of 1 byte and 0xFF distance of 256 bytes.
5.3.3.1. Format of the Encoded Output
The code below illustrates both encoding and decoding with
the Delta filter.
// Distance is in the range [1, 256].
const unsigned int distance = get_properties_byte() + 1;
uint8_t pos = 0;
uint8_t delta[256];
memset(delta, 0, sizeof(delta));
while (1) {
const int byte = read_byte();
if (byte == EOF)
break;
uint8_t tmp = delta[(uint8_t)(distance + pos)];
if (is_encoder) {
tmp = (uint8_t)(byte) - tmp;
delta[pos] = (uint8_t)(byte);
} else {
tmp = (uint8_t)(byte) + tmp;
delta[pos] = tmp;
}
write_byte(tmp);
--pos;
}
5.4. Custom Filter IDs
If a developer wants to use custom Filter IDs, he has two
choices. The first choice is to contact Lasse Collin and ask
him to allocate a range of IDs for the developer.
The second choice is to generate a 40-bit random integer,
which the developer can use as his personal Developer ID.
To minimize the risk of collisions, Developer ID has to be
a randomly generated integer, not manually selected "hex word".
The following command, which works on many free operating
systems, can be used to generate Developer ID:
dd if=/dev/urandom bs=5 count=1 | hexdump
The developer can then use his Developer ID to create unique
(well, hopefully unique) Filter IDs.
Bits Mask Description
0-15 0x0000_0000_0000_FFFF Filter ID
16-55 0x00FF_FFFF_FFFF_0000 Developer ID
56-62 0x3F00_0000_0000_0000 Static prefix: 0x3F
The resulting 63-bit integer will use 9 bytes of space when
stored using the encoding described in Section 1.2. To get
a shorter ID, see the beginning of this Section how to
request a custom ID range.
5.4.1. Reserved Custom Filter ID Ranges
Range Description
0x0000_0300 - 0x0000_04FF Reserved to ease .7z compatibility
0x0002_0000 - 0x0007_FFFF Reserved to ease .7z compatibility
0x0200_0000 - 0x07FF_FFFF Reserved to ease .7z compatibility
6. Cyclic Redundancy Checks
There are several incompatible variations to calculate CRC32
and CRC64. For simplicity and clarity, complete examples are
provided to calculate the checks as they are used in this file
format. Implementations MAY use different code as long as it
gives identical results.
The program below reads data from standard input, calculates
the CRC32 and CRC64 values, and prints the calculated values
as big endian hexadecimal strings to standard output.
#include <stddef.h>
#include <inttypes.h>
#include <stdio.h>
uint32_t crc32_table[256];
uint64_t crc64_table[256];
void
init(void)
{
static const uint32_t poly32 = UINT32_C(0xEDB88320);
static const uint64_t poly64
= UINT64_C(0xC96C5795D7870F42);
for (size_t i = 0; i < 256; ++i) {
uint32_t crc32 = i;
uint64_t crc64 = i;
for (size_t j = 0; j < 8; ++j) {
if (crc32 & 1)
crc32 = (crc32 >> 1) ^ poly32;
else
crc32 >>= 1;
if (crc64 & 1)
crc64 = (crc64 >> 1) ^ poly64;
else
crc64 >>= 1;
}
crc32_table[i] = crc32;
crc64_table[i] = crc64;
}
}
uint32_t
crc32(const uint8_t *buf, size_t size, uint32_t crc)
{
crc = ~crc;
for (size_t i = 0; i < size; ++i)
crc = crc32_table[buf[i] ^ (crc & 0xFF)]
^ (crc >> 8);
return ~crc;
}
uint64_t
crc64(const uint8_t *buf, size_t size, uint64_t crc)
{
crc = ~crc;
for (size_t i = 0; i < size; ++i)
crc = crc64_table[buf[i] ^ (crc & 0xFF)]
^ (crc >> 8);
return ~crc;
}
int
main()
{
init();
uint32_t value32 = 0;
uint64_t value64 = 0;
uint64_t total_size = 0;
uint8_t buf[8192];
while (1) {
const size_t buf_size
= fread(buf, 1, sizeof(buf), stdin);
if (buf_size == 0)
break;
total_size += buf_size;
value32 = crc32(buf, buf_size, value32);
value64 = crc64(buf, buf_size, value64);
}
printf("Bytes: %" PRIu64 "\n", total_size);
printf("CRC-32: 0x%08" PRIX32 "\n", value32);
printf("CRC-64: 0x%016" PRIX64 "\n", value64);
return 0;
}
7. References
LZMA SDK - The original LZMA implementation
http://7-zip.org/sdk.html
LZMA Utils - LZMA adapted to POSIX-like systems
http://tukaani.org/lzma/
XZ Utils - The next generation of LZMA Utils
http://tukaani.org/xz/
[RFC-1952]
GZIP file format specification version 4.3
http://www.ietf.org/rfc/rfc1952.txt
- Notation of byte boxes in section "2.1. Overall conventions"
[RFC-2119]
Key words for use in RFCs to Indicate Requirement Levels
http://www.ietf.org/rfc/rfc2119.txt
[GNU-tar]
GNU tar 1.21 manual
http://www.gnu.org/software/tar/manual/html_node/Blocking-Factor.html
- Node 9.4.2 "Blocking Factor", paragraph that begins
"gzip will complain about trailing garbage"
- Note that this URL points to the latest version of the
manual, and may some day not contain the note which is in
1.21. For the exact version of the manual, download GNU
tar 1.21: ftp://ftp.gnu.org/pub/gnu/tar/tar-1.21.tar.gz
07070100099698000081a40000000000000000000000015174a52f00000413000000b600010002ffffffffffffffff0000002400000000root/usr/local/share/doc/xz/AUTHORS
Authors of XZ Utils
===================
XZ Utils is developed and maintained by Lasse Collin.
Major parts of liblzma are based on code written by Igor Pavlov,
specifically the LZMA SDK <http://7-zip.org/sdk.html>. Without
this code, XZ Utils wouldn't exist.
The SHA-256 implementation in liblzma is based on the code found from
7-Zip <http://7-zip.org/>, which has a modified version of the SHA-256
code found from Crypto++ <http://www.cryptopp.com/>. The SHA-256 code
in Crypto++ was written by Kevin Springle and Wei Dai.
Some scripts have been adapted from gzip. The original versions
were written by Jean-loup Gailly, Charles Levert, and Paul Eggert.
Andrew Dudman helped adapting the scripts and their man pages for
XZ Utils.
The GNU Autotools-based build system contains files from many authors,
which I'm not trying to list here.
Several people have contributed fixes or reported bugs. Most of them
are mentioned in the file THANKS.
07070100099699000081a40000000000000000000000015174a52f00000ad6000000b600010002ffffffffffffffff0000002400000000root/usr/local/share/doc/xz/COPYING
XZ Utils Licensing
==================
Different licenses apply to different files in this package. Here
is a rough summary of which licenses apply to which parts of this
package (but check the individual files to be sure!):
- liblzma is in the public domain.
- xz, xzdec, and lzmadec command line tools are in the public
domain unless GNU getopt_long had to be compiled and linked
in from the lib directory. The getopt_long code is under
GNU LGPLv2.1+.
- The scripts to grep, diff, and view compressed files have been
adapted from gzip. These scripts and their documentation are
under GNU GPLv2+.
- All the documentation in the doc directory and most of the
XZ Utils specific documentation files in other directories
are in the public domain.
- Translated messages are in the public domain.
- The build system contains public domain files, and files that
are under GNU GPLv2+ or GNU GPLv3+. None of these files end up
in the binaries being built.
- Test files and test code in the tests directory, and debugging
utilities in the debug directory are in the public domain.
- The extra directory may contain public domain files, and files
that are under various free software licenses.
You can do whatever you want with the files that have been put into
the public domain. If you find public domain legally problematic,
take the previous sentence as a license grant. If you still find
the lack of copyright legally problematic, you have too many
lawyers.
As usual, this software is provided "as is", without any warranty.
If you copy significant amounts of public domain code from XZ Utils
into your project, acknowledging this somewhere in your software is
polite (especially if it is proprietary, non-free software), but
naturally it is not legally required. Here is an example of a good
notice to put into "about box" or into documentation:
This software includes code from XZ Utils <http://tukaani.org/xz/>.
The following license texts are included in the following files:
- COPYING.LGPLv2.1: GNU Lesser General Public License version 2.1
- COPYING.GPLv2: GNU General Public License version 2
- COPYING.GPLv3: GNU General Public License version 3
Note that the toolchain (compiler, linker etc.) may add some code
pieces that are copyrighted. Thus, it is possible that e.g. liblzma
binary wouldn't actually be in the public domain in its entirety
even though it contains no copyrighted code from the XZ Utils source
package.
If you have questions, don't hesitate to ask the author(s) for more
information.
0707010009969d000081a40000000000000000000000015174a52f00000733000000b600010002ffffffffffffffff0000002300000000root/usr/local/share/doc/xz/THANKS
Thanks
======
Some people have helped more, some less, but nevertheless everyone's help
has been important. :-) In alphabetical order:
- Mark Adler
- H. Peter Anvin
- Nelson H. F. Beebe
- Karl Berry
- Anders F. Björklund
- Emmanuel Blot
- Martin Blumenstingl
- Jakub Bogusz
- Maarten Bosmans
- Trent W. Buck
- James Buren
- David Burklund
- Daniel Mealha Cabrita
- Milo Casagrande
- Marek Černocký
- Chris Donawa
- Andrew Dudman
- Markus Duft
- İsmail Dönmez
- Robert Elz
- Gilles Espinasse
- Denis Excoffier
- Michael Felt
- Mike Frysinger
- Jason Gorski
- Juan Manuel Guerrero
- Joachim Henke
- Peter Ivanov
- Jouk Jansen
- Per Øyvind Karlsen
- Thomas Klausner
- Richard Koch
- Ville Koskinen
- Stephan Kulow
- Peter Lawler
- Hin-Tak Leung
- Andraž 'ruskie' Levstik
- Cary Lewis
- Wim Lewis
- Lorenzo De Liso
- Bela Lubkin
- Gregory Margo
- Jim Meyering
- Rafał Mużyło
- Adrien Nader
- Hongbo Ni
- Jonathan Nieder
- Andre Noll
- Peter O'Gorman
- Peter Pallinger
- Igor Pavlov
- Diego Elio Pettenò
- Elbert Pol
- Mikko Pouru
- Robert Readman
- Bernhard Reutner-Fischer
- Cristian Rodríguez
- Christian von Roques
- Jukka Salmi
- Alexandre Sauvé
- Benno Schulenberg
- Andreas Schwab
- Dan Shechter
- Stuart Shelton
- Jonathan Stott
- Dan Stromberg
- Paul Townsend
- Mohammed Adnène Trojette
- Alexey Tourbin
- Patrick J. Volkerding
- Martin Väth
- Christian Weisgerber
- Bert Wesarg
- Ralf Wildenhues
- Charles Wilson
- Lars Wirzenius
- Pilorz Wojciech
- Ryan Young
- Andreas Zieringer
Also thanks to all the people who have participated in the Tukaani project.
I have probably forgotten to add some names to the above list. Sorry about
that and thanks for your help.
070701000996a8000081a40000000000000000000000015174a52f000024c1000000b600010002ffffffffffffffff0000002400000000root/usr/local/share/doc/xz/faq.txt
XZ Utils FAQ
============
Q: What do the letters XZ mean?
A: Nothing. They are just two letters, which come from the file format
suffix .xz. The .xz suffix was selected, because it seemed to be
pretty much unused. It has no deeper meaning.
Q: What are LZMA and LZMA2?
A: LZMA stands for Lempel-Ziv-Markov chain-Algorithm. It is the name
of the compression algorithm designed by Igor Pavlov for 7-Zip.
LZMA is based on LZ77 and range encoding.
LZMA2 is an updated version of the original LZMA to fix a couple of
practical issues. In context of XZ Utils, LZMA is called LZMA1 to
emphasize that LZMA is not the same thing as LZMA2. LZMA2 is the
primary compression algorithm in the .xz file format.
Q: There are many LZMA related projects. How does XZ Utils relate to them?
A: 7-Zip and LZMA SDK are the original projects. LZMA SDK is roughly
a subset of the 7-Zip source tree.
p7zip is 7-Zip's command-line tools ported to POSIX-like systems.
LZMA Utils provide a gzip-like lzma tool for POSIX-like systems.
LZMA Utils are based on LZMA SDK. XZ Utils are the successor to
LZMA Utils.
There are several other projects using LZMA. Most are more or less
based on LZMA SDK. See <http://7-zip.org/links.html>.
Q: Why is liblzma named liblzma if its primary file format is .xz?
Shouldn't it be e.g. libxz?
A: When the designing of the .xz format began, the idea was to replace
the .lzma format and use the same .lzma suffix. It would have been
quite OK to reuse the suffix when there were very few .lzma files
around. However, the old .lzma format became popular before the
new format was finished. The new format was renamed to .xz but the
name of liblzma wasn't changed.
Q: Do XZ Utils support the .7z format?
A: No. Use 7-Zip (Windows) or p7zip (POSIX-like systems) to handle .7z
files.
Q: I have many .tar.7z files. Can I convert them to .tar.xz without
spending hours recompressing the data?
A: In the "extra" directory, there is a script named 7z2lzma.bash which
is able to convert some .7z files to the .lzma format (not .xz). It
needs the 7za (or 7z) command from p7zip. The script may silently
produce corrupt output if certain assumptions are not met, so
decompress the resulting .lzma file and compare it against the
original before deleting the original file!
Q: I have many .lzma files. Can I quickly convert them to the .xz format?
A: For now, no. Since XZ Utils supports the .lzma format, it's usually
not too bad to keep the old files in the old format. If you want to
do the conversion anyway, you need to decompress the .lzma files and
then recompress to the .xz format.
Technically, there is a way to make the conversion relatively fast
(roughly twice the time that normal decompression takes). Writing
such a tool would take quite a bit of time though, and would probably
be useful to only a few people. If you really want such a conversion
tool, contact Lasse Collin and offer some money.
Q: I have installed xz, but my tar doesn't recognize .tar.xz files.
How can I extract .tar.xz files?
A: xz -dc foo.tar.xz | tar xf -
Q: Can I recover parts of a broken .xz file (e.g. a corrupted CD-R)?
A: It may be possible if the file consists of multiple blocks, which
typically is not the case if the file was created in single-threaded
mode. There is no recovery program yet.
Q: Is (some part of) XZ Utils patented?
A: Lasse Collin is not aware of any patents that could affect XZ Utils.
However, due to the nature of software patents, it's not possible to
guarantee that XZ Utils isn't affected by any third party patent(s).
Q: Where can I find documentation about the file format and algorithms?
A: The .xz format is documented in xz-file-format.txt. It is a container
format only, and doesn't include descriptions of any non-trivial
filters.
Documenting LZMA and LZMA2 is planned, but for now, there is no other
documentation than the source code. Before you begin, you should know
the basics of LZ77 and range-coding algorithms. LZMA is based on LZ77,
but LZMA is a lot more complex. Range coding is used to compress
the final bitstream like Huffman coding is used in Deflate.
Q: I cannot find BCJ and BCJ2 filters. Don't they exist in liblzma?
A: BCJ filter is called "x86" in liblzma. BCJ2 is not included,
because it requires using more than one encoded output stream.
A streamable version of BCJ2-style filtering is planned.
Q: I need to use a script that runs "xz -9". On a system with 256 MiB
of RAM, xz says that it cannot allocate memory. Can I make the
script work without modifying it?
A: Set a default memory usage limit for compression. You can do it e.g.
in a shell initialization script such as ~/.bashrc or /etc/profile:
XZ_DEFAULTS=--memlimit-compress=150MiB
export XZ_DEFAULTS
xz will then scale the compression settings down so that the given
memory usage limit is not reached. This way xz shouldn't run out
of memory.
Check also that memory-related resource limits are high enough.
On most systems, "ulimit -a" will show the current resource limits.
Q: How do I create files that can be decompressed with XZ Embedded?
A: See the documentation in XZ Embedded. In short, something like
this is a good start:
xz --check=crc32 --lzma2=preset=6e,dict=64KiB
Or if a BCJ filter is needed too, e.g. if compressing
a kernel image for PowerPC:
xz --check=crc32 --powerpc --lzma2=preset=6e,dict=64KiB
Adjust the dictionary size to get a good compromise between
compression ratio and decompressor memory usage. Note that
in single-call decompression mode of XZ Embedded, a big
dictionary doesn't increase memory usage.
Q: Will xz support threaded compression?
A: It is planned and has been taken into account when designing
the .xz file format. Eventually there will probably be three types
of threading, each method having its own advantages and disadvantages.
The simplest method is splitting the uncompressed data into blocks
and compressing them in parallel independent from each other.
Since the blocks are compressed independently, they can also be
decompressed independently. Together with the index feature in .xz,
this allows using threads to create .xz files for random-access
reading. This also makes threaded decompression possible, although
it is not clear if threaded decompression will ever be implemented.
The independent blocks method has a couple of disadvantages too. It
will compress worse than a single-block method. Often the difference
is not too big (maybe 1-2 %) but sometimes it can be too big. Also,
the memory usage of the compressor increases linearly when adding
threads.
Match finder parallelization is another threading method. It has
been in 7-Zip for ages. It doesn't affect compression ratio or
memory usage significantly. Among the three threading methods, only
this is useful when compressing small files (files that are not
significantly bigger than the dictionary). Unfortunately this method
scales only to about two CPU cores.
The third method is pigz-style threading (I use that name, because
pigz <http://www.zlib.net/pigz/> uses that method). It doesn't
affect compression ratio significantly and scales to many cores.
The memory usage scales linearly when threads are added. This isn't
significant with pigz, because Deflate uses only a 32 KiB dictionary,
but with LZMA2 the memory usage will increase dramatically just like
with the independent-blocks method. There is also a constant
computational overhead, which may make pigz-method a bit dull on
dual-core compared to the parallel match finder method, but with more
cores the overhead is not a big deal anymore.
Combining the threading methods will be possible and also useful.
E.g. combining match finder parallelization with pigz-style threading
can cut the memory usage by 50 %.
It is possible that the single-threaded method will be modified to
create files identical to the pigz-style method. We'll see once
pigz-style threading has been implemented in liblzma.
Q: How do I build a program that needs liblzmadec (lzmadec.h)?
A: liblzmadec is part of LZMA Utils. XZ Utils has liblzma, but no
liblzmadec. The code using liblzmadec should be ported to use
liblzma instead. If you cannot or don't want to do that, download
LZMA Utils from <http://tukaani.org/lzma/>.
Q: The default build of liblzma is too big. How can I make it smaller?
A: Give --enable-small to the configure script. Use also appropriate
--enable or --disable options to include only those filter encoders
and decoders and integrity checks that you actually need. Use
CFLAGS=-Os (with GCC) or equivalent to tell your compiler to optimize
for size. See INSTALL for information about configure options.
If the result is still too big, take a look at XZ Embedded. It is
a separate project, which provides a limited but significantly
smaller XZ decoder implementation than XZ Utils. You can find it
at <http://tukaani.org/xz/embedded.html>.
070701000996ac000041ed0000000000000000000000075174a54c00000000000000b600010002ffffffffffffffff0000001c00000000root/usr/local/share/locale 070701000996ad000041ed0000000000000000000000035174a54c00000000000000b600010002ffffffffffffffff0000001f00000000root/usr/local/share/locale/cs 070701000996ae000041ed0000000000000000000000025174a54c00000000000000b600010002ffffffffffffffff0000002b00000000root/usr/local/share/locale/cs/LC_MESSAGES 070701000996af000081a40000000000000000000000015174a52f000055b9000000b600010002ffffffffffffffff0000003100000000root/usr/local/share/locale/cs/LC_MESSAGES/xz.mo Þ• … 0 X µ € T d l Ô Ñ Õ 7 § È ß - ¨ F Ö 5 7 G ‚ ó œ ö ® “ H B E ‹ œ Ñ n > ~ 9 ½ • ÷ „ Í ” “ b ½ ö l ´ ! ; U o § Á Û z õ p Š ¤ . ¶ å ø ! " ! D ' f Ž ® Î * í / % H n / € , ° Ý 4 ó (! D! b! z! “! °! Ñ! h ò! <