diff --git a/LZRW.H b/LZRW.H new file mode 100755 index 0000000..c782e11 --- /dev/null +++ b/LZRW.H @@ -0,0 +1 @@ +LZRW HEADER FILES ================= Author : Ross Williams. Date : 25-Jun-1991. 1. This file contains two header files used by the LZRW series data compression algorithms: compress.h - The header file for the algorithms. port.h - Contains portability definitions. 2. This code is public domain. 3. port.h is included only as an example. Please ensure that your own definitions for these macros are correct. UPDATED ======= Author : Matt Slot, Ambrosia Software. Date : 18-Jan-2007. Added dst_max field, routine now returns -1 instead of writing past buffer. This change is also public domain. /******************************************************************************/ /* */ /* COMPRESS.H */ /* */ /******************************************************************************/ /* */ /* Author : Ross Williams. */ /* Date : December 1989. */ /* */ /* This header file defines the interface to a set of functions called */ /* 'compress', each member of which implements a particular data compression */ /* algorithm. */ /* */ /* Normally in C programming, for each .H file, there is a corresponding .C */ /* file that implements the functions promised in the .H file. */ /* Here, there are many .C files corresponding to this header file. */ /* Each comforming implementation file contains a single function */ /* called 'compress' that implements a single data compression */ /* algorithm that conforms with the interface specified in this header file. */ /* Only one algorithm can be linked in at a time in this organization. */ /* */ /******************************************************************************/ /* */ /* DEFINITION OF FUNCTION COMPRESS */ /* =============================== */ /* */ /* Summary of Function Compress */ /* ---------------------------- */ /* The action that 'compress' takes depends on its first argument called */ /* 'action'. 
The function provides three actions: */ /* */ /* - Return information about the algorithm. */ /* - Compress a block of memory. */ /* - Decompress a block of memory. */ /* */ /* Parameters */ /* ---------- */ /* See the formal C definition later for a description of the parameters. */ /* */ /* Constants */ /* --------- */ /* COMPRESS_OVERRUN: The constant COMPRESS_OVERRUN defines by how many bytes */ /* an algorithm is allowed to expand a block during a compression operation. */ /* */ /* Although compression algorithms usually compress data, there will always */ /* be data that a given compressor will expand (this can be proven). */ /* Fortunately, the degree of expansion can be limited to a single bit, by */ /* copying over the input data if the data gets bigger during compression. */ /* To allow for this possibility, the first bit of a compressed */ /* representation can be used as a flag indicating whether the */ /* input data was copied over, or truly compressed. In practice, the first */ /* byte would be used to store this bit so as to maintain byte alignment. */ /* */ /* Unfortunately, in general, the only way to tell if an algorithm will */ /* expand a particular block of data is to run the algorithm on the data. */ /* If the algorithm does not continuously monitor how many output bytes it */ /* has written, it might write an output block far larger than the input */ /* block before realizing that it has done so. */ /* On the other hand, continuous checks on output length are inefficient. */ /* */ /* To cater for all these problems, this interface definition: */ /* > Allows a compression algorithm to return an output block that is up to */ /* COMPRESS_OVERRUN bytes longer than the input block. */ /* > Allows a compression algorithm to write up to COMPRESS_OVERRUN bytes */ /* more than the length of the input block to the memory of the output */ /* block regardless of the length of the output block eventually returned. 
*/ /* This allows an algorithm to overrun the length of the input block in the */ /* output block by up to COMPRESS_OVERRUN bytes between expansion checks. */ /* */ /* The problem does not arise for decompression. */ /* */ /* Identity Action */ /* --------------- */ /* > action must be COMPRESS_ACTION_IDENTITY. */ /* > p_dst_len must point to a longword to receive a longword address. */ /* > The value of the other parameters does not matter. */ /* > After execution, the longword that p_dst_len points to will be a pointer */ /* to a structure of type compress_identity. */ /* Thus, for example, after the call, (*p_dst_len)->memory will return the */ /* number of bytes of working memory that the algorithm requires to run. */ /* > The values of the identity structure returned are fixed constant */ /* attributes of the algorithm and must not vary from call to call. */ /* */ /* Common Requirements for Compression and Decompression Actions */ /* ------------------------------------------------------------- */ /* > wrk_mem must point to an unused block of memory of a length specified in */ /* the algorithm's identity block. The identity block can be obtained by */ /* making a separate call to compress, specifying the identity action. */ /* > The INPUT BLOCK is defined to be Memory[src_addr,src_addr+src_len-1]. */ /* > dst_len will be used to denote *p_dst_len. */ /* > dst_len is not read by compress, only written. */ /* > The value of dst_len is defined only upon termination. */ /* > The OUTPUT BLOCK is defined to be Memory[dst_addr,dst_addr+dst_len-1]. */ /* */ /* Compression Action */ /* ------------------ */ /* > action must be COMPRESS_ACTION_COMPRESS. */ /* > src_len must be in the range [0,COMPRESS_MAX_ORG]. */ /* > The OUTPUT ZONE is defined to be */ /* Memory[dst_addr,dst_addr+src_len-1+COMPRESS_OVERRUN]. */ /* > The function can modify any part of the output zone regardless of the */ /* final length of the output block. 
*/ /* > The input block and the output zone must not overlap. */ /* > dst_len will be in the range [0,src_len+COMPRESS_OVERRUN]. */ /* > dst_len will be in the range [0,COMPRESS_MAX_COM] (from prev fact). */ /* > The output block will consist of a representation of the input block. */ /* */ /* Decompression Action */ /* -------------------- */ /* > action must be COMPRESS_ACTION_DECOMPRESS. */ /* > The input block must be the result of an earlier compression operation. */ /* > If the previous fact is true, the following facts must also be true: */ /* > src_len will be in the range [0,COMPRESS_MAX_COM]. */ /* > dst_len will be in the range [0,COMPRESS_MAX_ORG]. */ /* > The input and output blocks must not overlap. */ /* > Only the output block is modified. */ /* > Upon termination, the output block will consist of the bytes contained */ /* in the input block passed to the earlier compression operation. */ /* */ /******************************************************************************/ /* NOTE(review): unlike PORT.H below, this header has no include guard, so */ /* including it twice would redeclare struct compress_identity. */ #include "port.h" /* Action selectors: the value passed as the first argument of compress. */ #define COMPRESS_ACTION_IDENTITY 0 #define COMPRESS_ACTION_COMPRESS 1 #define COMPRESS_ACTION_DECOMPRESS 2 /* Size limits referred to by the interface description above. */ #define COMPRESS_OVERRUN 1024 #define COMPRESS_MAX_COM 0x70000000 #define COMPRESS_MAX_ORG (COMPRESS_MAX_COM-COMPRESS_OVERRUN) #define COMPRESS_MAX_STRLEN 255 /* The following structure provides information about the algorithm. */ /* > The top bit of id must be zero. The remaining bits must be chosen by */ /* the author of the algorithm by tossing a coin 31 times. */ /* > The amount of memory requested by the algorithm is specified in bytes */ /* and must be in the range [0,0x70000000]. */ /* > All strings s must be such that strlen(s)<=COMPRESS_MAX_STRLEN. */ /* > Only the struct tag is declared (there is no typedef), so C code must */ /* refer to the type as "struct compress_identity". */ struct compress_identity { ULONG id; /* Identifying number of algorithm. */ ULONG memory; /* Number of bytes of working memory required. */ char *name; /* Name of algorithm. */ char *version; /* Version number. */ char *date; /* Date of release of this version. 
*/ char *copyright; /* Copyright message. */ char *author; /* Author of algorithm. */ char *affiliation; /* Affiliation of author. */ char *vendor; /* Where the algorithm can be obtained. */ }; /* Per the 18-Jan-2007 update note at the top of this file, the routine */ /* returns -1 instead of writing past the dst_max bytes of the output buffer. */ /* NOTE(review): LZRW3-A.C in this patch exports its entry point under the */ /* name xcompress, not compress - confirm which name callers link against. */ int compress( /* Single function interface to compression algorithm. */ UWORD action, /* Action to be performed. */ UBYTE *wrk_mem, /* Working memory temporarily given to routine to use. */ UBYTE *src_adr, /* Address of input data. */ ULONG src_len, /* Length of input data. */ ULONG dst_max, /* Allocated length of output buffer. */ UBYTE *dst_adr, /* Address of output data. */ ULONG *p_dst_len /* Pointer to a longword where routine will write: */ /* If action=..IDENTITY => Adr of id structure. */ /* If action=..COMPRESS => Length of output data. */ /* If action=..DECOMPRESS => Length of output data. */ ); /******************************************************************************/ /* End of COMPRESS.H */ /******************************************************************************/ /******************************************************************************/ /* */ /* PORT.H */ /* */ /******************************************************************************/ /* */ /* This module contains macro definitions and types that are likely to */ /* change between computers. */ /* */ /* NOTE(review): the type macros below are defined only when THINK_C is */ /* defined; for any other compiler they must be supplied by the user (see */ /* item 3 at the top of this file). */ /* */ /******************************************************************************/ #ifndef DONE_PORT /* Only do this if not previously done. */ #ifdef THINK_C #define UBYTE unsigned char /* Unsigned byte */ #define UWORD unsigned int /* Unsigned word (2 bytes) */ #define ULONG unsigned long /* Unsigned longword (4 bytes) */ #define BOOL unsigned char /* Boolean */ #define FOPEN_BINARY_READ "rb" /* Mode string for binary reading. */ #define FOPEN_BINARY_WRITE "wb" /* Mode string for binary writing. */ #define FOPEN_TEXT_APPEND "a" /* Mode string for text appending. */ #define REAL double /* Used for floating point stuff. */ #endif #define DONE_PORT /* Don't do all this again. 
*/ #define MALLOC_FAIL NULL /* Failure status from malloc() */ #define LOCAL static /* For non-exported routines. */ #define EXPORT /* Signals exported function. */ #define then /* Useful for aligning ifs. */ #endif /******************************************************************************/ /* End of PORT.H */ /******************************************************************************/ \ No newline at end of file diff --git a/LZRW3-A.C b/LZRW3-A.C new file mode 100755 index 0000000..7a14094 --- /dev/null +++ b/LZRW3-A.C @@ -0,0 +1,899 @@ +//#pragma cplusplus off + +/******************************************************************************/ +/* */ +/* LZRW3-A.C */ +/* */ +/******************************************************************************/ +/* */ +/* Author : Ross Williams. */ +/* Date : 15-Jul-1991. */ +/* Release : 1. */ +/* */ +/******************************************************************************/ +/* */ +/* This file contains an implementation of the LZRW3-A data compression */ +/* algorithm in the C programming language. */ +/* */ +/* The LZRW3-A algorithm has the following features: */ +/* */ +/* 1 Requires only 16K of memory (for both compression and decompression). */ +/* 2 The compressor runs about two times faster than Unix compress's. */ +/* 3 The decompressor runs about three times faster than Unix compress's. */ +/* 4 Yields a few percent better compression than Unix compress for */ +/* most files. */ +/* 5 Allows you to dial up extra compression at a speed cost in the */ +/* compressor. The speed of the decompressor is not affected. */ +/* 6 Algorithm is deterministic. */ +/* 7 Algorithm is free of patent problems. The algorithm has not been */ +/* patented (nor will it be) and is of the LZ77 class which is fairly */ +/* clear of patents. */ +/* 8 This implementation in C is in the public domain. */ +/* */ +/* (Timing tests for the speed comparison were performed on a Pyramid 9820.) 
*/ +/* */ +/* LZRW3-A is LZRW3 with a deepened hash table. This simple change yields */ +/* about a 6% (absolute) improvement in compression. */ +/* */ +/* Here are the results of applying this code, compiled under THINK C 4.0 */ +/* and running on a Mac-SE (8MHz 68000), to the standard calgary corpus. */ +/* */ +/* +----------------------------------------------------------------+ */ +/* | DATA COMPRESSION TEST | */ +/* | ===================== | */ +/* | Time of run : Mon 15-Jul-1991 05:29PM | */ +/* | Timing accuracy : One part in 100 | */ +/* | Context length : 262144 bytes (= 256.0000K) | */ +/* | Test suite : Calgary Corpus Suite | */ +/* | Files in suite : 14 | */ +/* | Algorithm : LZRW3-A | */ +/* | Note: All averages are calculated from the un-rounded values. | */ +/* +----------------------------------------------------------------+ */ +/* | File Name Length CxB ComLen %Remn Bits Com K/s Dec K/s | */ +/* | ---------- ------ --- ------ ----- ---- ------- ------- | */ +/* | rpus:Bib.D 111261 1 49044 44.1 3.53 8.47 31.19 | */ +/* | us:Book1.D 768771 3 420464 54.7 4.38 7.27 30.07 | */ +/* | us:Book2.D 610856 3 277955 45.5 3.64 8.51 33.40 | */ +/* | rpus:Geo.D 102400 1 84218 82.2 6.58 4.23 15.04 | */ +/* | pus:News.D 377109 2 192880 51.1 4.09 7.08 25.89 | */ +/* | pus:Obj1.D 21504 1 12651 58.8 4.71 5.23 17.44 | */ +/* | pus:Obj2.D 246814 1 108044 43.8 3.50 8.01 28.11 | */ +/* | s:Paper1.D 53161 1 24526 46.1 3.69 8.11 30.24 | */ +/* | s:Paper2.D 82199 1 39483 48.0 3.84 8.11 32.04 | */ +/* | rpus:Pic.D 513216 2 111622 21.7 1.74 10.64 49.31 | */ +/* | us:Progc.D 39611 1 17923 45.2 3.62 8.06 29.01 | */ +/* | us:Progl.D 71646 1 24362 34.0 2.72 10.74 39.51 | */ +/* | us:Progp.D 49379 1 16805 34.0 2.72 10.64 37.58 | */ +/* | us:Trans.D 93695 1 30296 32.3 2.59 11.02 38.06 | */ +/* +----------------------------------------------------------------+ */ +/* | Average 224401 1 100733 45.8 3.67 8.29 31.21 | */ +/* 
+----------------------------------------------------------------+ */ +/* */ +/******************************************************************************/ + + /* INCLUDE FILES */ + /* ============= */ +#include "port.h" /* Defines symbols for the non portable stuff. */ +#include "compress.h" /* Defines single exported function "compress". */ +//#include "fast_copy.h" /* Fast memory copy routine. */ + +/******************************************************************************/ + +/* The following structure is handed back by "xcompress" below when the caller */ +/* passes COMPRESS_ACTION_IDENTITY. Its most important field is the working */ +/* memory field, which tells the calling program how much working memory to */ +/* pass in for a compression or decompression (LZRW3-A needs the same amount */ +/* for both). For more information on this structure see "compress.h". */ +/* It is declared as "struct compress_identity" because compress.h defines */ +/* only the struct tag, so the bare name is a type in C++ but not in C. */ +/* The alignment fudge below really only needs to be 4 (but I play it safe!). */ +/* The id looks non-random, but it really was generated by coin tossing! */ + +#define U(X) ((ULONG) (X)) +#define SIZE_P_BYTE (U(sizeof(UBYTE *))) +#define ALIGNMENT_FUDGE (U(16)) + +#define MEM_REQ ( U(4096)*(SIZE_P_BYTE) + ALIGNMENT_FUDGE ) + +static struct compress_identity identity = +{ + U(0x01B90B91), /* Algorithm identification number. */ + MEM_REQ, /* Working memory (bytes) required. */ + "LZRW3-A", /* Name of algorithm. */ + "1.0 (safe)", /* Version number of algorithm. */ + "15-Jul-1990", /* Date of algorithm. */ + "Public Domain", /* Copyright notice. */ + "Ross N. Williams", /* Author of algorithm. */ + "Renaissance Software", /* Affiliation of author. */ + "Public Domain" /* Vendor of algorithm. 
*/ +}; + +int compress_compress (UBYTE *,UBYTE *,ULONG,ULONG,UBYTE *,ULONG *); +int compress_decompress(UBYTE *,UBYTE *,ULONG,ULONG,UBYTE *,ULONG *); + +/******************************************************************************/ + +/* This is the only function exported by this module. Its first parameter */ +/* selects one action: identify (stores &identity in *p_dst_len, returns 0), */ +/* compress or decompress (returns the result of compress_compress or */ +/* compress_decompress). Any other selector returns -1. See "compress.h". */ + +EXPORT int xcompress( +UWORD action, /* Action to be performed. */ +UBYTE *wrk_mem, /* Address of working memory we can use. */ +UBYTE *src_adr, /* Address of input data. */ +ULONG src_len, /* Length of input data. */ +ULONG dst_max, /* Maximum length of output buffer. */ +UBYTE *dst_adr, /* Address to put output data. */ +ULONG *p_dst_len) /* Address of longword for length of output data. */ +{ + switch (action) + { + case COMPRESS_ACTION_IDENTITY: + if (dst_max && (dst_max < sizeof(ULONG))) /* dst_max==0 skips this check. */ + return -1; /* Overflow */ + *p_dst_len=(ULONG) &identity; /* The address is passed back as a ULONG. */ + return 0; + break; + case COMPRESS_ACTION_COMPRESS: + return compress_compress + (wrk_mem,src_adr,src_len,dst_max,dst_adr,p_dst_len); + break; + case COMPRESS_ACTION_DECOMPRESS: + return compress_decompress + (wrk_mem,src_adr,src_len,dst_max,dst_adr,p_dst_len); + break; + } + return -1; /* Invalid selector */ +} + +/******************************************************************************/ +/* */ +/* BRIEF DESCRIPTION OF THE LZRW3-A ALGORITHM */ +/* ========================================== */ +/* Note: Before attempting to understand this algorithm, you should first */ +/* understand the LZRW3 algorithm from which this algorithm is derived. */ +/* */ +/* The LZRW3-A algorithm is identical to the LZRW3 algorithm except that the */ +/* hash table has been "deepened". The LZRW3 algorithm has a hash table of */ +/* 4096 pointers which point to strings in the buffer. 
LZRW3-A generalizes */ +/* this to 4096/(2^n) partitions each of which contains (2^n) pointers. */ +/* In LZRW3-A, the hash function hashes to a partition number. */ +/* */ +/* During the processing of each phrase, LZRW3 overwrites the pointer in the */ +/* position selected by the hash function. LZRW3-A overwrites one of the */ +/* pointers in the partition that was selected by the hash function. */ +/* */ +/* When searching for a match, LZRW3-A matches against all (2^n) strings */ +/* pointed to by the pointers in the target partition. */ +/* */ +/* Deep hash tables were used in early versions of LZRW1 in late 1989, but */ +/* were discarded in an effort to increase speed (which was the primary */ +/* requirement for LZRW1). They were revived for use in LZRW3-A in order to */ +/* produce an algorithm with compression performance competitive with Unix */ +/* compress. */ +/* */ +/* Until 14-Jul-1991, deep hash tables used in prototype LZRW* algorithms */ +/* used a queue discipline within each partition. Upon the arrival of a new */ +/* pointer, the pointers in the partition would be block copied back one */ +/* position (with the oldest pointer being overwritten) and the new pointer */ +/* being inserted in the space at the front (the youngest position). */ +/* This meant that pointers to the (2^n) most recent phrases corresponding to */ +/* each hash was kept. The only flaw in this system was the time-consuming */ +/* block copy operation which was cheap for shallow tables but expensive for */ +/* deep tables. */ +/* */ +/* The traditional solution to ring buffer block copy problems is to maintain */ +/* a cyclic counter which points to the "head" of the queue. However, this */ +/* would have required one counter to be stored for each partition and would */ +/* have been slightly messy. After some thought (on 14-Jul-1991) a better */ +/* solution was found. 
Instead of maintaining a counter for each partition, */ +/* LZRW3-A maintains a single counter for all partitions! This counter is */ +/* maintained in both the compressor and decompressor and means that the */ +/* algorithm (effectively) overwrites a RANDOM element of the partition to be */ +/* updated. The result was to increase the speed of the compressor and */ +/* decompressor, to make the decompressor's speed independent from whatever */ +/* depth was selected, and to impair compression by less than 1% absolute. */ +/* */ +/* Setting the depth is a speed/compression tradeoff. The table below gives */ +/* the tradeoff observed for a typical 50K text file on a Mac-SE. */ +/* Note: %Rem=Percentage Remaining (after compression). */ +/* */ +/* Depth %Rem CmpK/s DecK/s */ +/* 1 45.2 14.77 32.24 */ +/* 2 42.6 12.12 31.26 */ +/* 4 40.9 10.28 31.91 */ +/* 8 40.0 7.81 32.36 */ +/* 16 39.5 5.30 32.47 */ +/* 32 39.0 3.23 32.59 */ +/* */ +/* I have chosen a depth of 8 as the "default" depth for LZRW3-A. If you use */ +/* a depth different to this (e.g. 4), you should use the name LZRW3-A(4) to */ +/* indicate that a different depth is being used. LZRW3-A(8) is an acceptable */ +/* longhand for LZRW3-A. */ +/* */ +/* To change the depth, search for "HERE IT IS" in the rest of this file. 
*/ +/* */ +/* +---+ */ +/* |___|4095 */ +/* |===| */ +/* +---------------------*_|<---+ /----+---\ */ +/* | |___| +---|Hash | */ +/* | 512 partitions |___| |Function| */ +/* | of 8 pointers |===| \--------/ */ +/* | each (or any |___|0 ^ */ +/* | a*b=4096) +---+ | */ +/* | Hash +-----+ */ +/* | Table | */ +/* | --- */ +/* v ^^^ */ +/* +-------------------------------------|----------------+ */ +/* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| */ +/* +-------------------------------------|----------------+ */ +/* | |1......18| | */ +/* |<------- Lempel=History ------------>|<--Ziv-->| | */ +/* | (=bytes already processed) |<-Still to go-->| */ +/* |<-------------------- INPUT BLOCK ------------------->| */ +/* */ +/* */ +/******************************************************************************/ +/* */ +/* DEFINITION OF COMPRESSED FILE FORMAT */ +/* ==================================== */ +/* * A compressed file consists of a COPY FLAG followed by a REMAINDER. */ +/* * The copy flag CF uses up four bytes with the first byte being the */ +/* least significant. */ +/* * If CF=1, then the compressed file represents the remainder of the file */ +/* exactly. Otherwise CF=0 and the remainder of the file consists of zero */ +/* or more GROUPS, each of which represents one or more bytes. */ +/* * Each group consists of two bytes of CONTROL information followed by */ +/* sixteen ITEMs except for the last group which can contain from one */ +/* to sixteen items. */ +/* * An item can be either a LITERAL item or a COPY item. */ +/* * Each item corresponds to a bit in the control bytes. */ +/* * The first control byte corresponds to the first 8 items in the group */ +/* with bit 0 corresponding to the first item in the group and bit 7 to */ +/* the eighth item in the group. 
*/ +/* * The second control byte corresponds to the second 8 items in the group */ +/* with bit 0 corresponding to the ninth item in the group and bit 7 to */ +/* the sixteenth item in the group. */ +/* * A zero bit in a control word means that the corresponding item is a */ +/* literal item. A one bit corresponds to a copy item. */ +/* * A literal item consists of a single byte which represents itself. */ +/* * A copy item consists of two bytes that represent from 3 to 18 bytes. */ +/* * The first byte in a copy item will be denoted C1. */ +/* * The second byte in a copy item will be denoted C2. */ +/* * Bits will be selected using square brackets. */ +/* For example: C1[0..3] is the low nibble of the first byte (C1) */ +/* of the copy item. */ +/* * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number */ +/* in the range [3,18]. */ +/* * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..7] which */ +/* is a number in the range [0,4095]. */ +/* * A copy item represents the sequence of bytes */ +/* text[POS-OFFSET..POS-OFFSET+LENGTH-1] where */ +/* text is the entire text of the uncompressed string. */ +/* POS is the index in the text of the character following the */ +/* string represented by all the items preceding the item */ +/* being defined. */ +/* OFFSET is obtained from INDEX by looking up the hash table. */ +/* */ +/******************************************************************************/ + +/* When I first started to get concerned about the portability of my C code, */ +/* I switched over to using only macro defined types UBYTE, UWORD, ULONG and */ +/* one or two others. While these are useful for most purposes, they impair */ +/* efficiency as, if I have a variable whose range will be [0,1000], I will */ +/* declare it as a UWORD. This will translate into (say) "short int" and */ +/* hence may be less efficient than just an "int" which represents the */ +/* natural size of the machine. 
Before releasing LZRW3-A, I realized this */ +/* mistake. Unfortunately, I can't access the ftp archive with my portability */ +/* header in it in time for this algorithm's release and so I am including an */ +/* extra definition. The definition UCARD stands for an unsigned (cardinal) */ +/* type that can hold values in the range [0,32767]. This is within the ANSI */ +/* range of a standard int or unsigned. No assumption about overflow of this */ +/* type is made in the code (i.e. all usages are within range and I do not */ +/* use the value -1 to detect the end of loops.). */ +/* You can use either "unsigned" or just "int" here depending on which is */ +/* more efficient in your environment (both the same probably). */ +#define UCARD unsigned + +/* The following #define defines the length of the copy flag that appears at */ +/* the start of the compressed file. The value of four bytes was chosen */ +/* because the fast_copy routine on my Macintosh runs faster if the source */ +/* and destination blocks are relatively longword aligned. */ +/* The actual flag data appears in the first byte. The rest are zeroed so as */ +/* to normalize the compressed representation (i.e. not non-deterministic). */ +#define FLAG_BYTES 4 + +/* The following #defines define the meaning of the values of the copy */ +/* flag at the start of the compressed file. */ +#define FLAG_COMPRESS 0 /* Signals that output was result of compression. */ +#define FLAG_COPY 1 /* Signals that output was simply copied over. */ + +/* The 68000 microprocessor (on which this algorithm was originally developed) */ +/* is fussy about non-aligned arrays of words. To avoid these problems the */ +/* following macro can be used to "waste" from 0 to 3 bytes so as to align */ +/* the argument pointer. The argument is parenthesized so that the cast */ +/* applies to the whole argument expression; the result is a ULONG rounded up */ +/* to a multiple of 4. NOTE(review): 4-byte alignment may be too weak for */ +/* the pointer array it is used for where pointers are 8 bytes - confirm. */ +#define ULONG_ALIGN_UP(X) ((((ULONG)(X))+3)&~((ULONG)3)) + +/* The following constant defines the maximum length of an uncompressed item. */ +/* This definition must not be changed; its value is hardwired into the code. 
*/ +/* The largest number of bytes that can be spanned by a single item is 18 */ +/* for the longest copy item. */ +#define MAX_RAW_ITEM (18) /* Top of the copy item LENGTH range [3,18]. */ + +/* The following constant defines the maximum length of an uncompressed group.*/ +/* This definition must not be changed; its value is hardwired into the code. */ +/* A group contains at most 16 items which explains this definition. */ +#define MAX_RAW_GROUP (16*MAX_RAW_ITEM) /* = 288 bytes. */ + +/* The following constant defines the maximum length of a compressed group. */ +/* This definition must not be changed; its value is hardwired into the code. */ +/* A compressed group consists of two control bytes followed by up to 16 */ +/* compressed items each of which can have a maximum length of two bytes. */ +#define MAX_CMP_GROUP (2+16*2) /* = 34 bytes. */ + +/* This constant defines the number of pointers in the hash table. The number */ +/* of partitions multiplied by the number of pointers in each partition must */ +/* multiply out to this value of 4096. In LZRW1, LZRW1-A, and LZRW2, this */ +/* table length value can be changed. However, in LZRW3-A (and LZRW3), the */ +/* table length cannot be changed because it is connected directly to the */ +/* coding scheme which is hardwired (the table index of a single pointer is */ +/* transmitted in the 12-bit index field). So don't change this constant! */ +#define HASH_TABLE_LENGTH (4096) /* = 2^12, one entry per 12-bit index. */ + +/* HERE IT IS: THE PLACE TO CHANGE THE HASH TABLE DEPTH! */ +/* The following definition is the log_2 of the depth of the hash table. This */ +/* constant can be in the range [0,1,2,3,...,12]. Increasing the depth */ +/* increases compression at the expense of speed. However, you are not likely */ +/* to see much of a compression improvement (e.g. not more than 0.5%) above a */ +/* value of 6 and the algorithm will start to get very slow. See the table in */ +/* the earlier comments block for an idea of the trade-off involved. */ +/* Note: The parentheses are to avoid macro substitution funnies. 
*/ +/* Note: The LZRW3-A default is a value of (3). */ +/* Note: If you end up choosing a value of 0, you should use LZRW3 instead. */ +/* Note: Changing the value of HASH_TABLE_DEPTH_BITS is the ONLY thing you */ +/* have to do to change the depth, so go ahead and recompile now! */ +/* Note: I have tested LZRW3-A for DEPTH_BITS=0,1,2,3,4 and a few other */ +/* values. However, I have not tested it for 12 as I can't wait that long! */ +#define HASH_TABLE_DEPTH_BITS (3) /* Must be in range [0,12]. */ + +/* The following definitions are all self-explanatory and follow from the */ +/* definition of HASH_TABLE_DEPTH_BITS and the hardwired requirement that the */ +/* hash table contain exactly 4096 pointers. */ +/* NOTE(review): in this copy the text from the PARTITION_LENGTH definition */ +/* up to the middle of the HASH macro had been lost. The definitions below */ +/* are restored from the identifiers used elsewhere in this file and from the */ +/* published algorithm - confirm them against a pristine copy. */ +#define PARTITION_LENGTH_BITS (12-HASH_TABLE_DEPTH_BITS) +#define PARTITION_LENGTH (1<<PARTITION_LENGTH_BITS) +#define HASH_TABLE_DEPTH (1<<HASH_TABLE_DEPTH_BITS ) +#define HASH_MASK (PARTITION_LENGTH-1) +#define DEPTH_MASK (HASH_TABLE_DEPTH-1) + +/* The following macro is the constant 18-byte string that every element of */ +/* the hash table is pointed at during initialization. Its length equals */ +/* MAX_RAW_ITEM. */ +#define START_STRING_18 ((UBYTE *) "123456789012345678") + +/* The following macro accepts a pointer PTR to three consecutive bytes and */ +/* hashes them to a partition number in [0,PARTITION_LENGTH-1]. The final */ +/* shift by HASH_TABLE_DEPTH_BITS turns that partition number into the hash */ +/* table index of the zeroth (first) pointer of the partition. */ +#define HASH(PTR) \ + ( \ + (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & HASH_MASK) \ + << HASH_TABLE_DEPTH_BITS \ + ) + +/* Another operation that is performed more than once is the updating of the */ +/* hash table. Here two macros are defined to simplify update operations. */ +/* Updating consists of identifying and overwriting a pointer in a partition */ +/* with a newer pointer and then updating the global cycle value. */ +/* These macros accept the new pointer (NEWPTR) and either a pointer to */ +/* (P_BASE) or the index of (I_BASE) the zeroth (first, or base) pointer in */ +/* the partition that is to be updated. The macros use the 'cycle' variable */ +/* to locate and overwrite a pointer and then update the cycle value. */ +/* Note: Hardcoding 'cycle' in this macro is naughty (it should really be a */ +/* macro parameter), but I have done so because it neatens up the code. */ +#define UPDATE_P(P_BASE,NEWPTR) \ +{(P_BASE)[cycle++]=(NEWPTR); cycle&=DEPTH_MASK;} + +#define UPDATE_I(I_BASE,NEWPTR) \ +{hash[(I_BASE)+cycle++]=(NEWPTR); cycle&=DEPTH_MASK;} + +/* This constant supplies a legal (in-range) hash table index for use when */ +/* a legal-but-don't-care index is required. 
*/ +#define ANY_HASH_INDEX (0) + +/******************************************************************************/ + +int compress_compress +( +/* Input : Hand over the required amount of working memory in p_wrk_mem. */ +/* Input : Specify input block using p_src_first and src_len. */ +/* Input : Point p_dst_first to the start of the output zone (OZ). */ +/* Input : Point p_dst_len to a ULONG to receive the output length. */ +/* Input : Input block and output zone must not overlap. */ +/* Input : Maximum length of output buffer. */ +/* Output : Length of output block written to *p_dst_len. */ +/* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May */ +/* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*/ +/* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */ +UBYTE *p_wrk_mem, +UBYTE *p_src_first, +ULONG src_len, +ULONG dst_max, +UBYTE *p_dst_first, +ULONG *p_dst_len) +{ + /* p_src and p_dst step through the source and destination blocks. */ + UBYTE *p_src = p_src_first; + UBYTE *p_dst = p_dst_first; + + /* The following variables are never modified and are used in the */ + /* calculations that determine when the main loop terminates. */ + UBYTE *p_src_post = p_src_first+src_len; + UBYTE *p_dst_post = p_dst_first+src_len; + UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM; + UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16; + + /* The variables 'p_control' and 'control' are used to buffer control bits. */ + /* Before each group is processed, the next two bytes of the output block */ + /* are set aside for the control word for the group about to be processed. */ + /* 'p_control' is set to point to the first byte of that word. Meanwhile, */ + /* 'control' buffers the control bits being generated during the processing */ + /* of the group. 
Instead of having a counter to keep track of how many items */ + /* have been processed (=the number of bits in the control word), at the */ + /* start of each group, the top word of 'control' is filled with 1 bits. */ + /* As 'control' is shifted for each item, the 1 bits in the top word are */ + /* absorbed or destroyed. When they all run out (i.e. when the top word is */ + /* all zero bits, we know that we are at the end of a group. */ + #define TOPWORD 0xFFFF0000 + UBYTE *p_control; + ULONG control=TOPWORD; + + /* The variable 'hash' always points to the first element of the hash table. */ + UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); + + /* The following two variables represent the literal buffer. p_h1 points to */ + /* the partition (i.e. the zero'th (first) element of the partition) */ + /* corresponding to the youngest literal. p_h2 points to the partition */ + /* corresponding to the second youngest literal. */ + /* The value zero denotes an "empty" buffer value with p_h1=0 => p_h2=0. */ + UBYTE **p_h1=0; + UBYTE **p_h2=0; + + /* The following variable holds the current 'cycle' value. This value cycles */ + /* through the range [0,HASH_TABLE_DEPTH-1], being incremented every time */ + /* the hash table is updated. The value gives the within-partition number of */ + /* the next pointer to be overwritten. The decompressor maintains a cycle */ + /* value in synchrony. */ + UCARD cycle=0; + + /* Validate the output buffer size before starting any compression. */ + if (dst_max && (dst_max < src_len+FLAG_BYTES)) + goto dst_overrun; + + /* To start, we write the flag bytes. Being optimistic, we set the flag to */ + /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */ + /* algorithm deterministic. */ + *p_dst++=FLAG_COMPRESS; + {UCARD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} + + /* Reserve the first word of output as the control word for the first group. */ + /* Note: This is undone at the end if the input block is empty. 
*/ + p_control=p_dst; p_dst+=2; + + /* Initialize all elements of the hash table to point to a constant string. */ + /* Use of an unrolled loop speeds this up considerably. */ + /* These variables should really be declared "register", but I am worried */ + /* about the possibility that extra register declarations will tempt stupid */ + /* compilers to allocate all registers before they get to the innermostloop. */ + {UCARD i; UBYTE **p_h=hash; + #define ZH *p_h++=START_STRING_18 + for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ + {ZH;ZH;ZH;ZH; + ZH;ZH;ZH;ZH; + ZH;ZH;ZH;ZH; + ZH;ZH;ZH;ZH;} + } + + /* The main loop processes either 1 or 16 items per iteration. As its */ + /* termination logic is complicated, I have opted for an infinite loop */ + /* structure containing 'break' and 'goto' statements. */ + while (TRUE) + {/* Begin main processing loop. */ + + /* Note: All the variables here except unroll should be defined within */ + /* the inner loop. Unfortunately the loop hasn't got a block. */ + UBYTE *p_ziv; /* Points to first byte of current Ziv. */ + UCARD unroll; /* Loop counter for unrolled inner loop. */ + UCARD index; /* Index of current partition. */ + UBYTE **p_h0; /* Pointer to current partition. */ + register UCARD d; /* Depth looping variable. */ + register UCARD bestlen; /* Holds the best length seen so far. */ + register UCARD bestpos; /* Holds number of best pointer seen so far. */ + + /* Test for overrun and jump to overrun code if necessary. */ + if (p_dst>p_dst_post) + goto overrun; + + /* The following cascade of if statements efficiently catches and deals */ + /* with varying degrees of closeness to the end of the input block. */ + /* When we get very close to the end, we stop updating the table and */ + /* code the remaining bytes as literals. This makes the code simpler. 
*/ + unroll=16; + if (p_src>p_src_max16) + { + unroll=1; + if (p_src>p_src_max1) + { + if (p_src==p_src_post) + break; + else + {p_h0=&hash[ANY_HASH_INDEX]; /* Avoid undefined pointer. */ + goto literal;} + } + } + + /* This inner unrolled loop processes 'unroll' (whose value is either 1 */ + /* or 16) items. I have chosen to implement this loop with labels and */ + /* gotos to heighten the ease with which the loop may be implemented with */ + /* a single decrement and branch instruction in assembly language and */ + /* also because the labels act as highly readable place markers. */ + /* (Also because we jump into the loop for endgame literals (see above)). */ + + begin_unrolled_loop: + + p_ziv=p_src; + + /* To process the next phrase, we hash the next three bytes to obtain */ + /* an index to the zeroth (first) pointer in a target partition. We */ + /* get the pointer. */ + index=HASH(p_src); + p_h0=&hash[index]; + + /* This next part runs through the pointers in the partition matching */ + /* the bytes they point to in the Lempel with the bytes in the Ziv. */ + /* The length (bestlen) and within-partition pointer number (bestpos) */ + /* of the longest match so far is maintained and is the output of this */ + /* segment of code. The s[bestlen]==... is an optimization only. */ + bestlen=0; + bestpos=0; + for (d=0;dbestlen) + { + bestpos=d; + bestlen=len; + } + } + } + + /* The length of the longest match determines whether we code a */ + /* literal item or a copy item. */ + + if (bestlen<3) + { + /* Literal. */ + + /* Code the literal byte as itself and a zero control bit. */ + literal: *p_dst++=*p_src++; control&=0xFFFEFFFF; + + /* We have just coded a literal. If we had two pending ones, that */ + /* makes three and we can update the hash table. */ + if (p_h2!=0) + {UPDATE_P(p_h2,p_ziv-2);} + + /* In any case, rotate the hash table pointers for next time. 
*/ + p_h2=p_h1; p_h1=p_h0; + + } + else + { + /* Copy */ + + /* To code a copy item, we construct a hash table index of the */ + /* winning pointer (index+=bestpos) and code it and the best length */ + /* into a 2 byte code word. Bump up p_src. */ + index+=bestpos; + *p_dst++=((index&0xF00)>>4)|(bestlen-3); + *p_dst++=index&0xFF; + p_src+=bestlen; + + /* As we have just coded three bytes, we are now in a position to */ + /* update the hash table with the literal bytes that were pending */ + /* upon the arrival of extra context bytes. */ + if (p_h1!=0) + { + if (p_h2!=0) + {UPDATE_P(p_h2,p_ziv-2); p_h2=0;} + UPDATE_P(p_h1,p_ziv-1); p_h1=0; + } + + /* In any case, we can update the hash table based on the current */ + /* position as we just coded at least three bytes in a copy items. */ + UPDATE_P(p_h0,p_ziv); + } + control>>=1; + + /* This loop is all set up for a decrement and jump instruction! */ + end_unrolled_loop: if (--unroll) goto begin_unrolled_loop; + + /* At this point it will nearly always be the end of a group in which */ + /* case, we have to do some control-word processing. However, near the */ + /* end of the input block, the inner unrolled loop is only executed once. */ + /* This necessitates the 'if' test. */ + if ((control&TOPWORD)==0) + { + /* Write the control word to the place we saved for it in the output. */ + *p_control++= control &0xFF; + *p_control = (control>>8) &0xFF; + + /* Reserve the next word in the output block for the control word */ + /* for the group about to be processed. */ + p_control=p_dst; p_dst+=2; + + /* Reset the control bits buffer. */ + control=TOPWORD; + } + + } /* End main processing loop. */ + + /* After the main processing loop has executed, all the input bytes have */ + /* been processed. However, the control word has still to be written to the */ + /* word reserved for it in the output at the start of the most recent group. 
*/ + /* Before writing, the control word has to be shifted so that all the bits */ + /* are in the right place. The "empty" bit positions are filled with 1s */ + /* which partially fill the top word. */ + while(control&TOPWORD) control>>=1; + *p_control++= control &0xFF; + *p_control++=(control>>8) &0xFF; + + /* If the last group contained no items, delete the control word too. */ + if (p_control==p_dst) p_dst-=2; + + /* Write the length of the output block to the dst_len parameter and return. */ + *p_dst_len=p_dst-p_dst_first; + return 0; + + /* Jump here as soon as an overrun is detected. An overrun is defined to */ + /* have occurred if p_dst>p_dst_first+src_len. That is, the moment the */ + /* length of the output written so far exceeds the length of the input block.*/ + /* The algorithm checks for overruns at least at the end of each group */ + /* which means that the maximum overrun is MAX_CMP_GROUP bytes. */ + /* Once an overrun occurs, the only thing to do is to set the copy flag and */ + /* copy the input over. */ + overrun: + *p_dst_first=FLAG_COPY; + fast_copy(p_src_first,p_dst_first+FLAG_BYTES,src_len); + *p_dst_len=src_len+FLAG_BYTES; + return 0; + + /* Jump here if the destination buffer is insufficient to hold the output */ + /* data. We return -1 to indicate an error and set the output length to 0. */ + dst_overrun: + *p_dst_len = 0; + return -1; +} + +/******************************************************************************/ + +int compress_decompress +( +/* Input : Hand over the required amount of working memory in p_wrk_mem. */ +/* Input : Specify input block using p_src_first and src_len. */ +/* Input : Point p_dst_first to the start of the output zone. */ +/* Input : Point p_dst_len to a ULONG to receive the output length. */ +/* Input : Maximum length of output buffer. */ +/* Input : Input block and output zone must not overlap. User knows */ +/* Input : upperbound on output block length from earlier compression. 
*/ +/* Input : In any case, maximum expansion possible is nine times. */ +/* Output : Length of output block written to *p_dst_len. */ +/* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */ +/* Output : Writes only in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. */ +UBYTE *p_wrk_mem, +UBYTE *p_src_first, +ULONG src_len, +ULONG dst_max, +UBYTE *p_dst_first, +ULONG *p_dst_len) +{ + /* Byte pointers p_src and p_dst scan through the input and output blocks. */ + register UBYTE *p_src = p_src_first+FLAG_BYTES; + register UBYTE *p_dst = p_dst_first; + + /* The following two variables are never modified and are used to control */ + /* the main loop. */ + UBYTE *p_src_post = p_src_first+src_len; + UBYTE *p_src_max16 = p_src_first+src_len-(MAX_CMP_GROUP-2); + + /* The hash table is the only resident of the working memory. The hash table */ + /* contains HASH_TABLE_LENGTH=4096 pointers to positions in the history. To */ + /* keep Macintoshes happy, it is longword aligned. */ + UBYTE **hash = (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); + + /* The variable 'control' is used to buffer the control bits which appear in */ + /* groups of 16 bits (control words) at the start of each compressed group. */ + /* When each group is read, bit 16 of the register is set to one. Whenever */ + /* a new bit is needed, the register is shifted right. When the value of the */ + /* register becomes 1, we know that we have reached the end of a group. */ + /* Initializing the register to 1 thus instructs the code to follow that it */ + /* should read a new control word immediately. */ + register ULONG control=1; + + /* The value of 'literals' is always in the range 0..3. It is the number of */ + /* consecutive literal items just seen. We have to record this number so as */ + /* to know when to update the hash table. When literals gets to 3, there */ + /* have been three consecutive literals and we can update at the position of */ + /* the oldest of the three. 
*/ + register UCARD literals=0; + + /* The following variable holds the current 'cycle' value. This value cycles */ + /* through the range [0,HASH_TABLE_DEPTH-1], being incremented every time */ + /* the hash table is updated. The value give the within-partition number of */ + /* the next pointer to be overwritten. The compressor maintains a cycle */ + /* value in synchrony. */ + UCARD cycle=0; + + /* Check the leading copy flag to see if the compressor chose to use a copy */ + /* operation instead of a compression operation. If a copy operation was */ + /* used, then all we need to do is copy the data over, set the output length */ + /* and return. */ + if (*p_src_first==FLAG_COPY) + { + /* Check for destination buffer for overflow before writing data. */ + if (dst_max && (dst_max < src_len-FLAG_BYTES)) + goto dst_overrun; + + fast_copy(p_src_first+FLAG_BYTES,p_dst_first,src_len-FLAG_BYTES); + *p_dst_len=src_len-FLAG_BYTES; + return 0; + } + + /* Initialize all elements of the hash table to point to a constant string. */ + /* Use of an unrolled loop speeds this up considerably. */ + /* The comment about register declarations above similar code in the */ + /* compressor applies here too. */ + {UCARD i; UBYTE **p_h=hash; + #define ZJ *p_h++=START_STRING_18 + for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ + {ZJ;ZJ;ZJ;ZJ; + ZJ;ZJ;ZJ;ZJ; + ZJ;ZJ;ZJ;ZJ; + ZJ;ZJ;ZJ;ZJ;} + } + + /* The outer loop processes either 1 or 16 items per iteration depending on */ + /* how close p_src is to the end of the input block. */ + while (p_src!=p_src_post) + {/* Start of outer loop */ + + register UCARD unroll; /* Counts unrolled loop executions. */ + + /* When 'control' has the value 1, it means that the 16 buffered control */ + /* bits that were read in at the start of the current group have all been */ + /* shifted out and that all that is left is the 1 bit that was injected */ + /* into bit 16 at the start of the current group. 
When we reach the end */ + /* of a group, we have to load a new control word and inject a new 1 bit. */ + if (control==1) + { + control=0x10000|*p_src++; + control|=(*p_src++)<<8; + } + + /* If it is possible that we are within 16 groups from the end of the */ + /* input, execute the unrolled loop only once, else process a whole group */ + /* of 16 items by looping 16 times. */ + unroll= p_src<=p_src_max16 ? 16 : 1; + + /* This inner loop processes one phrase (item) per iteration. */ + while (unroll--) + { /* Begin unrolled inner loop. */ + + /* Process a literal or copy item depending on the next control bit. */ + if (control&1) + { + /* Copy item. */ + + register UBYTE *p; /* Points to place from which to copy. */ + register UCARD lenmt; /* Length of copy item minus three. */ + register UBYTE *p_ziv=p_dst; /* Pointer to start of current Ziv. */ + register UCARD index; /* Index of hash table copy pointer. */ + + /* Read and dismantle the copy word. Work out from where to copy. */ + lenmt=*p_src++; + index=((lenmt&0xF0)<<4)|*p_src++; + p=hash[index]; + lenmt&=0xF; + + /* Check for destination buffer for overflow before writing data. */ + if (dst_max && (dst_max < p_dst-p_dst_first+lenmt+3)) + goto dst_overrun; + + /* Now perform the copy using a half unrolled loop. */ + *p_dst++=*p++; + *p_dst++=*p++; + *p_dst++=*p++; + while (lenmt--) + *p_dst++=*p++; + + /* Because we have just received 3 or more bytes in a copy item */ + /* (whose bytes we have just installed in the output), we are now */ + /* in a position to flush all the pending literal hashings that had */ + /* been postponed for lack of bytes. */ + if (literals>0) + { + register UBYTE *r=p_ziv-literals;; + UPDATE_I(HASH(r),r); + if (literals==2) + {r++; UPDATE_I(HASH(r),r);} + literals=0; + } + + /* In any case, we can immediately update the hash table with the */ + /* current position. We don't need to do a HASH(...) to work out */ + /* where to put the pointer, as the compressor just told us!!! 
*/ + UPDATE_I(index&(~DEPTH_MASK),p_ziv); + } + else + { + /* Literal item. */ + + /* Check for destination buffer for overflow before writing data. */ + if (dst_max && (dst_max < p_dst-p_dst_first+1)) + goto dst_overrun; + + /* Copy over the literal byte. */ + *p_dst++=*p_src++; + + /* If we now have three literals waiting to be hashed into the hash */ + /* table, we can do one of them now (because there are three). */ + if (++literals == 3) + {register UBYTE *p=p_dst-3; + UPDATE_I(HASH(p),p); literals=2;} + } + + /* Shift the control buffer so the next control bit is in bit 0. */ + control>>=1; + + } /* End unrolled inner loop. */ + + } /* End of outer loop */ + + /* Write the length of the decompressed data before returning. */ + *p_dst_len=p_dst-p_dst_first; + return 0; + + /* Jump here if the destination buffer is insufficient to hold the output */ + /* data. We return -1 to indicate an error and set the output length to 0. */ + dst_overrun: + *p_dst_len = 0; + return -1; +} + +/******************************************************************************/ +/* End of LZRW3-A.C */ +/******************************************************************************/ + diff --git a/compress.h b/compress.h new file mode 100755 index 0000000..b9647d9 --- /dev/null +++ b/compress.h @@ -0,0 +1,175 @@ +/******************************************************************************/ +/* */ +/* COMPRESS.H */ +/* */ +/******************************************************************************/ +/* */ +/* Author : Ross Williams. */ +/* Date : December 1989. */ +/* */ +/* This header file defines the interface to a set of functions called */ +/* 'compress', each member of which implements a particular data compression */ +/* algorithm. */ +/* */ +/* Normally in C programming, for each .H file, there is a corresponding .C */ +/* file that implements the functions promised in the .H file. */ +/* Here, there are many .C files corresponding to this header file. 
*/ +/* Each comforming implementation file contains a single function */ +/* called 'compress' that implements a single data compression */ +/* algorithm that conforms with the interface specified in this header file. */ +/* Only one algorithm can be linked in at a time in this organization. */ +/* */ +/******************************************************************************/ +/* */ +/* DEFINITION OF FUNCTION COMPRESS */ +/* =============================== */ +/* */ +/* Summary of Function Compress */ +/* ---------------------------- */ +/* The action that 'compress' takes depends on its first argument called */ +/* 'action'. The function provides three actions: */ +/* */ +/* - Return information about the algorithm. */ +/* - Compress a block of memory. */ +/* - Decompress a block of memory. */ +/* */ +/* Parameters */ +/* ---------- */ +/* See the formal C definition later for a description of the parameters. */ +/* */ +/* Constants */ +/* --------- */ +/* COMPRESS_OVERRUN: The constant COMPRESS_OVERRUN defines by how many bytes */ +/* an algorithm is allowed to expand a block during a compression operation. */ +/* */ +/* Although compression algorithms usually compress data, there will always */ +/* be data that a given compressor will expand (this can be proven). */ +/* Fortunately, the degree of expansion can be limited to a single bit, by */ +/* copying over the input data if the data gets bigger during compression. */ +/* To allow for this possibility, the first bit of a compressed */ +/* representation can be used as a flag indicating whether the */ +/* input data was copied over, or truly compressed. In practice, the first */ +/* byte would be used to store this bit so as to maintain byte alignment. */ +/* */ +/* Unfortunately, in general, the only way to tell if an algorithm will */ +/* expand a particular block of data is to run the algorithm on the data. 
*/ +/* If the algorithm does not continuously monitor how many output bytes it */ +/* has written, it might write an output block far larger than the input */ +/* block before realizing that it has done so. */ +/* On the other hand, continuous checks on output length are inefficient. */ +/* */ +/* To cater for all these problems, this interface definition: */ +/* > Allows a compression algorithm to return an output block that is up to */ +/* COMPRESS_OVERRUN bytes longer than the input block. */ +/* > Allows a compression algorithm to write up to COMPRESS_OVERRUN bytes */ +/* more than the length of the input block to the memory of the output */ +/* block regardless of the length of the output block eventually returned. */ +/* This allows an algorithm to overrun the length of the input block in the */ +/* output block by up to COMPRESS_OVERRUN bytes between expansion checks. */ +/* */ +/* The problem does not arise for decompression. */ +/* */ +/* Identity Action */ +/* --------------- */ +/* > action must be COMPRESS_ACTION_IDENTITY. */ +/* > p_dst_len must point to a longword to receive a longword address. */ +/* > The value of the other parameters does not matter. */ +/* > After execution, the longword that p_dst_len points to will be a pointer */ +/* to a structure of type compress_identity. */ +/* Thus, for example, after the call, (*p_dst_len)->memory will return the */ +/* number of bytes of working memory that the algorithm requires to run. */ +/* > The values of the identity structure returned are fixed constant */ +/* attributes of the algorithm and must not vary from call to call. */ +/* */ +/* Common Requirements for Compression and Decompression Actions */ +/* ------------------------------------------------------------- */ +/* > wrk_mem must point to an unused block of memory of a length specified in */ +/* the algorithm's identity block. The identity block can be obtained by */ +/* making a separate call to compress, specifying the identity action. 
*/ +/* > The INPUT BLOCK is defined to be Memory[src_addr,src_addr+src_len-1]. */ +/* > dst_len will be used to denote *p_dst_len. */ +/* > dst_len is not read by compress, only written. */ +/* > The value of dst_len is defined only upon termination. */ +/* > The OUTPUT BLOCK is defined to be Memory[dst_addr,dst_addr+dst_len-1]. */ +/* */ +/* Compression Action */ +/* ------------------ */ +/* > action must be COMPRESS_ACTION_COMPRESS. */ +/* > src_len must be in the range [0,COMPRESS_MAX_ORG]. */ +/* > The OUTPUT ZONE is defined to be */ +/* Memory[dst_addr,dst_addr+src_len-1+COMPRESS_OVERRUN]. */ +/* > The function can modify any part of the output zone regardless of the */ +/* final length of the output block. */ +/* > The input block and the output zone must not overlap. */ +/* > dst_len will be in the range [0,src_len+COMPRESS_OVERRUN]. */ +/* > dst_len will be in the range [0,COMPRESS_MAX_COM] (from prev fact). */ +/* > The output block will consist of a representation of the input block. */ +/* */ +/* Decompression Action */ +/* -------------------- */ +/* > action must be COMPRESS_ACTION_DECOMPRESS. */ +/* > The input block must be the result of an earlier compression operation. */ +/* > If the previous fact is true, the following facts must also be true: */ +/* > src_len will be in the range [0,COMPRESS_MAX_COM]. */ +/* > dst_len will be in the range [0,COMPRESS_MAX_ORG]. */ +/* > The input and output blocks must not overlap. */ +/* > Only the output block is modified. */ +/* > Upon termination, the output block will consist of the bytes contained */ +/* in the input block passed to the earlier compression operation. 
*/ +/* */ +/******************************************************************************/ + +#include "port.h" + +#define COMPRESS_ACTION_IDENTITY 0 +#define COMPRESS_ACTION_COMPRESS 1 +#define COMPRESS_ACTION_DECOMPRESS 2 + +#define COMPRESS_OVERRUN 1024 +#define COMPRESS_MAX_COM 0x70000000 +#define COMPRESS_MAX_ORG (COMPRESS_MAX_COM-COMPRESS_OVERRUN) + +#define COMPRESS_MAX_STRLEN 255 +//#ifndef __MWERKS__ +extern "C"{ +//#endif +/* The following structure provides information about the algorithm. */ +/* > The top bit of id must be zero. The remaining bits must be chosen by */ +/* the author of the algorithm by tossing a coin 31 times. */ +/* > The amount of memory requested by the algorithm is specified in bytes */ +/* and must be in the range [0,0x70000000]. */ +/* > All strings s must be such that strlen(s)<=COMPRESS_MAX_STRLEN. */ +typedef struct + { + ULONG id; /* Identifying number of algorithm. */ + ULONG memory; /* Number of bytes of working memory required. */ + + char *name; /* Name of algorithm. */ + char *version; /* Version number. */ + char *date; /* Date of release of this version. */ + char *copyright; /* Copyright message. */ + + char *author; /* Author of algorithm. */ + char *affiliation; /* Affiliation of author. */ + char *vendor; /* Where the algorithm can be obtained. */ + }compress_identity; + +int xcompress( /* Single function interface to compression algorithm. */ +UWORD action, /* Action to be performed. */ +UBYTE *wrk_mem, /* Working memory temporarily given to routine to use. */ +UBYTE *src_adr, /* Address of input data. */ +ULONG src_len, /* Length of input data. */ +ULONG dst_max, /* Allocated length of output buffer. */ +UBYTE *dst_adr, /* Address of output data. */ +ULONG *p_dst_len /* Pointer to a longword where routine will write: */ + /* If action=..IDENTITY => Adr of id structure. */ + /* If action=..COMPRESS => Length of output data. */ + /* If action=..DECOMPRESS => Length of output data. 
*/ +); +//#ifndef __MWERKS__ +} +//#endif +/******************************************************************************/ +/* End of COMPRESS.H */ +/******************************************************************************/ + diff --git a/game.xcodeproj/project.pbxproj b/game.xcodeproj/project.pbxproj index 55eb928..a37510d 100644 --- a/game.xcodeproj/project.pbxproj +++ b/game.xcodeproj/project.pbxproj @@ -19,6 +19,7 @@ 7489D6121CB000520063AC48 /* HID_Transaction_Utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = 7489D60C1CB000520063AC48 /* HID_Transaction_Utilities.c */; settings = {ASSET_TAGS = (); }; }; 7489D6131CB000520063AC48 /* HID_Utilities.c in Sources */ = {isa = PBXBuildFile; fileRef = 7489D60D1CB000520063AC48 /* HID_Utilities.c */; settings = {ASSET_TAGS = (); }; }; 7489D61C1CB000F00063AC48 /* ImmrHIDUtilAddOn.c in Sources */ = {isa = PBXBuildFile; fileRef = 7489D61A1CB000F00063AC48 /* ImmrHIDUtilAddOn.c */; settings = {ASSET_TAGS = (); }; }; + 7489D6211CB005370063AC48 /* LZRW3-A.C in Sources */ = {isa = PBXBuildFile; fileRef = 7489D61F1CB005370063AC48 /* LZRW3-A.C */; settings = {ASSET_TAGS = (); }; }; 7F06105A0876ED46001EA95C /* ai.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7F060FE50876ED46001EA95C /* ai.cpp */; }; 7F06105D0876ED46001EA95C /* carphysics.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7F060FEA0876ED46001EA95C /* carphysics.cpp */; }; 7F06105E0876ED46001EA95C /* carselection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7F060FEC0876ED46001EA95C /* carselection.cpp */; }; @@ -69,7 +70,6 @@ 7F0610960876ED46001EA95C /* vectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7F0610550876ED46001EA95C /* vectors.cpp */; }; 7F0610970876ED46001EA95C /* writeout.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7F0610570876ED46001EA95C /* writeout.cpp */; }; 7F06138F08770577001EA95C /* interface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7F06138E08770577001EA95C /* interface.cpp */; }; - 
7F0613BB08771263001EA95C /* LZRW3-A.C in Sources */ = {isa = PBXBuildFile; fileRef = 7F0613B908771263001EA95C /* LZRW3-A.C */; }; 7F4348C7096D9B5B00C3981C /* GetPID.c in Sources */ = {isa = PBXBuildFile; fileRef = 7F4348C5096D9B5B00C3981C /* GetPID.c */; }; 7F434A4F0973EC9900C3981C /* HID_cookie_strings.plist in Resources */ = {isa = PBXBuildFile; fileRef = 7F434A4C0973EC9900C3981C /* HID_cookie_strings.plist */; }; 7F434A500973EC9900C3981C /* HID_device_usage_strings.plist in Resources */ = {isa = PBXBuildFile; fileRef = 7F434A4D0973EC9900C3981C /* HID_device_usage_strings.plist */; }; @@ -122,6 +122,10 @@ 7489D6191CB000850063AC48 /* HID_Utilities_External.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = HID_Utilities_External.h; sourceTree = ""; }; 7489D61A1CB000F00063AC48 /* ImmrHIDUtilAddOn.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ImmrHIDUtilAddOn.c; sourceTree = ""; }; 7489D61B1CB000F00063AC48 /* ImmrHIDUtilAddOn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ImmrHIDUtilAddOn.h; sourceTree = ""; }; + 7489D61D1CB005370063AC48 /* compress.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = compress.h; sourceTree = ""; }; + 7489D61E1CB005370063AC48 /* LZRW.H */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = LZRW.H; sourceTree = ""; }; + 7489D61F1CB005370063AC48 /* LZRW3-A.C */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "LZRW3-A.C"; sourceTree = ""; }; + 7489D6201CB005370063AC48 /* port.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = port.h; sourceTree = ""; }; 7F060FE50876ED46001EA95C /* ai.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = ai.cpp; sourceTree = ""; }; 7F060FEA0876ED46001EA95C /* carphysics.cpp */ = 
{isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = carphysics.cpp; sourceTree = ""; }; 7F060FEB0876ED46001EA95C /* carphysics.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = carphysics.h; sourceTree = ""; }; @@ -218,10 +222,6 @@ 7F0610570876ED46001EA95C /* writeout.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = writeout.cpp; sourceTree = ""; }; 7F0610580876ED46001EA95C /* writeout.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = writeout.h; sourceTree = ""; }; 7F06138E08770577001EA95C /* interface.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = interface.cpp; sourceTree = ""; }; - 7F0613B708771262001EA95C /* compress.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = compress.h; path = ../packer/compress.h; sourceTree = SOURCE_ROOT; }; - 7F0613B808771263001EA95C /* LZRW.H */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.h; name = LZRW.H; path = ../packer/LZRW.H; sourceTree = SOURCE_ROOT; }; - 7F0613B908771263001EA95C /* LZRW3-A.C */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = "LZRW3-A.C"; path = "../packer/LZRW3-A.C"; sourceTree = SOURCE_ROOT; }; - 7F0613BA08771263001EA95C /* port.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = port.h; path = ../packer/port.h; sourceTree = SOURCE_ROOT; }; 7F16E74E0D57A68700706C33 /* rt3_redline.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rt3_redline.h; path = /apps/dev/SDKs/ASW/ASWRegistration/redline/rt3_extras/rt3_redline.h; sourceTree = ""; }; 7F16E75E0D57AA8200706C33 /* librt3_nonag.o */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.objfile"; name = 
librt3_nonag.o; path = /apps/dev/SDKs/ASW/ASWRegistration/librt3_nonag.o; sourceTree = ""; }; 7F16E7640D57ABCC00706C33 /* ASWRegistrationCarbonAPI.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ASWRegistrationCarbonAPI.h; path = /apps/dev/SDKs/ASW/ASWRegistration/redline/CarbonAPI/ASWRegistrationCarbonAPI.h; sourceTree = ""; }; @@ -501,10 +501,10 @@ 7F0613AC08771244001EA95C /* lzrw */ = { isa = PBXGroup; children = ( - 7F0613B708771262001EA95C /* compress.h */, - 7F0613B808771263001EA95C /* LZRW.H */, - 7F0613B908771263001EA95C /* LZRW3-A.C */, - 7F0613BA08771263001EA95C /* port.h */, + 7489D61D1CB005370063AC48 /* compress.h */, + 7489D61E1CB005370063AC48 /* LZRW.H */, + 7489D61F1CB005370063AC48 /* LZRW3-A.C */, + 7489D6201CB005370063AC48 /* port.h */, ); name = lzrw; sourceTree = ""; @@ -653,6 +653,7 @@ 7F0610850876ED46001EA95C /* random.cpp in Sources */, 7F0610860876ED46001EA95C /* rendercar.cpp in Sources */, 7F0610870876ED46001EA95C /* renderframe.cpp in Sources */, + 7489D6211CB005370063AC48 /* LZRW3-A.C in Sources */, 7F0610880876ED46001EA95C /* roads.cpp in Sources */, 7F0610890876ED46001EA95C /* screen.cpp in Sources */, 7F06108A0876ED46001EA95C /* sky.cpp in Sources */, @@ -668,7 +669,6 @@ 7F0610960876ED46001EA95C /* vectors.cpp in Sources */, 7F0610970876ED46001EA95C /* writeout.cpp in Sources */, 7F06138F08770577001EA95C /* interface.cpp in Sources */, - 7F0613BB08771263001EA95C /* LZRW3-A.C in Sources */, 7FD85B0708BC760A00C3EB17 /* S3Decompression.cpp in Sources */, 7FD85E2308BC9B8400C3EB17 /* fpu_exc.c in Sources */, 7F4348C7096D9B5B00C3981C /* GetPID.c in Sources */, diff --git a/port.h b/port.h new file mode 100755 index 0000000..89d9a4e --- /dev/null +++ b/port.h @@ -0,0 +1,42 @@ +/******************************************************************************/ +/* */ +/* PORT.H */ +/* */ +/******************************************************************************/ +/* */ +/* This module contains 
macro definitions and types that are likely to */ +/* change between computers. */ +/* */ +/******************************************************************************/ + +#ifndef DONE_PORT /* Only do this if not previously done. */ + +#include + + // #ifdef THINK_C + #define UBYTE unsigned char /* Unsigned byte */ + #define UWORD unsigned int /* Unsigned word (2 bytes) */ + #define ULONG unsigned long /* Unsigned word (4 bytes) */ + #define BOOL unsigned char /* Boolean */ + #define FOPEN_BINARY_READ "rb" /* Mode string for binary reading. */ + #define FOPEN_BINARY_WRITE "wb" /* Mode string for binary writing. */ + #define FOPEN_TEXT_APPEND "a" /* Mode string for text appending. */ + #define REAL double /* USed for floating point stuff. */ + // #endif + + #define DONE_PORT /* Don't do all this again. */ + #define MALLOC_FAIL NULL /* Failure status from malloc() */ + #define LOCAL static /* For non-exported routines. */ + #define EXPORT /* Signals exported function. */ + #define then /* Useful for aligning ifs. */ +#ifndef TRUE + #define TRUE true +#endif + + #define fast_copy(src,dst,len) memcpy(dst,src,len) +#endif + +/******************************************************************************/ +/* End of PORT.H */ +/******************************************************************************/ +