LCOV - code coverage report
Current view: top level - lib/compression - lzxpress.c (source / functions) Hit Total Coverage
Test: coverage report for master 2b515b7d Lines: 191 216 88.4 %
Date: 2024-02-28 12:06:22 Functions: 6 7 85.7 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) Matthieu Suiche 2008
       3             :  *
       4             :  * All rights reserved.
       5             :  *
       6             :  * Redistribution and use in source and binary forms, with or without
       7             :  * modification, are permitted provided that the following conditions
       8             :  * are met:
       9             :  *
      10             :  * 1. Redistributions of source code must retain the above copyright
      11             :  *    notice, this list of conditions and the following disclaimer.
      12             :  *
      13             :  * 2. Redistributions in binary form must reproduce the above copyright
      14             :  *    notice, this list of conditions and the following disclaimer in the
      15             :  *    documentation and/or other materials provided with the distribution.
      16             :  *
      17             :  * 3. Neither the name of the author nor the names of its contributors
      18             :  *    may be used to endorse or promote products derived from this software
      19             :  *    without specific prior written permission.
      20             :  *
      21             :  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
      22             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      23             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      24             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
      25             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      26             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      27             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      28             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      29             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      30             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      31             :  * SUCH DAMAGE.
      32             :  *
      33             :  */
      34             : 
      35             : #include "replace.h"
      36             : #include "lzxpress.h"
      37             : #include "../lib/util/byteorder.h"
      38             : 
      39             : 
      40             : #define __CHECK_BYTES(__size, __index, __needed) do { \
      41             :         if (unlikely(__index >= __size)) { \
      42             :                 return -1; \
      43             :         } else { \
      44             :                 uint32_t __avail = __size - __index; \
      45             :                 if (unlikely(__needed > __avail)) { \
      46             :                         return -1; \
      47             :                 } \
      48             :         } \
      49             : } while(0)
      50             : 
      51             : 
      52             : /*
      53             :  * LZX_PLAIN_COMP_HASH_BITS determines how big the hash table for finding
      54             :  * matches will be.
      55             :  *
      56             :  * The window in which we look for matches is 8192 bytes. That means with
      57             :  * random data a value of 13 is getting close to no collisions, while a 12
      58             :  * will miss about half the possible matches. With compressible data there
      59             :  * will generally be fewer and less diverse entries, so collisions are rarer.
      60             :  *
      61             :  * In the testsuite, bith 12 and 13 give better compression than Windows, but
      62             :  * 12 is faster. 11 does not save time and costs accuracy. Thus we prefer 12.
      63             :  */
      64             : #define LZX_PLAIN_COMP_HASH_BITS 12
      65             : /*
      66             :  * LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the
      67             :  * circular hash table for a match, before we give up. A bigger number will
      68             :  * generally lead to better but slower compression, but a stupidly big number
      69             :  * will just be worse.
      70             :  */
      71             : #define LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS 5
      72             : #define HASH_MASK ((1 << LZX_PLAIN_COMP_HASH_BITS) - 1)
      73             : 
      74    22635896 : static inline uint16_t three_byte_hash(const uint8_t *bytes)
      75             : {
      76    22635896 :         uint16_t a = bytes[0];
      77    22635896 :         uint16_t b = bytes[1] ^ 0x2e;
      78    22635896 :         uint16_t c = bytes[2] ^ 0x55;
      79    22635896 :         uint16_t ca = c - a;
      80    22635896 :         uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a);
      81    22635896 :         return d & HASH_MASK;
      82             : }
      83             : 
      84             : 
      85    22635896 : static inline void store_match(uint32_t *hash_table,
      86             :                                uint16_t h,
      87             :                                uint32_t offset)
      88             : {
      89    22635896 :         int i;
      90    22635896 :         uint32_t o = hash_table[h];
      91    22635896 :         uint16_t h2;
      92    22635896 :         uint16_t worst_h;
      93    22635896 :         int worst_score;
      94             : 
      95    22635896 :         if (o >= offset) {
      96             :                 /* there is nothing there yet */
      97       75989 :                 hash_table[h] = offset;
      98       75989 :                 return;
      99             :         }
     100   112598497 :         for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
     101    90108091 :                 h2 = (h + i) & HASH_MASK;
     102    90108091 :                 if (hash_table[h2] >= offset) {
     103       69501 :                         hash_table[h2] = offset;
     104       69501 :                         return;
     105             :                 }
     106             :         }
     107             :         /*
     108             :          * There are no slots, but we really want to store this, so we'll kick
     109             :          * out the one with the longest distance.
     110             :          */
     111    22490406 :         worst_h = h;
     112    22490406 :         worst_score = offset - o;
     113   112452030 :         for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
     114    89961624 :                 int score;
     115    89961624 :                 h2 = (h + i) & HASH_MASK;
     116    89961624 :                 o = hash_table[h2];
     117    89961624 :                 score = offset - o;
     118    89961624 :                 if (score > worst_score) {
     119    29063751 :                         worst_score = score;
     120    29063751 :                         worst_h = h2;
     121             :                 }
     122             :         }
     123    22490406 :         hash_table[worst_h] = offset;
     124             : }
     125             : 
     126             : 
     127             : struct match {
     128             :         const uint8_t *there;
     129             :         uint32_t length;
     130             : };
     131             : 
     132             : 
     133    22635896 : static inline struct match lookup_match(uint32_t *hash_table,
     134             :                                         uint16_t h,
     135             :                                         const uint8_t *data,
     136             :                                         uint32_t offset,
     137             :                                         size_t max_len)
     138             : {
     139    22635896 :         int i;
     140    22635896 :         uint32_t o;
     141    22635896 :         uint16_t h2;
     142    22635896 :         size_t len;
     143    22635896 :         const uint8_t *there = NULL;
     144    22635896 :         const uint8_t *here = data + offset;
     145    22635896 :         struct match best = {0};
     146             : 
     147   135234393 :         for (i = 0; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
     148   112743987 :                 h2 = (h + i) & HASH_MASK;
     149   112743987 :                 o = hash_table[h2];
     150   112743987 :                 if (o >= offset) {
     151             :                         /*
     152             :                          * Either this is 0xffffffff, or something is really
     153             :                          * wrong.
     154             :                          *
     155             :                          * In setting this, we would never have stepped over
     156             :                          * an 0xffffffff, so we won't now.
     157             :                          */
     158           0 :                         break;
     159             :                 }
     160   112598497 :                 if (offset - o > 8192) {
     161             :                         /* Too far away to use */
     162     4184082 :                         continue;
     163             :                 }
     164   108414415 :                 there = data + o;
     165             :                 /*
     166             :                  * When we already have a long match, we can try to avoid
     167             :                  * measuring out another long, but shorter match.
     168             :                  */
     169   108414415 :                 if (best.length > 1000 &&
     170         345 :                     there[best.length - 1] != best.there[best.length - 1]) {
     171         153 :                         continue;
     172             :                 }
     173             : 
     174           0 :                 for (len = 0;
     175   199241968 :                      len < max_len && here[len] == there[len];
     176    90827706 :                      len++) {
     177             :                         /* counting */
     178    90827706 :                 }
     179   108414262 :                 if (len > 2) {
     180     3976197 :                         if (len > best.length) {
     181     2019135 :                                 best.length = len;
     182     2019135 :                                 best.there = there;
     183             :                         }
     184             :                 }
     185             :         }
     186    22635896 :         return best;
     187             : }
     188             : 
     189             : struct write_context {
     190             :         uint8_t *compressed;
     191             :         uint32_t compressed_pos;
     192             :         uint32_t max_compressed_size;
     193             :         uint32_t indic;
     194             :         uint32_t indic_bit;
     195             :         uint32_t indic_pos;
     196             :         uint32_t nibble_index;
     197             : };
     198             : 
     199             : 
     200             : #define CHECK_INPUT_BYTES(__needed) \
     201             :         __CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed)
     202             : #define CHECK_OUTPUT_BYTES(__needed) \
     203             :         __CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed)
     204             : 
     205             : 
     206    22635950 : static inline ssize_t push_indicator_bit(struct write_context *wc, uint32_t bit)
     207             : {
     208    22635950 :         wc->indic = (wc->indic << 1) | bit;
     209    22635950 :         wc->indic_bit += 1;
     210             : 
     211    22635950 :         if (wc->indic_bit == 32) {
     212      707349 :                 PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic);
     213      707349 :                 wc->indic_bit = 0;
     214      707349 :                 CHECK_OUTPUT_BYTES(sizeof(uint32_t));
     215      707349 :                 wc->indic_pos = wc->compressed_pos;
     216      707349 :                 wc->compressed_pos += sizeof(uint32_t);
     217             :         }
     218    22635950 :         return wc->indic_pos;
     219             : }
     220             : 
     221             : 
     222     1709716 : static ssize_t encode_match(struct write_context *wc,
     223             :                             struct match match,
     224             :                             const uint8_t *here)
     225             : {
     226     1709716 :         uint32_t match_len = match.length - 3;
     227     1709716 :         uint32_t best_offset = here - match.there - 1;
     228     1709716 :         uint16_t metadata;
     229             : 
     230     1709716 :         if (best_offset > 8191) {
     231           0 :                 return -1;
     232             :         }
     233             : 
     234     1709716 :         CHECK_OUTPUT_BYTES(sizeof(uint16_t));
     235     1709716 :         metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
     236     1709716 :         PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata);
     237     1709716 :         wc->compressed_pos += sizeof(uint16_t);
     238             : 
     239     1709716 :         if (match_len >= 7) {
     240       60016 :                 match_len -= 7;
     241             : 
     242       60016 :                 if (wc->nibble_index == 0) {
     243       30026 :                         wc->nibble_index = wc->compressed_pos;
     244             : 
     245       30026 :                         CHECK_OUTPUT_BYTES(sizeof(uint8_t));
     246       30026 :                         wc->compressed[wc->nibble_index] = MIN(match_len, 15);
     247       30026 :                         wc->compressed_pos += sizeof(uint8_t);
     248             :                 } else {
     249       29990 :                         wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4;
     250       29990 :                         wc->nibble_index = 0;
     251             :                 }
     252             : 
     253       60016 :                 if (match_len >= 15) {
     254        5331 :                         match_len -= 15;
     255             : 
     256        5331 :                         CHECK_OUTPUT_BYTES(sizeof(uint8_t));
     257        5331 :                         wc->compressed[wc->compressed_pos] = MIN(match_len, 255);
     258        5331 :                         wc->compressed_pos += sizeof(uint8_t);
     259             : 
     260        5331 :                         if (match_len >= 255) {
     261             :                                 /* Additional match_len */
     262             : 
     263        2240 :                                 match_len += 7 + 15;
     264             : 
     265        2240 :                                 if (match_len < (1 << 16)) {
     266        2240 :                                         CHECK_OUTPUT_BYTES(sizeof(uint16_t));
     267        2240 :                                         PUSH_LE_U16(wc->compressed, wc->compressed_pos,
     268             :                                                     match_len);
     269        2240 :                                         wc->compressed_pos += sizeof(uint16_t);
     270             :                                 } else {
     271           0 :                                         CHECK_OUTPUT_BYTES(sizeof(uint16_t) +
     272             :                                                            sizeof(uint32_t));
     273           0 :                                         PUSH_LE_U16(wc->compressed,
     274             :                                                     wc->compressed_pos, 0);
     275           0 :                                         wc->compressed_pos += sizeof(uint16_t);
     276             : 
     277           0 :                                         PUSH_LE_U32(wc->compressed,
     278             :                                                     wc->compressed_pos,
     279             :                                                     match_len);
     280           0 :                                         wc->compressed_pos += sizeof(uint32_t);
     281             :                                 }
     282             :                         }
     283             :                 }
     284             :         }
     285     1709716 :         return push_indicator_bit(wc, 1);
     286             : }
     287             : 
     288             : #undef CHECK_OUTPUT_BYTES
     289             : #define CHECK_OUTPUT_BYTES(__needed) \
     290             :         __CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed)
     291             : 
     292             : 
     293          74 : ssize_t lzxpress_compress(const uint8_t *uncompressed,
     294             :                           uint32_t uncompressed_size,
     295             :                           uint8_t *compressed,
     296             :                           uint32_t max_compressed_size)
     297             : {
     298             :         /*
     299             :          * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression".
     300             :          *
     301             :          * It avoids Huffman encoding by including literal bytes inline when a
     302             :          * match is not found. Every so often it includes a uint32 bit map
     303             :          * flagging which positions contain matches and which contain
     304             :          * literals. The encoding of matches is of variable size, depending on
     305             :          * the match length; they are always at least 16 bits long, and can
     306             :          * implicitly use unused half-bytes from earlier in the stream.
     307             :          */
     308          74 :         ssize_t ret;
     309          74 :         uint32_t uncompressed_pos;
     310          74 :         struct write_context wc = {
     311             :                 .indic = 0,
     312             :                 .indic_pos = 0,
     313             :                 .indic_bit = 0,
     314             :                 .nibble_index = 0,
     315             :                 .compressed = compressed,
     316             :                 .compressed_pos = 0,
     317             :                 .max_compressed_size = max_compressed_size
     318             :         };
     319          74 :         uint32_t hash_table[1 << LZX_PLAIN_COMP_HASH_BITS];
     320          74 :         memset(hash_table, 0xff, sizeof(hash_table));
     321             : 
     322          74 :         if (!uncompressed_size) {
     323           0 :                 return 0;
     324             :         }
     325             : 
     326          72 :         uncompressed_pos = 0;
     327          72 :         CHECK_OUTPUT_BYTES(sizeof(uint32_t));
     328          72 :         PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0);
     329          72 :         wc.compressed_pos += sizeof(uint32_t);
     330             : 
     331    22636022 :         while ((uncompressed_pos < uncompressed_size) &&
     332    22635950 :                (wc.compressed_pos < wc.max_compressed_size)) {
     333             : 
     334             :                 /* maximum len we can encode into metadata */
     335    22635950 :                 const uint32_t max_len = MIN(0xFFFF + 3,
     336             :                                              uncompressed_size - uncompressed_pos);
     337    22635950 :                 const uint8_t *here = uncompressed + uncompressed_pos;
     338    22635950 :                 uint16_t h;
     339    22635950 :                 struct match match = {0};
     340             : 
     341    22635950 :                 if (max_len >= 3) {
     342    22635896 :                         h = three_byte_hash(here);
     343    22635896 :                         match = lookup_match(hash_table,
     344             :                                              h,
     345             :                                              uncompressed,
     346             :                                              uncompressed_pos,
     347             :                                              max_len);
     348             : 
     349    22635896 :                         store_match(hash_table, h, uncompressed_pos);
     350             :                 } else {
     351           0 :                         match.there = NULL;
     352           0 :                         match.length = 0;
     353             :                 }
     354             : 
     355    22635950 :                 if (match.there == NULL) {
     356             :                         /*
     357             :                          * This is going to be a literal byte, which we flag
     358             :                          * by setting a bit in an indicator field somewhere
     359             :                          * earlier in the stream.
     360             :                          */
     361           0 :                         CHECK_INPUT_BYTES(sizeof(uint8_t));
     362    20926234 :                         CHECK_OUTPUT_BYTES(sizeof(uint8_t));
     363    20926234 :                         wc.compressed[wc.compressed_pos++] = *here;
     364    20926234 :                         uncompressed_pos++;
     365             : 
     366    20926234 :                         ret = push_indicator_bit(&wc, 0);
     367    20926234 :                         if (ret < 0) {
     368           0 :                                 return ret;
     369             :                         }
     370             :                 } else {
     371     1709716 :                         ret = encode_match(&wc, match, here);
     372     1709716 :                         if (ret < 0) {
     373           0 :                                 return ret;
     374             :                         }
     375     1709716 :                         uncompressed_pos += match.length;
     376             :                 }
     377             :         }
     378             : 
     379          72 :         if (wc.indic_bit != 0) {
     380          66 :                 wc.indic <<= 32 - wc.indic_bit;
     381             :         }
     382          72 :         wc.indic |= UINT32_MAX >> wc.indic_bit;
     383          72 :         PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);
     384             : 
     385          72 :         return wc.compressed_pos;
     386             : }
     387             : 
     388         157 : ssize_t lzxpress_decompress(const uint8_t *input,
     389             :                             uint32_t input_size,
     390             :                             uint8_t *output,
     391             :                             uint32_t max_output_size)
     392             : {
     393             :         /*
     394             :          * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression
     395             :          * Algorithm Details".
     396             :          */
     397         157 :         uint32_t output_index, input_index;
     398         157 :         uint32_t indicator, indicator_bit;
     399         157 :         uint32_t nibble_index;
     400             : 
     401         157 :         if (input_size == 0) {
     402           0 :                 return 0;
     403             :         }
     404             : 
     405           0 :         output_index = 0;
     406           0 :         input_index = 0;
     407           0 :         indicator = 0;
     408           0 :         indicator_bit = 0;
     409           0 :         nibble_index = 0;
     410             : 
     411             : #undef CHECK_INPUT_BYTES
     412             : #define CHECK_INPUT_BYTES(__needed) \
     413             :         __CHECK_BYTES(input_size, input_index, __needed)
     414             : #undef CHECK_OUTPUT_BYTES
     415             : #define CHECK_OUTPUT_BYTES(__needed) \
     416             :         __CHECK_BYTES(max_output_size, output_index, __needed)
     417             : 
     418    24880070 :         do {
     419    24880070 :                 if (indicator_bit == 0) {
     420      777595 :                         CHECK_INPUT_BYTES(sizeof(uint32_t));
     421      777595 :                         indicator = PULL_LE_U32(input, input_index);
     422      777595 :                         input_index += sizeof(uint32_t);
     423      777595 :                         if (input_index == input_size) {
     424             :                                 /*
     425             :                                  * The compressor left room for indicator
     426             :                                  * flags for data that doesn't exist.
     427             :                                  */
     428           0 :                                 break;
     429             :                         }
     430           0 :                         indicator_bit = 32;
     431             :                 }
     432    24880068 :                 indicator_bit--;
     433             : 
     434             :                 /*
     435             :                  * check whether the bit specified by indicator_bit is set or not
     436             :                  * set in indicator. For example, if indicator_bit has value 4
     437             :                  * check whether the 4th bit of the value in indicator is set
     438             :                  */
     439    24880068 :                 if (((indicator >> indicator_bit) & 1) == 0) {
     440    22805507 :                         CHECK_INPUT_BYTES(sizeof(uint8_t));
     441    22805507 :                         CHECK_OUTPUT_BYTES(sizeof(uint8_t));
     442    22805507 :                         output[output_index] = input[input_index];
     443    22805507 :                         input_index += sizeof(uint8_t);
     444    22805507 :                         output_index += sizeof(uint8_t);
     445             :                 } else {
     446     2074561 :                         uint32_t length;
     447     2074561 :                         uint32_t offset;
     448             : 
     449     2074561 :                         CHECK_INPUT_BYTES(sizeof(uint16_t));
     450     2074561 :                         length = PULL_LE_U16(input, input_index);
     451     2074561 :                         input_index += sizeof(uint16_t);
     452     2074561 :                         offset = (length >> 3) + 1;
     453     2074561 :                         length &= 7;
     454             : 
     455     2074561 :                         if (length == 7) {
     456       80507 :                                 if (nibble_index == 0) {
     457       40297 :                                         CHECK_INPUT_BYTES(sizeof(uint8_t));
     458       40297 :                                         nibble_index = input_index;
     459       40297 :                                         length = input[input_index] & 0xf;
     460       40297 :                                         input_index += sizeof(uint8_t);
     461             :                                 } else {
     462       40210 :                                         length = input[nibble_index] >> 4;
     463       40210 :                                         nibble_index = 0;
     464             :                                 }
     465             : 
     466       80507 :                                 if (length == 15) {
     467        9057 :                                         CHECK_INPUT_BYTES(sizeof(uint8_t));
     468        9057 :                                         length = input[input_index];
     469        9057 :                                         input_index += sizeof(uint8_t);
     470        9057 :                                         if (length == 255) {
     471        2754 :                                                 CHECK_INPUT_BYTES(sizeof(uint16_t));
     472        2754 :                                                 length = PULL_LE_U16(input, input_index);
     473        2754 :                                                 input_index += sizeof(uint16_t);
     474        2754 :                                                 if (length == 0) {
     475           4 :                                                         CHECK_INPUT_BYTES(sizeof(uint32_t));
     476           4 :                                                         length = PULL_LE_U32(input, input_index);
     477           4 :                                                         input_index += sizeof(uint32_t);
     478             :                                                 }
     479             : 
     480        2754 :                                                 if (length < (15 + 7)) {
     481           0 :                                                         return -1;
     482             :                                                 }
     483        2754 :                                                 length -= (15 + 7);
     484             :                                         }
     485        9057 :                                         length += 15;
     486             :                                 }
     487       80507 :                                 length += 7;
     488             :                         }
     489     2074561 :                         length += 3;
     490             : 
     491     2074561 :                         if (length == 0) {
     492           0 :                                 return -1;
     493             :                         }
     494             : 
     495    67912219 :                         for (; length > 0; --length) {
     496    65837660 :                                 if (offset > output_index) {
     497           0 :                                         return -1;
     498             :                                 }
     499    65837660 :                                 CHECK_OUTPUT_BYTES(sizeof(uint8_t));
     500    65837658 :                                 output[output_index] = output[output_index - offset];
     501    65837658 :                                 output_index += sizeof(uint8_t);
     502             :                         }
     503             :                 }
     504    24880066 :         } while ((output_index < max_output_size) && (input_index < (input_size)));
     505             : 
     506         153 :         return output_index;
     507             : }

Generated by: LCOV version 1.14