Ticket #16446: patch-gzip-rsyncable.diff

File patch-gzip-rsyncable.diff, 10.0 KB (added by macports@…, 16 years ago)

Patch for gzip to support --rsyncable flag

  • deflate.c

    diff -u -r ../gzip-1.3.12.orig/deflate.c ./deflate.c
    old new  
    135135#endif
    136136/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
    137137
     138#ifndef RSYNC_WIN
     139#  define RSYNC_WIN 4096
     140#endif
     141/* Size of rsync window, must be < MAX_DIST */
     142
     143#define RSYNC_SUM_MATCH(sum) ((sum) % RSYNC_WIN == 0)
     144/* Whether window sum matches magic value */
     145
    138146/* ===========================================================================
    139147 * Local data used by the "longest match" routines.
    140148 */
     
    216224unsigned near good_match;
    217225/* Use a faster search when the previous match is longer than this */
    218226
     227local ulg rsync_sum;  /* rolling sum of rsync window */
     228local ulg rsync_chunk_end; /* next rsync sequence point */
    219229
    220230/* Values for max_lazy_match, good_match and max_chain_length, depending on
    221231 * the desired pack level (0..9). The values given below have been tuned to
     
    314324#endif
    315325    /* prev will be initialized on the fly */
    316326
     327    /* rsync params */
     328    rsync_chunk_end = 0xFFFFFFFFUL;
     329    rsync_sum = 0;
     330
    317331    /* Set the default configuration parameters:
    318332     */
    319333    max_lazy_match   = configuration_table[pack_level].max_lazy;
     
    550564        memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);
    551565        match_start -= WSIZE;
    552566        strstart    -= WSIZE; /* we now have strstart >= MAX_DIST: */
     567        if (rsync_chunk_end != 0xFFFFFFFFUL)
     568            rsync_chunk_end -= WSIZE;
    553569
    554570        block_start -= (long) WSIZE;
    555571
     
    577593    }
    578594}
    579595
     596local void rsync_roll(start, num)
     597    unsigned start;
     598    unsigned num;
     599{
     600    unsigned i;
     601
     602    if (start < RSYNC_WIN) {
     603        /* before window fills. */
     604        for (i = start; i < RSYNC_WIN; i++) {
     605            if (i == start + num) return;
     606            rsync_sum += (ulg)window[i];
     607        }
     608        num -= (RSYNC_WIN - start);
     609        start = RSYNC_WIN;
     610    }
     611
     612    /* buffer after window full */
     613    for (i = start; i < start+num; i++) {
     614        /* New character in */
     615        rsync_sum += (ulg)window[i];
     616        /* Old character out */
     617        rsync_sum -= (ulg)window[i - RSYNC_WIN];
     618        if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum))
     619            rsync_chunk_end = i;
     620    }
     621}
     622
     623/* ===========================================================================
     624 * Set rsync_chunk_end if window sum matches magic value.
     625 */
     626#define RSYNC_ROLL(s, n) \
     627   do { if (rsync) rsync_roll((s), (n)); } while(0)
     628
    580629/* ===========================================================================
    581630 * Flush the current block, with given end-of-file flag.
    582631 * IN assertion: strstart is set to the end of the current match.
    583632 */
    584633#define FLUSH_BLOCK(eof) \
    585634   flush_block(block_start >= 0L ? (char*)&window[(unsigned)block_start] : \
    586                 (char*)NULL, (long)strstart - block_start, (eof))
     635                (char*)NULL, (long)strstart - block_start, flush-1, (eof))
    587636
    588637/* ===========================================================================
    589638 * Processes a new input file and return its compressed length. This
     
    594643local off_t deflate_fast()
    595644{
    596645    IPos hash_head; /* head of the hash chain */
    597     int flush;      /* set if current block must be flushed */
     646    int flush;      /* set if current block must be flushed, 2=>and padded */
    598647    unsigned match_length = 0;  /* length of best match */
    599648
    600649    prev_length = MIN_MATCH-1;
     
    624673
    625674            lookahead -= match_length;
    626675
     676            RSYNC_ROLL(strstart, match_length);
    627677            /* Insert new strings in the hash table only if the match length
    628678             * is not too large. This saves time but degrades compression.
    629679             */
     
    652702            /* No match, output a literal byte */
    653703            Tracevv((stderr,"%c",window[strstart]));
    654704            flush = ct_tally (0, window[strstart]);
     705            RSYNC_ROLL(strstart, 1);
    655706            lookahead--;
    656707            strstart++;
    657708        }
     709        if (rsync && strstart > rsync_chunk_end) {
     710            rsync_chunk_end = 0xFFFFFFFFUL;
     711            flush = 2;
     712        }
    658713        if (flush) FLUSH_BLOCK(0), block_start = strstart;
    659714
    660715        /* Make sure that we always have enough lookahead, except
     
    728783             */
    729784            lookahead -= prev_length-1;
    730785            prev_length -= 2;
     786            RSYNC_ROLL(strstart, prev_length+1);
    731787            do {
    732788                strstart++;
    733789                INSERT_STRING(strstart, hash_head);
     
    740796            match_available = 0;
    741797            match_length = MIN_MATCH-1;
    742798            strstart++;
    743             if (flush) FLUSH_BLOCK(0), block_start = strstart;
    744799
     800            if (rsync && strstart > rsync_chunk_end) {
     801                rsync_chunk_end = 0xFFFFFFFFUL;
     802                flush = 2;
     803            }
     804            if (flush) FLUSH_BLOCK(0), block_start = strstart;
    745805        } else if (match_available) {
    746806            /* If there was no match at the previous position, output a
    747807             * single literal. If there was a match but the current match
    748808             * is longer, truncate the previous match to a single literal.
    749809             */
    750810            Tracevv((stderr,"%c",window[strstart-1]));
    751             if (ct_tally (0, window[strstart-1])) {
    752                 FLUSH_BLOCK(0), block_start = strstart;
    753             }
     811            flush = ct_tally (0, window[strstart-1]);
     812            if (rsync && strstart > rsync_chunk_end) {
     813                rsync_chunk_end = 0xFFFFFFFFUL;
     814                flush = 2;
     815            }
     816            if (flush) FLUSH_BLOCK(0), block_start = strstart;
     817            RSYNC_ROLL(strstart, 1);
    754818            strstart++;
    755819            lookahead--;
    756820        } else {
    757821            /* There is no previous match to compare with, wait for
    758822             * the next step to decide.
    759823             */
     824            if (rsync && strstart > rsync_chunk_end) {
     825                /* Reset huffman tree */
     826                rsync_chunk_end = 0xFFFFFFFFUL;
     827                flush = 2;
     828                FLUSH_BLOCK(0), block_start = strstart;
     829            }
    760830            match_available = 1;
     831            RSYNC_ROLL(strstart, 1);
    761832            strstart++;
    762833            lookahead--;
    763834        }
  • doc/gzip.texi

    diff -u -r ../gzip-1.3.12.orig/doc/gzip.texi ./doc/gzip.texi
    old new  
    350350into the directory and compress all the files it finds there (or
    351351decompress them in the case of @command{gunzip}).
    352352
     353@item --rsyncable
     354While compressing, synchronize the output occasionally based on the
     355input.  This reduces compression by about 1 percent in most cases, but
     356means that the @command{rsync} program can take advantage of similarities
     357in the uncompressed input when synchronizing two files compressed with
     358this flag.  @command{gunzip} cannot tell the difference between a
     359compressed file created with this option, and one created without it.
     360
    353361@item --suffix @var{suf}
    354362@itemx -S @var{suf}
    355363Use suffix @var{suf} instead of @samp{.gz}.  Any suffix can be
  • gzip.c

    diff -u -r ../gzip-1.3.12.orig/gzip.c ./gzip.c
    old new  
    231231unsigned insize;           /* valid bytes in inbuf */
    232232unsigned inptr;            /* index of next byte to be processed in inbuf */
    233233unsigned outcnt;           /* bytes in output buffer */
     234int rsync = 0;             /* make rsyncable chunks */
    234235
    235236struct option longopts[] =
    236237{
     
    260261    {"best",       0, 0, '9'}, /* compress better */
    261262    {"lzw",        0, 0, 'Z'}, /* make output compatible with old compress */
    262263    {"bits",       1, 0, 'b'}, /* max number of bits per code (implies -Z) */
     264    {"rsyncable",  0, 0, 'R'}, /* make rsync-friendly archive */
    263265    { 0, 0, 0, 0 }
    264266};
    265267
     
    341343 "  -Z, --lzw         produce output compatible with old compress",
    342344 "  -b, --bits=BITS   max number of bits per code (implies -Z)",
    343345#endif
     346 "      --rsyncable   Make rsync-friendly archive",
    344347 "",
    345348 "With no FILE, or when FILE is -, read standard input.",
    346349 "",
     
    469472            recursive = 1;
    470473#endif
    471474            break;
     475        case 'R':
     476            rsync = 1; break;
    472477        case 'S':
    473478#ifdef NO_MULTIPLE_DOTS
    474479            if (*optarg == '.') optarg++;
  • gzip.h

    diff -u -r ../gzip-1.3.12.orig/gzip.h ./gzip.h
    old new  
    158158extern unsigned insize; /* valid bytes in inbuf */
    159159extern unsigned inptr;  /* index of next byte to be processed in inbuf */
    160160extern unsigned outcnt; /* bytes in output buffer */
     161extern int rsync;  /* deflate into rsyncable chunks */
    161162
    162163extern off_t bytes_in;   /* number of input bytes */
    163164extern off_t bytes_out;  /* number of output bytes */
     
    306307        /* in trees.c */
    307308void ct_init     OF((ush *attr, int *method));
    308309int  ct_tally    OF((int dist, int lc));
    309 off_t flush_block OF((char *buf, ulg stored_len, int eof));
     310off_t flush_block OF((char *buf, ulg stored_len, int pad, int eof));
    310311
    311312        /* in bits.c */
    312313void     bi_init    OF((file_t zipfile));
  • trees.c

    diff -u -r ../gzip-1.3.12.orig/trees.c ./trees.c
    old new  
    860860 * trees or store, and output the encoded block to the zip file. This function
    861861 * returns the total compressed length for the file so far.
    862862 */
    863 off_t flush_block(buf, stored_len, eof)
     863off_t flush_block(buf, stored_len, pad, eof)
    864864    char *buf;        /* input block, or NULL if too old */
    865865    ulg stored_len;   /* length of input block */
     866    int pad;          /* pad output to byte boundary */
    866867    int eof;          /* true if this is the last block for a file */
    867868{
    868869    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
     
    955956        Assert (input_len == bytes_in, "bad input size");
    956957        bi_windup();
    957958        compressed_len += 7;  /* align on byte boundary */
     959    } else if (pad && (compressed_len % 8) != 0) {
     960        send_bits((STORED_BLOCK<<1)+eof, 3);  /* send block type */
     961        compressed_len = (compressed_len + 3 + 7) & ~7L;
     962        copy_block(buf, 0, 1); /* with header */
    958963    }
    959964
    960965    return compressed_len >> 3;