1 /* ------------------------------------------------------------------------ */
3 /* slide.c -- sliding dictionary with percolating update */
5 /* Modified Nobutaka Watazaki */
7 /* Ver. 1.14d Exchanging a search algorithm 1997.01.11 T.Okamoto */
8 /* ------------------------------------------------------------------------ */
18 static int noslide = 1;
21 /* variables for hash */
24 int too_flag; /* if 1, matching candidate is too many */
26 static unsigned int *prev; /* previous posiion associated with hash */
28 /* hash function: it represents 3 letters from `pos' on `text' */
29 #define INIT_HASH(pos) \
30 ((( (text[(pos)] << 5) \
31 ^ text[(pos) + 1] ) << 5) \
32 ^ text[(pos) + 2] ) & (unsigned)(HSHSIZ - 1);
33 #define NEXT_HASH(hash,pos) \
35 ^ text[(pos) + 2] ) & (unsigned)(HSHSIZ - 1);
37 static struct encode_option encode_define[2] = {
38 #if defined(__STDC__) || defined(AIX)
40 {(void (*) ()) output_dyn,
41 (void (*) ()) encode_start_fix,
42 (void (*) ()) encode_end_dyn},
44 {(void (*) ()) output_st1,
45 (void (*) ()) encode_start_st1,
46 (void (*) ()) encode_end_st1}
49 {(int (*) ()) output_dyn,
50 (int (*) ()) encode_start_fix,
51 (int (*) ()) encode_end_dyn},
53 {(int (*) ()) output_st1,
54 (int (*) ()) encode_start_st1,
55 (int (*) ()) encode_end_st1}
59 static struct decode_option decode_define[] = {
61 {decode_c_dyn, decode_p_st0, decode_start_fix},
63 {decode_c_dyn, decode_p_dyn, decode_start_dyn},
65 {decode_c_st0, decode_p_st0, decode_start_st0},
67 {decode_c_st1, decode_p_st1, decode_start_st1},
69 {decode_c_st1, decode_p_st1, decode_start_st1},
71 {decode_c_st1, decode_p_st1, decode_start_st1},
73 {decode_c_st1, decode_p_st1, decode_start_st1},
75 {decode_c_lzs, decode_p_lzs, decode_start_lzs},
77 {decode_c_lz5, decode_p_lz5, decode_start_lz5}
80 static struct encode_option encode_set;
81 static struct decode_option decode_set;
83 #define TXTSIZ (MAX_DICSIZ * 2L + MAXMATCH)
84 #define HSHSIZ (((unsigned long)1) <<15)
86 #define LIMIT 0x100 /* limit of hash chain */
88 static unsigned int txtsiz;
89 static unsigned long dicsiz;
90 static unsigned int remainder;
102 case LZHUFF1_METHOD_NUM:
103 encode_set = encode_define[0];
105 dicbit = LZHUFF1_DICBIT; /* 12 bits Changed N.Watazaki */
107 case LZHUFF5_METHOD_NUM:
108 encode_set = encode_define[1];
110 dicbit = LZHUFF5_DICBIT; /* 13 bits */
112 case LZHUFF6_METHOD_NUM:
113 encode_set = encode_define[1];
115 dicbit = LZHUFF6_DICBIT; /* 15 bits */
117 case LZHUFF7_METHOD_NUM:
118 encode_set = encode_define[1];
120 dicbit = LZHUFF7_DICBIT; /* 16 bits */
123 error("unknown method %d", method);
127 dicsiz = (((unsigned long)1) << dicbit);
128 txtsiz = dicsiz*2+maxmatch;
130 if (hash) return method;
134 hash = (struct hash*)xmalloc(HSHSIZ * sizeof(struct hash));
135 prev = (unsigned int*)xmalloc(MAX_DICSIZ * sizeof(unsigned int));
136 text = (unsigned char*)xmalloc(TXTSIZ);
146 for (i = 0; i < HSHSIZ; i++) {
148 hash[i].too_flag = 0;
152 /* update dictionary */
154 update_dict(pos, crc)
161 memmove(&text[0], &text[dicsiz], txtsiz - dicsiz);
163 n = fread_crc(crc, &text[txtsiz - dicsiz], dicsiz, infile);
168 for (i = 0; i < HSHSIZ; i++) {
170 hash[i].pos = (j > dicsiz) ? j - dicsiz : NIL;
171 hash[i].too_flag = 0;
173 for (i = 0; i < dicsiz; i++) {
175 prev[i] = (j > dicsiz) ? j - dicsiz : NIL;
179 /* associate position with token */
181 insert_hash(token, pos)
185 prev[pos & (dicsiz - 1)] = hash[token].pos; /* chain the previous pos. */
186 hash[token].pos = pos;
190 search_dict_1(token, pos, off, max, m)
194 unsigned int max; /* max. length of matching string */
197 unsigned int chain = 0;
198 unsigned int scan_pos = hash[token].pos;
199 int scan_beg = scan_pos - off;
200 int scan_end = pos - dicsiz;
203 while (scan_beg > scan_end) {
206 if (text[scan_beg + m->len] == text[pos + m->len]) {
209 unsigned char *a = &text[scan_beg];
210 unsigned char *b = &text[pos];
212 for (len = 0; len < max && *a++ == *b++; len++);
216 m->off = pos - scan_beg;
223 if (pos - m->off < dicsiz) {
224 printf("matchpos=%u scan_pos=%u dicsiz=%u\n",
225 pos - m->off, scan_pos, dicsiz);
231 scan_pos = prev[scan_pos & (dicsiz - 1)];
232 scan_beg = scan_pos - off;
236 hash[token].too_flag = 1;
239 /* search the longest token matching to current token */
241 search_dict(token, pos, min, m)
242 unsigned int token; /* search token */
243 unsigned int pos; /* position of token */
244 int min; /* min. length of matching string */
247 unsigned int off, tok, max;
249 if (min < THRESHOLD - 1) min = THRESHOLD - 1;
256 for (tok = token; hash[tok].too_flag && off < maxmatch - THRESHOLD; ) {
257 /* If matching position is too many, The search key is
258 changed into following token from `off' (for speed). */
260 tok = NEXT_HASH(tok, pos+off);
262 if (off == maxmatch - THRESHOLD) {
267 search_dict_1(tok, pos, off, max, m);
269 if (off > 0 && m->len < off + 3)
271 search_dict_1(token, pos, 0, off+2, m);
273 if (m->len > remainder) m->len = remainder;
276 /* slide dictionary */
278 next_token(token, pos, crc)
284 if (++*pos >= txtsiz - maxmatch) {
285 update_dict(pos, crc);
290 *token = NEXT_HASH(*token, *pos);
295 struct interfacing *interface;
297 unsigned int token, pos, crc;
299 struct matchdata match, last;
303 fout = xfopen("en", "wt");
304 fprintf(fout, "[filename: %s]\n", reading_filename);
306 infile = interface->infile;
307 outfile = interface->outfile;
308 origsize = interface->original;
309 compsize = count = 0L;
316 encode_set.encode_start();
317 memset(text, ' ', TXTSIZ);
319 remainder = fread_crc(&crc, &text[dicsiz], txtsiz-dicsiz, infile);
321 match.len = THRESHOLD - 1;
323 if (match.len > remainder) match.len = remainder;
326 token = INIT_HASH(pos);
327 insert_hash(token, pos); /* associate token and pos */
329 while (remainder > 0 && ! unpackable) {
332 next_token(&token, &pos, &crc);
333 search_dict(token, pos, last.len-1, &match);
334 insert_hash(token, pos);
336 if (match.len > last.len || last.len < THRESHOLD) {
337 /* output a letter */
338 encode_set.output(text[pos - 1], 0);
340 fprintf(fout, "%u C %02X\n", count, text[pos-1]);
344 /* output length and offset */
345 encode_set.output(last.len + (256 - THRESHOLD),
346 (last.off-1) & (dicsiz-1) );
352 unsigned int offset = (last.off & (dicsiz-1));
354 fprintf(fout, "%u M <%u %u> ",
355 count, last.len, count - offset);
357 ptr = &text[pos-1 - offset];
358 for (i=0; i < last.len; i++)
359 fprintf(fout, "%02X ", ptr[i]);
366 while (--last.len > 0) {
367 next_token(&token, &pos, &crc);
368 insert_hash(token, pos);
370 next_token(&token, &pos, &crc);
371 search_dict(token, pos, THRESHOLD - 1, &match);
372 insert_hash(token, pos);
375 encode_set.encode_end();
377 interface->packed = compsize;
378 interface->original = count;
385 struct interfacing *interface;
388 unsigned int dicsiz1, adjust;
389 unsigned char *dtext;
394 fout = xfopen("de", "wt");
395 fprintf(fout, "[filename: %s]\n", writing_filename);
398 infile = interface->infile;
399 outfile = interface->outfile;
400 dicbit = interface->dicbit;
401 origsize = interface->original;
402 compsize = interface->packed;
403 decode_set = decode_define[interface->method - 1];
406 dicsiz = 1L << dicbit;
407 dtext = (unsigned char *)xmalloc(dicsiz);
409 if (extract_broken_archive)
411 /* LHa for UNIX (autoconf) had a fatal bug since version
412 1.14i-ac20030713 (slide.c revision 1.20).
414 This bug is possible to make a broken archive, proper LHA
415 cannot extract it (probably it report CRC error).
417 If the option "--extract-broken-archive" specified, extract
418 the broken archive made by old LHa for UNIX. */
419 memset(dtext, 0, dicsiz);
421 memset(dtext, ' ', dicsiz);
422 decode_set.decode_start();
423 dicsiz1 = dicsiz - 1;
424 adjust = 256 - THRESHOLD;
425 if (interface->method == LARC_METHOD_NUM)
430 while (decode_count < origsize) {
431 c = decode_set.decode_c();
434 fprintf(fout, "%u C %02X\n", decode_count, c);
438 fwrite_crc(&crc, dtext, dicsiz, outfile);
444 struct matchdata match;
445 unsigned int matchpos;
447 match.len = c - adjust;
448 match.off = decode_set.decode_p() + 1;
449 matchpos = (loc - match.off) & dicsiz1;
451 fprintf(fout, "%u M <%u %u> ",
452 decode_count, match.len, decode_count-match.off);
454 decode_count += match.len;
455 for (i = 0; i < match.len; i++) {
456 c = dtext[(matchpos + i) & dicsiz1];
458 fprintf(fout, "%02X ", c & 0xff);
462 fwrite_crc(&crc, dtext, dicsiz, outfile);
472 fwrite_crc(&crc, dtext, loc, outfile);
477 /* usually read size is interface->packed */
478 interface->read_size = interface->packed - compsize;