OSDN Git Service

* src/header.c (get_header_level2): check CRC value for reading
[lha/lha.git] / src / slide.c
1 /* ------------------------------------------------------------------------ */
2 /* LHa for UNIX                                                                                                                         */
3 /*                              slice.c -- sliding dictionary with percolating update           */
4 /*                                                                                                                                                      */
5 /*              Modified                        Nobutaka Watazaki                                                       */
6 /*                                                                                                                                                      */
7 /*      Ver. 1.14d      Exchanging a search algorithm  1997.01.11    T.Okamoto      */
8 /* ------------------------------------------------------------------------ */
9
10 #if 0
11 #define DEBUG 1
12 #endif
13
14 #include "lha.h"
15
16
17 #ifdef DEBUG
18 FILE *fout = NULL;
19 static int noslide = 1;
20 #endif
21
22 /* ------------------------------------------------------------------------ */
23
24 static unsigned long encoded_origsize;
25
26 /* ------------------------------------------------------------------------ */
27
28 static unsigned int *hash;
29 static unsigned int *prev;
30
31 /* static unsigned char  *text; */
32 unsigned char *too_flag;
33
34
35 static struct encode_option encode_define[2] = {
36 #if defined(__STDC__) || defined(AIX)
37         /* lh1 */
38         {(void (*) ()) output_dyn,
39                 (void (*) ()) encode_start_fix,
40         (void (*) ()) encode_end_dyn},
41         /* lh4, 5,6 */
42         {(void (*) ()) output_st1,
43                 (void (*) ()) encode_start_st1,
44         (void (*) ()) encode_end_st1}
45 #else
46         /* lh1 */
47         {(int (*) ()) output_dyn,
48                 (int (*) ()) encode_start_fix,
49         (int (*) ()) encode_end_dyn},
50         /* lh4, 5,6 */
51         {(int (*) ()) output_st1,
52                 (int (*) ()) encode_start_st1,
53         (int (*) ()) encode_end_st1}
54 #endif
55 };
56
57 static struct decode_option decode_define[] = {
58         /* lh1 */
59         {decode_c_dyn, decode_p_st0, decode_start_fix},
60         /* lh2 */
61         {decode_c_dyn, decode_p_dyn, decode_start_dyn},
62         /* lh3 */
63         {decode_c_st0, decode_p_st0, decode_start_st0},
64         /* lh4 */
65         {decode_c_st1, decode_p_st1, decode_start_st1},
66         /* lh5 */
67         {decode_c_st1, decode_p_st1, decode_start_st1},
68         /* lh6 */
69         {decode_c_st1, decode_p_st1, decode_start_st1},
70         /* lh7 */
71         {decode_c_st1, decode_p_st1, decode_start_st1},
72         /* lzs */
73         {decode_c_lzs, decode_p_lzs, decode_start_lzs},
74         /* lz5 */
75         {decode_c_lz5, decode_p_lz5, decode_start_lz5}
76 };
77
78 static struct encode_option encode_set;
79 static struct decode_option decode_set;
80
81 #if 0
82 static node     pos, matchpos, avail, *position, *parent, *prev;
83 static int      remainder, matchlen;
84 static unsigned char *level, *childcount;
85 static unsigned long dicsiz;  /* t.okamoto */
86 static unsigned short max_hash_val;
87 static unsigned short hash1, hash2;
88 #endif
89
90 #ifdef SUPPORT_LH7
91 #define DICSIZ (1L << 16)
92 #define TXTSIZ (DICSIZ * 2L + MAXMATCH)
93 #else
94 #define DICSIZ (((unsigned long)1) << 15)
95 #define TXTSIZ (DICSIZ * 2 + MAXMATCH)
96 #endif
97
98 #define HSHSIZ (((unsigned long)1) <<15)
99 #define NIL 0
100 #define LIMIT 0x100     /* chain Ä¹¤Î limit */
101
102 static unsigned int txtsiz;
103
104 static unsigned long dicsiz;
105
106 static unsigned int hval;
107 static int matchlen;
108 static unsigned int matchpos;
109 static unsigned int pos;
110 static unsigned int remainder;
111
112
113 /* ------------------------------------------------------------------------ */
114 int
115 encode_alloc(method)
116         int             method;
117 {
118         if (method == LZHUFF1_METHOD_NUM) {     /* Changed N.Watazaki */
119                 encode_set = encode_define[0];
120                 maxmatch = 60;
121                 dicbit = 12;   /* 12 Changed N.Watazaki */
122         } else { /* method LH4(12),LH5(13),LH6(15) */
123                 encode_set = encode_define[1];
124                 maxmatch = MAXMATCH;
125                 if (method == LZHUFF7_METHOD_NUM)
126                         dicbit = LH7_DICBIT;    /* 16 bits */
127                 else if (method == LZHUFF6_METHOD_NUM)
128                         dicbit = LH6_DICBIT;    /* 15 bits */
129                 else /* LH5  LH4 is not used */
130                         dicbit = LH5_DICBIT;    /* 13 bits */
131         }
132
133         dicsiz = (((unsigned long)1) << dicbit);
134         txtsiz = dicsiz*2+maxmatch;
135
136         if (hash) return method;
137
138         alloc_buf();
139
140         hash = (unsigned int*)xmalloc(HSHSIZ * sizeof(unsigned int));
141         prev = (unsigned int*)xmalloc(DICSIZ * sizeof(unsigned int));
142         text = (unsigned char*)xmalloc(TXTSIZ);
143         too_flag = (unsigned char*)xmalloc(HSHSIZ);
144
145         return method;
146 }
147
148 /* ------------------------------------------------------------------------ */
149 /* ¥Ý¥¤¥ó¥¿¤Î½é´ü²½ */
150
151 static void init_slide()
152 {
153         unsigned int i;
154
155         for (i = 0; i < HSHSIZ; i++) {
156                 hash[i] = NIL;
157                 too_flag[i] = 0;
158         }
159         /*
160         for (i = 0; i < DICSIZ; i++) {
161             prev[i] = NIL;
162         }
163         */
164 }
165
166 /* ¼­½ñ¤ò DICSIZ Ê¬ Á°¤Ë¤º¤é¤¹ */
167
168 static unsigned int
169 update(crc)
170     unsigned int crc;
171 {
172         unsigned int i, j;
173         long n;
174
175 #if 0
176         memmove(&text[0], &text[dicsiz], (unsigned)(txtsiz - dicsiz));
177 #else
178         {
179                 int m;
180                 i = 0; j = dicsiz; m = txtsiz-dicsiz;
181                 while (m-- > 0) {
182                         text[i++] = text[j++];
183                 }
184         }
185 #endif
186         n = fread_crc(&crc, &text[(unsigned)(txtsiz - dicsiz)], 
187                                    (unsigned)dicsiz, infile);
188
189         remainder += n;
190         encoded_origsize += n;
191
192         pos -= dicsiz;
193         for (i = 0; i < HSHSIZ; i++) {
194                 j = hash[i];
195                 hash[i] = (j > dicsiz) ? j - dicsiz : NIL;
196                 too_flag[i] = 0;
197         }
198         for (i = 0; i < dicsiz; i++) {
199                 j = prev[i];
200                 prev[i] = (j > dicsiz) ? j - dicsiz : NIL;
201         }
202
203     return crc;
204 }
205
206
207 /* ¸½ºß¤Îʸ»úÎó¤ò¥Á¥§¡¼¥ó¤ËÄɲ乤ë */
208
209 static void insert()
210 {
211         prev[pos & (dicsiz - 1)] = hash[hval];
212         hash[hval] = pos;
213 }
214
215
216 /* ¸½ºß¤Îʸ»úÎó¤ÈºÇĹ°ìÃפ¹¤ëʸ»úÎó¤ò¸¡º÷¤·¡¢¥Á¥§¡¼¥ó¤ËÄɲ乤ë */
217
218 static void match_insert()
219 {
220         unsigned int scan_pos, scan_end, len;
221         unsigned char *a, *b;
222         unsigned int chain, off, h, max;
223
224         max = maxmatch; /* MAXMATCH; */
225         if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
226         matchpos = pos;
227
228         off = 0;
229         for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
230                 h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
231         }
232         if (off == maxmatch - THRESHOLD) off = 0;
233         for (;;) {
234                 chain = 0;
235                 scan_pos = hash[h];
236                 scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
237                 while (scan_pos > scan_end) {
238                         chain++;
239
240                         if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
241                                 {
242                                         a = text + scan_pos - off;  b = text + pos;
243                                         for (len = 0; len < max && *a++ == *b++; len++);
244                                 }
245
246                                 if (len > matchlen) {
247                                         matchpos = scan_pos - off;
248                                         if ((matchlen = len) == max) {
249                                                 break;
250                                         }
251 #ifdef DEBUG
252                                         if (noslide) {
253                                           if (matchpos < dicsiz) {
254                                                 printf("matchpos=%u scan_pos=%u dicsiz=%u\n"
255                                                            ,matchpos, scan_pos, dicsiz);
256                                           }
257                                         }
258 #endif
259                                 }
260                         }
261                         scan_pos = prev[scan_pos & (dicsiz - 1)];
262                 }
263
264                 if (chain >= LIMIT)
265                         too_flag[h] = 1;
266
267                 if (matchlen > off + 2 || off == 0)
268                         break;
269                 max = off + 2;
270                 off = 0;
271                 h = hval;
272         }
273         prev[pos & (dicsiz - 1)] = hash[hval];
274         hash[hval] = pos;
275 }
276
277
278 /* ¥Ý¥¤¥ó¥¿¤ò¿Ê¤á¡¢¼­½ñ¤ò¹¹¿·¤·¡¢¥Ï¥Ã¥·¥åÃͤò¹¹¿·¤¹¤ë */
279
280 static unsigned int
281 get_next(crc)
282     unsigned int crc;
283 {
284         remainder--;
285         if (++pos >= txtsiz - maxmatch) {
286                 crc = update(crc);
287 #ifdef DEBUG
288                 noslide = 0;
289 #endif
290         }
291         hval = ((hval << 5) ^ text[pos + 2]) & (unsigned)(HSHSIZ - 1);
292
293     return crc;
294 }
295
296 unsigned int
297 encode(interface)
298     struct interfacing *interface;
299 {
300         int lastmatchlen;
301         unsigned int lastmatchoffset;
302     unsigned int crc;
303
304 #ifdef DEBUG
305         unsigned int addr;
306
307         addr = 0;
308
309         fout = fopen("en", "wt");
310         if (fout == NULL) exit(1);
311 #endif
312         infile = interface->infile;
313         outfile = interface->outfile;
314         origsize = interface->original;
315         compsize = count = 0L;
316         unpackable = 0;
317
318     INITIALIZE_CRC(crc);
319
320         /* encode_alloc(); */ /* allocate_memory(); */
321         init_slide();  
322
323         encode_set.encode_start();
324         memset(&text[0], ' ', (long)TXTSIZ);
325
326         remainder = fread_crc(&crc, &text[dicsiz], txtsiz-dicsiz, infile);
327         encoded_origsize = remainder;
328         matchlen = THRESHOLD - 1;
329
330         pos = dicsiz;
331
332         if (matchlen > remainder) matchlen = remainder;
333         hval = ((((text[dicsiz] << 5) ^ text[dicsiz + 1]) << 5) 
334                 ^ text[dicsiz + 2]) & (unsigned)(HSHSIZ - 1);
335
336         insert();
337         while (remainder > 0 && ! unpackable) {
338                 lastmatchlen = matchlen;  lastmatchoffset = pos - matchpos - 1;
339                 --matchlen;
340                 crc = get_next(crc);  match_insert();
341                 if (matchlen > remainder) matchlen = remainder;
342                 if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {
343                         encode_set.output(text[pos - 1], 0);
344 #ifdef DEBUG
345                         fprintf(fout, "%u C %02X\n", addr, text[pos-1]);
346                         addr++;
347 #endif
348                         count++;
349                 } else {
350                         encode_set.output(lastmatchlen + (UCHAR_MAX + 1 - THRESHOLD),
351                            (lastmatchoffset) & (dicsiz-1) );
352                         --lastmatchlen;
353
354 #ifdef DEBUG
355                         fprintf(fout, "%u M %u %u ", addr,
356                                         lastmatchoffset & (dicsiz-1), lastmatchlen+1);
357                         addr += lastmatchlen +1 ;
358
359                         {
360                 int t,cc;
361                 for (t=0; t<lastmatchlen+1; t++) {
362                     cc = text[pos - lastmatchoffset - 2 + t];
363                     fprintf(fout, "%02X ", cc);
364                 }
365                 fprintf(fout, "\n");
366                         }
367 #endif
368                         while (--lastmatchlen > 0) {
369                                 crc = get_next(crc);  insert();
370                                 count++;
371                         }
372                         crc = get_next(crc);
373                         matchlen = THRESHOLD - 1;
374                         match_insert();
375                         if (matchlen > remainder) matchlen = remainder;
376                 }
377         }
378         encode_set.encode_end();
379
380         interface->packed = compsize;
381         interface->original = encoded_origsize;
382
383     return crc;
384 }
385
386 /* ------------------------------------------------------------------------ */
387 unsigned int
388 decode(interface)
389         struct interfacing *interface;
390 {
391         unsigned int i, j, k, c;
392         unsigned int dicsiz1, offset;
393         unsigned char *dtext;
394         unsigned int crc;
395
396 #ifdef DEBUG
397         fout = fopen("de", "wt");
398         if (fout == NULL) exit(1);
399 #endif
400
401         infile = interface->infile;
402         outfile = interface->outfile;
403         dicbit = interface->dicbit;
404         origsize = interface->original;
405         compsize = interface->packed;
406         decode_set = decode_define[interface->method - 1];
407
408         INITIALIZE_CRC(crc);
409         prev_char = -1;
410         dicsiz = 1L << dicbit;
411         dtext = (unsigned char *)xmalloc(dicsiz);
412         for (i=0; i<dicsiz; i++) dtext[i] = 0x20;
413         decode_set.decode_start();
414         dicsiz1 = dicsiz - 1;
415         offset = (interface->method == LARC_METHOD_NUM) ? 0x100 - 2 : 0x100 - 3;
416         count = 0;
417         loc = 0;
418         while (count < origsize) {
419                 c = decode_set.decode_c();
420                 if (c <= UCHAR_MAX) {
421 #ifdef DEBUG
422                   fprintf(fout, "%u C %02X\n", count, c);
423 #endif
424                         dtext[loc++] = c;
425                         if (loc == dicsiz) {
426                                 fwrite_crc(&crc, dtext, dicsiz, outfile);
427                                 loc = 0;
428                         }
429                         count++;
430                 }
431                 else {
432                         j = c - offset;
433                         i = (loc - decode_set.decode_p() - 1) & dicsiz1;
434 #ifdef DEBUG
435                         fprintf(fout, "%u M %u %u ", count, (loc-1-i) & dicsiz1, j);
436 #endif
437                         count += j;
438                         for (k = 0; k < j; k++) {
439                                 c = dtext[(i + k) & dicsiz1];
440
441 #ifdef DEBUG
442                                 fprintf(fout, "%02X ", c & 0xff);
443 #endif
444                                 dtext[loc++] = c;
445                                 if (loc == dicsiz) {
446                                         fwrite_crc(&crc, dtext, dicsiz, outfile);
447                                         loc = 0;
448                                 }
449                         }
450 #ifdef DEBUG
451                         fprintf(fout, "\n");
452 #endif
453                 }
454         }
455         if (loc != 0) {
456                 fwrite_crc(&crc, dtext, loc, outfile);
457         }
458
459         free(dtext);
460     return crc;
461 }
462
463 /* Local Variables: */
464 /* mode:c */
465 /* tab-width:4 */
466 /* End: */
467 /* vi: set tabstop=4: */