OSDN Git Service

596816487cb7f817af491ee91735439be70a2f03
[lha/lha.git] / src / slide.c
1 /* ------------------------------------------------------------------------ */
2 /* LHa for UNIX                                                                                                                         */
3 /*                              slice.c -- sliding dictionary with percolating update           */
4 /*                                                                                                                                                      */
5 /*              Modified                        Nobutaka Watazaki                                                       */
6 /*                                                                                                                                                      */
7 /*      Ver. 1.14d      Exchanging a search algorithm  1997.01.11    T.Okamoto      */
8 /* ------------------------------------------------------------------------ */
9
10 #if 0
11 #define DEBUG 1
12 #endif
13
14 #include "lha.h"
15
16
17 #ifdef DEBUG
18 FILE *fout = NULL;
19 static int noslide = 1;
20 #endif
21
22 /* ------------------------------------------------------------------------ */
23
24 static unsigned long encoded_origsize;
25
26 /* ------------------------------------------------------------------------ */
27
28 static unsigned int *hash;
29 static unsigned int *prev;
30
31 /* static unsigned char  *text; */
32 unsigned char *too_flag;
33
34
35 static struct encode_option encode_define[2] = {
36 #if defined(__STDC__) || defined(AIX)
37         /* lh1 */
38         {(void (*) ()) output_dyn,
39                 (void (*) ()) encode_start_fix,
40         (void (*) ()) encode_end_dyn},
41         /* lh4, 5,6 */
42         {(void (*) ()) output_st1,
43                 (void (*) ()) encode_start_st1,
44         (void (*) ()) encode_end_st1}
45 #else
46         /* lh1 */
47         {(int (*) ()) output_dyn,
48                 (int (*) ()) encode_start_fix,
49         (int (*) ()) encode_end_dyn},
50         /* lh4, 5,6 */
51         {(int (*) ()) output_st1,
52                 (int (*) ()) encode_start_st1,
53         (int (*) ()) encode_end_st1}
54 #endif
55 };
56
57 static struct decode_option decode_define[] = {
58         /* lh1 */
59         {decode_c_dyn, decode_p_st0, decode_start_fix},
60         /* lh2 */
61         {decode_c_dyn, decode_p_dyn, decode_start_dyn},
62         /* lh3 */
63         {decode_c_st0, decode_p_st0, decode_start_st0},
64         /* lh4 */
65         {decode_c_st1, decode_p_st1, decode_start_st1},
66         /* lh5 */
67         {decode_c_st1, decode_p_st1, decode_start_st1},
68         /* lh6 */
69         {decode_c_st1, decode_p_st1, decode_start_st1},
70         /* lh7 */
71         {decode_c_st1, decode_p_st1, decode_start_st1},
72         /* lzs */
73         {decode_c_lzs, decode_p_lzs, decode_start_lzs},
74         /* lz5 */
75         {decode_c_lz5, decode_p_lz5, decode_start_lz5}
76 };
77
78 static struct encode_option encode_set;
79 static struct decode_option decode_set;
80
81 #if 0
82 static node     pos, matchpos, avail, *position, *parent, *prev;
83 static int      remainder, matchlen;
84 static unsigned char *level, *childcount;
85 static unsigned long dicsiz;  /* t.okamoto */
86 static unsigned short max_hash_val;
87 static unsigned short hash1, hash2;
88 #endif
89
90 #ifdef SUPPORT_LH7
91 #define DICSIZ (1L << 16)
92 #define TXTSIZ (DICSIZ * 2L + MAXMATCH)
93 #else
94 #define DICSIZ (((unsigned long)1) << 15)
95 #define TXTSIZ (DICSIZ * 2 + MAXMATCH)
96 #endif
97
98 #define HSHSIZ (((unsigned long)1) <<15)
99 #define NIL 0
100 #define LIMIT 0x100     /* chain Ä¹¤Î limit */
101
102 static unsigned int txtsiz;
103
104 static unsigned long dicsiz;
105
106 static unsigned int hval;
107 static int matchlen;
108 static unsigned int matchpos;
109 static unsigned int pos;
110 static unsigned int remainder;
111
112
113 /* ------------------------------------------------------------------------ */
114 int
115 encode_alloc(method)
116         int             method;
117 {
118         if (method == LZHUFF1_METHOD_NUM) {     /* Changed N.Watazaki */
119                 encode_set = encode_define[0];
120                 maxmatch = 60;
121                 dicbit = 12;   /* 12 Changed N.Watazaki */
122         } else { /* method LH4(12),LH5(13),LH6(15) */
123                 encode_set = encode_define[1];
124                 maxmatch = MAXMATCH;
125                 if (method == LZHUFF7_METHOD_NUM)
126                         dicbit = MAX_DICBIT; /* 16 bits */
127                 else if (method == LZHUFF6_METHOD_NUM)
128                         dicbit = MAX_DICBIT-1;          /* 15 bits */
129                 else /* LH5  LH4 is not used */
130                         dicbit = MAX_DICBIT - 3;        /* 13 bits */
131         }
132
133         dicsiz = (((unsigned long)1) << dicbit);
134         txtsiz = dicsiz*2+maxmatch;
135
136         if (hash) return method;
137
138         alloc_buf();
139
140         hash = (unsigned int*)xmalloc(HSHSIZ * sizeof(unsigned int));
141         prev = (unsigned int*)xmalloc(DICSIZ * sizeof(unsigned int));
142         text = (unsigned char*)xmalloc(TXTSIZ);
143         too_flag = (unsigned char*)xmalloc(HSHSIZ);
144
145         return method;
146 }
147
148 /* ------------------------------------------------------------------------ */
149 /* ¥Ý¥¤¥ó¥¿¤Î½é´ü²½ */
150
151 static void init_slide()
152 {
153         unsigned int i;
154
155         for (i = 0; i < HSHSIZ; i++) {
156                 hash[i] = NIL;
157                 too_flag[i] = 0;
158         }
159         /*
160         for (i = 0; i < DICSIZ; i++) {
161             prev[i] = NIL;
162         }
163         */
164 }
165
166 /* ¼­½ñ¤ò DICSIZ Ê¬ Á°¤Ë¤º¤é¤¹ */
167
168 static void update()
169 {
170         unsigned int i, j;
171         unsigned int k;
172         long n;
173
174 #if 0
175         memmove(&text[0], &text[dicsiz], (unsigned)(txtsiz - dicsiz));
176 #else
177         {
178                 int m;
179                 i = 0; j = dicsiz; m = txtsiz-dicsiz;
180                 while (m-- > 0) {
181                         text[i++] = text[j++];
182                 }
183         }
184 #endif
185         n = fread_crc(&text[(unsigned)(txtsiz - dicsiz)], 
186                                    (unsigned)dicsiz, infile);
187
188         remainder += n;
189         encoded_origsize += n;
190
191         pos -= dicsiz;
192         for (i = 0; i < HSHSIZ; i++) {
193                 j = hash[i];
194                 hash[i] = (j > dicsiz) ? j - dicsiz : NIL;
195                 too_flag[i] = 0;
196         }
197         for (i = 0; i < dicsiz; i++) {
198                 j = prev[i];
199                 prev[i] = (j > dicsiz) ? j - dicsiz : NIL;
200         }
201 }
202
203
204 /* ¸½ºß¤Îʸ»úÎó¤ò¥Á¥§¡¼¥ó¤ËÄɲ乤ë */
205
206 static void insert()
207 {
208         prev[pos & (dicsiz - 1)] = hash[hval];
209         hash[hval] = pos;
210 }
211
212
213 /* ¸½ºß¤Îʸ»úÎó¤ÈºÇĹ°ìÃפ¹¤ëʸ»úÎó¤ò¸¡º÷¤·¡¢¥Á¥§¡¼¥ó¤ËÄɲ乤ë */
214
215 static void match_insert()
216 {
217         unsigned int scan_pos, scan_end, len;
218         unsigned char *a, *b;
219         unsigned int chain, off, h, max;
220
221         max = maxmatch; /* MAXMATCH; */
222         if (matchlen < THRESHOLD - 1) matchlen = THRESHOLD - 1;
223         matchpos = pos;
224
225         off = 0;
226         for (h = hval; too_flag[h] && off < maxmatch - THRESHOLD; ) {
227                 h = ((h << 5) ^ text[pos + (++off) + 2]) & (unsigned)(HSHSIZ - 1);
228         }
229         if (off == maxmatch - THRESHOLD) off = 0;
230         for (;;) {
231                 chain = 0;
232                 scan_pos = hash[h];
233                 scan_end = (pos > dicsiz) ? pos + off - dicsiz : off;
234                 while (scan_pos > scan_end) {
235                         chain++;
236
237                         if (text[scan_pos + matchlen - off] == text[pos + matchlen]) {
238                                 {
239                                         a = text + scan_pos - off;  b = text + pos;
240                                         for (len = 0; len < max && *a++ == *b++; len++);
241                                 }
242
243                                 if (len > matchlen) {
244                                         matchpos = scan_pos - off;
245                                         if ((matchlen = len) == max) {
246                                                 break;
247                                         }
248 #ifdef DEBUG
249                                         if (noslide) {
250                                           if (matchpos < dicsiz) {
251                                                 printf("matchpos=%u scan_pos=%u dicsiz=%u\n"
252                                                            ,matchpos, scan_pos, dicsiz);
253                                           }
254                                         }
255 #endif
256                                 }
257                         }
258                         scan_pos = prev[scan_pos & (dicsiz - 1)];
259                 }
260
261                 if (chain >= LIMIT)
262                         too_flag[h] = 1;
263
264                 if (matchlen > off + 2 || off == 0)
265                         break;
266                 max = off + 2;
267                 off = 0;
268                 h = hval;
269         }
270         prev[pos & (dicsiz - 1)] = hash[hval];
271         hash[hval] = pos;
272 }
273
274
275 /* ¥Ý¥¤¥ó¥¿¤ò¿Ê¤á¡¢¼­½ñ¤ò¹¹¿·¤·¡¢¥Ï¥Ã¥·¥åÃͤò¹¹¿·¤¹¤ë */
276
277 static void get_next()
278 {
279         remainder--;
280         if (++pos >= txtsiz - maxmatch) {
281                 update();
282 #ifdef DEBUG
283                 noslide = 0;
284 #endif
285         }
286         hval = ((hval << 5) ^ text[pos + 2]) & (unsigned)(HSHSIZ - 1);
287 }
288
289 void encode(interface)
290 struct interfacing *interface;
291 {
292         int lastmatchlen;
293         unsigned int lastmatchoffset;
294
295 #ifdef DEBUG
296         unsigned int addr;
297
298         addr = 0;
299
300         fout = fopen("en", "wt");
301         if (fout == NULL) exit(1);
302 #endif
303         infile = interface->infile;
304         outfile = interface->outfile;
305         origsize = interface->original;
306         compsize = count = 0L;
307         crc = unpackable = 0;
308
309         /* encode_alloc(); */ /* allocate_memory(); */
310         init_slide();  
311
312         encode_set.encode_start();
313         memset(&text[0], ' ', (long)TXTSIZ);
314
315         remainder = fread_crc(&text[dicsiz], txtsiz-dicsiz, infile);
316         encoded_origsize = remainder;
317         matchlen = THRESHOLD - 1;
318
319         pos = dicsiz;
320
321         if (matchlen > remainder) matchlen = remainder;
322         hval = ((((text[dicsiz] << 5) ^ text[dicsiz + 1]) << 5) 
323                 ^ text[dicsiz + 2]) & (unsigned)(HSHSIZ - 1);
324
325         insert();
326         while (remainder > 0 && ! unpackable) {
327                 lastmatchlen = matchlen;  lastmatchoffset = pos - matchpos - 1;
328                 --matchlen;
329                 get_next();  match_insert();
330                 if (matchlen > remainder) matchlen = remainder;
331                 if (matchlen > lastmatchlen || lastmatchlen < THRESHOLD) {
332                         encode_set.output(text[pos - 1], 0);
333 #ifdef DEBUG
334                         fprintf(fout, "%u C %02X\n", addr, text[pos-1]);
335                         addr++;
336 #endif
337                         count++;
338                 } else {
339                         encode_set.output(lastmatchlen + (UCHAR_MAX + 1 - THRESHOLD),
340                            (lastmatchoffset) & (dicsiz-1) );
341                         --lastmatchlen;
342
343 #ifdef DEBUG
344                         fprintf(fout, "%u M %u %u ", addr,
345                                         lastmatchoffset & (dicsiz-1), lastmatchlen+1);
346                         addr += lastmatchlen +1 ;
347
348                         {
349                           int t,cc;
350                         for (t=0; t<lastmatchlen+1; t++) {
351                           cc = text[(pos-(lastmatchoffset)) & (dicsiz-1)];
352                           fprintf(fout, "%02X ", cc);
353                         }
354                         fprintf(fout, "\n");
355                         }
356 #endif
357                         while (--lastmatchlen > 0) {
358                                 get_next();  insert();
359                                 count++;
360                         }
361                         get_next();
362                         matchlen = THRESHOLD - 1;
363                         match_insert();
364                         if (matchlen > remainder) matchlen = remainder;
365                 }
366         }
367         encode_set.encode_end();
368
369         interface->packed = compsize;
370         interface->original = encoded_origsize;
371 }
372
373 /* ------------------------------------------------------------------------ */
374 void
375 decode(interface)
376         struct interfacing *interface;
377 {
378         unsigned int i, j, k, c;
379         unsigned int dicsiz1, offset;
380         unsigned char *dtext;
381         
382
383 #ifdef DEBUG
384         fout = fopen("de", "wt");
385         if (fout == NULL) exit(1);
386 #endif
387
388         infile = interface->infile;
389         outfile = interface->outfile;
390         dicbit = interface->dicbit;
391         origsize = interface->original;
392         compsize = interface->packed;
393         decode_set = decode_define[interface->method - 1];
394
395         crc = 0;
396         prev_char = -1;
397         dicsiz = 1L << dicbit;
398         dtext = (unsigned char *)xmalloc(dicsiz);
399         for (i=0; i<dicsiz; i++) dtext[i] = 0x20;
400         decode_set.decode_start();
401         dicsiz1 = dicsiz - 1;
402         offset = (interface->method == LARC_METHOD_NUM) ? 0x100 - 2 : 0x100 - 3;
403         count = 0;
404         loc = 0;
405         while (count < origsize) {
406                 c = decode_set.decode_c();
407                 if (c <= UCHAR_MAX) {
408 #ifdef DEBUG
409                   fprintf(fout, "%u C %02X\n", count, c);
410 #endif
411                         dtext[loc++] = c;
412                         if (loc == dicsiz) {
413                                 fwrite_crc(dtext, dicsiz, outfile);
414                                 loc = 0;
415                         }
416                         count++;
417                 }
418                 else {
419                         j = c - offset;
420                         i = (loc - decode_set.decode_p() - 1) & dicsiz1;
421 #ifdef DEBUG
422                         fprintf(fout, "%u M %u %u ", count, (loc-1-i) & dicsiz1, j);
423 #endif
424                         count += j;
425                         for (k = 0; k < j; k++) {
426                                 c = dtext[(i + k) & dicsiz1];
427
428 #ifdef DEBUG
429                                 fprintf(fout, "%02X ", c & 0xff);
430 #endif
431                                 dtext[loc++] = c;
432                                 if (loc == dicsiz) {
433                                         fwrite_crc(dtext, dicsiz, outfile);
434                                         loc = 0;
435                                 }
436                         }
437 #ifdef DEBUG
438                         fprintf(fout, "\n");
439 #endif
440                 }
441         }
442         if (loc != 0) {
443                 fwrite_crc(dtext, loc, outfile);
444         }
445
446         free(dtext);
447 }
448
449 /* Local Variables: */
450 /* mode:c */
451 /* tab-width:4 */
452 /* End: */