dictzip-lib/src/test/java/org/dict/zip/DictZipFileTest.java

   1 /*
   2  * DictZip Library test.
   3  *
   4  * Copyright (C) 2021-2022 Hiroshi Miura
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License
   8  * as published by the Free Software Foundation; either version 2
   9  * of the License, or (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19  *
  20  * Linking this library statically or dynamically with other modules is
  21  * making a combined work based on this library.  Thus, the terms and
  22  * conditions of the GNU General Public License cover the whole
  23  * combination.
  24  *
  25  * As a special exception, the copyright holders of this library give you
  26  * permission to link this library with independent modules to produce an
  27  * executable, regardless of the license terms of these independent
  28  * modules, and to copy and distribute the resulting executable under
  29  * terms of your choice, provided that you also meet, for each linked
  30  * independent module, the terms and conditions of the license of that
  31  * module.  An independent module is a module which is not derived from
  32  * or based on this library.  If you modify this library, you may extend
  33  * this exception to your version of the library, but you are not
  34  * obligated to do so.  If you do not wish to do so, delete this
  35  * exception statement from your version.
  36  */
  37 package org.dict.zip;
  38
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.zip.Deflater;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
  64
  65 public class DictZipFileTest {
  66
    // Size in bytes of the I/O buffers used by these tests. The same value is handed to
    // DictZipOutputStream as its second constructor argument and is the unit from which
    // the seek positions in testLargeFileInputOutput are derived.
    // NOTE(review): presumably this is the dictzip chunk length - confirm against DictZipOutputStream.
    private static final int BUF_LEN = 58315;
  68
  69     void prepareTextData(final Path outTextPath, final int size) throws IOException {
  70         Random random = new Random();
  71         File outTextFile = outTextPath.toFile();
  72         try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
  73                 new FileOutputStream(outTextFile), StandardCharsets.US_ASCII)), false)) {
  74             for (long i = 0; i < size; i++) {
  75                 int number = random.nextInt(94);
  76                 writer.print((char) (32 + number));
  77             }
  78         };
  79     }
  80
  81     void prepareLargeTextData(final Path outTextPath, final int size) throws IOException {
  82         Random random = new Random();
  83         File outTextFile = outTextPath.toFile();
  84         PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
  85                 new FileOutputStream(outTextFile), StandardCharsets.US_ASCII)), false);
  86         int outSize = 0;
  87         while (true) {
  88             for (int j = 0; j < 1000; j++) {
  89                 for (int i = 0; i < 99; i++) {
  90                     int number = random.nextInt(94);
  91                     writer.print((char) (32 + number));
  92                 }
  93                 writer.print("\n");
  94             }
  95             outSize += 1000 * 100;
  96             if (outSize >= size) {
  97                 writer.close();
  98                 break;
  99             }
 100         }
 101     }
 102
 103     /**
 104      * Test case to extract an archive file.
 105      * @param tempDir JUnit5.jupiter TempDir.
 106      * @throws IOException when i/o error occurred.
 107      * @throws InterruptedException when external dictzip not executed well.
 108      */
 109     @Test
 110     public void testFileReadAceess(@TempDir Path tempDir) throws IOException, InterruptedException {
 111         // Run test when running on Linux and dictzip command installed
 112         Assumptions.assumeTrue(Paths.get("/usr/bin/dictzip").toFile().exists());
 113         int size = 65536;  // 64kB
 114         byte[] buf = new byte[BUF_LEN];
 115         // create archive with dictzip command
 116         Path outTextPath = tempDir.resolve("DictZipText.txt");
 117         prepareTextData(outTextPath, size);
 118         File inputFile = outTextPath.toFile();
 119         assertEquals(size, inputFile.length());
 120         // get expectation
 121         try (RandomAccessInputStream is = new RandomAccessInputStream(new RandomAccessFile(inputFile, "r"))) {
 122             is.seek(size -2);
 123             int len = is.read(buf, 0, 1);
 124             assertEquals(1, len);
 125         }
 126         byte expected = buf[0];
 127         Process process = Runtime.getRuntime().exec(String.format("dictzip %s", outTextPath.toAbsolutePath()));
 128         int returnCode = process.waitFor();
 129         assertEquals(0, returnCode);
 130         File zippedFile = tempDir.resolve("DictZipText.txt.dz").toFile();
 131         // read dictZip archive
 132         try (DictZipInputStream din = new DictZipInputStream(new RandomAccessInputStream(new
 133                 RandomAccessFile(zippedFile, "r")))) {
 134             din.seek(size - 2);
 135             int len = din.read(buf, 0, 1);
 136             assertTrue(len > 0);
 137         }
 138         assertEquals(expected, buf[0]);
 139     }
 140
 141     /**
 142      * Test case to create large archive.
 143      */
 144     @Test
 145     public void testFileCreation(@TempDir Path tempDir) throws IOException, InterruptedException {
 146         // Run test when running on Linux and dictzip command installed
 147         Assumptions.assumeTrue(Paths.get("/usr/bin/dictzip").toFile().exists());
 148         int size = BUF_LEN * 512 + 100;
 149         byte[] buf = new byte[BUF_LEN];
 150         // create data
 151         Path outTextPath = tempDir.resolve("DictZipText.orig.txt");
 152         prepareTextData(outTextPath, size);
 153         File inputFile = outTextPath.toFile();
 154         Path zippedPath = tempDir.resolve("DictZipText.txt.dz");
 155         assertEquals(size, inputFile.length());
 156         // create dictZip archive
 157         int defLevel = Deflater.DEFAULT_COMPRESSION;
 158         try (FileInputStream ins = new FileInputStream(inputFile);
 159              DictZipOutputStream dout = new DictZipOutputStream(
 160                      new RandomAccessOutputStream(new RandomAccessFile(zippedPath.toFile(), "rws")),
 161                      BUF_LEN, inputFile.length(), defLevel)) {
 162             int len;
 163             while ((len = ins.read(buf, 0, BUF_LEN)) > 0) {
 164                 dout.write(buf, 0, len);
 165             }
 166             dout.finish();
 167         }
 168         Process process = Runtime.getRuntime().exec(
 169                 String.format("dictzip -d -f -k -v %s", zippedPath.toAbsolutePath()));
 170         StreamGobbler streamGobbler = new StreamGobbler(process.getInputStream(), System.out::println);
 171         Executors.newSingleThreadExecutor().submit(streamGobbler);
 172         int returnCode = process.waitFor();
 173         assertEquals(0, returnCode);
 174     }
 175
 176     /**
 177      * Test case to extract large archive file.
 178      * @param tempDir JUnit5.jupiter TempDir.
 179      * @throws IOException when i/o error occurred.
 180      * @throws InterruptedException when external dictzip not executed well.
 181      */
 182     @Test
 183     public void testLargeFileReadAceess(@TempDir Path tempDir) throws IOException, InterruptedException {
 184         // Run test when running on Linux and dictzip command installed
 185         Assumptions.assumeTrue(Paths.get("/usr/bin/dictzip").toFile().exists());
 186         int size = 45000000;  // about 45MB
 187         byte[] buf = new byte[BUF_LEN];
 188         // create archive with dictzip command
 189         Path outTextPath = tempDir.resolve("DictZipText.txt");
 190         prepareLargeTextData(outTextPath, size);
 191         File inputFile = outTextPath.toFile();
 192         assertEquals(size, inputFile.length());
 193         // get expectation
 194         try (RandomAccessInputStream is = new RandomAccessInputStream(new RandomAccessFile(inputFile, "r"))) {
 195             is.seek(size -2);
 196             int len = is.read(buf, 0, 1);
 197             assertEquals(1, len);
 198         }
 199         byte expected = buf[0];
 200         Process process = Runtime.getRuntime().exec(String.format("dictzip %s", outTextPath.toAbsolutePath()));
 201         int returnCode = process.waitFor();
 202         assertEquals(0, returnCode);
 203         File zippedFile = tempDir.resolve("DictZipText.txt.dz").toFile();
 204         // read dictZip archive
 205         try (DictZipInputStream din = new DictZipInputStream(new RandomAccessInputStream(new
 206                 RandomAccessFile(zippedFile, "r")))) {
 207             din.seek(size - 2);
 208             int len = din.read(buf, 0, 1);
 209             assertTrue(len > 0);
 210         }
 211         assertEquals(expected, buf[0]);
 212     }
 213
 214     /**
 215      * Test case to create large archive.
 216      */
 217     @Test
 218     public void testLargeFileCreation(@TempDir Path tempDir) throws IOException, InterruptedException {
 219         // Run test when running on Linux and dictzip command installed
 220         Assumptions.assumeTrue(Paths.get("/usr/bin/dictzip").toFile().exists());
 221         int size = 45000000;  // about 45MB
 222         byte[] buf = new byte[BUF_LEN];
 223         // create data
 224         Path outTextPath = tempDir.resolve("DictZipText.orig.txt");
 225         prepareLargeTextData(outTextPath, size);
 226         File inputFile = outTextPath.toFile();
 227         Path zippedPath = tempDir.resolve("DictZipText.txt.dz");
 228         assertEquals(size, inputFile.length());
 229         // create dictZip archive
 230         int defLevel = Deflater.DEFAULT_COMPRESSION;
 231         try (FileInputStream ins = new FileInputStream(inputFile);
 232              DictZipOutputStream dout = new DictZipOutputStream(
 233                      new RandomAccessOutputStream(new RandomAccessFile(zippedPath.toFile(), "rws")),
 234                      BUF_LEN, inputFile.length(), defLevel)) {
 235             int len;
 236             while ((len = ins.read(buf, 0, BUF_LEN)) > 0) {
 237                 dout.write(buf, 0, len);
 238             }
 239             dout.finish();
 240         }
 241         Process process = Runtime.getRuntime().exec(
 242                 String.format("dictzip -d -f -k -v %s", zippedPath.toAbsolutePath()));
 243         StreamGobbler streamGobbler = new StreamGobbler(process.getInputStream(), System.out::println);
 244         Executors.newSingleThreadExecutor().submit(streamGobbler);
 245         int returnCode = process.waitFor();
 246         assertEquals(0, returnCode);
 247     }
 248
 249     /**
 250      * Test case to reproduce issue #24.
 251      * <p>
 252      *     When seek to almost end of large dictionary, it cause error
 253      *     Caused by: java.util.zip.ZipException: invalid distance too far back
 254      * </p>
 255      */
 256     @Test
 257     public void testLargeFileInputOutput(@TempDir Path tempDir) throws IOException {
 258         int size = 45000000;  // about 45MB
 259         int num_chunk = size / BUF_LEN + 1;
 260         byte[] buf = new byte[BUF_LEN];
 261         int[] positions = new int[] {
 262                 BUF_LEN - 10,
 263                 BUF_LEN + 10,
 264                 BUF_LEN * 2 + 10,
 265                 BUF_LEN * 256 - 10,
 266                 BUF_LEN * 256 + 10,
 267                 BUF_LEN * (num_chunk /2 - 1) - 10,
 268                 BUF_LEN * (num_chunk /2 + 1) + 10,
 269                 size - BUF_LEN + 5
 270         };
 271         int cases = positions.length;
 272         byte[] expected = new byte[cases];
 273         int len;
 274         // create data
 275         Path outTextPath = tempDir.resolve("DictZipText.txt");
 276         prepareTextData(outTextPath, size);
 277         File inputFile = outTextPath.toFile();
 278         File zippedFile = tempDir.resolve("DictZipText.txt.dz").toFile();
 279         assertEquals(size, inputFile.length());
 280         // get expectations
 281         try (RandomAccessInputStream is = new RandomAccessInputStream(new RandomAccessFile(inputFile, "r"))) {
 282             for (int i = 0; i < cases; i++) {
 283                 is.seek(positions[i]);
 284                 len = is.read(buf, 0, buf.length);
 285                 assertTrue(len > 0);
 286                 expected[i] = buf[0];
 287             }
 288         }
 289         // create dictZip archive
 290         int defLevel = Deflater.DEFAULT_COMPRESSION;
 291         try (RandomAccessFile raf = new RandomAccessFile(zippedFile, "rws")) {
 292             try (FileInputStream ins = new FileInputStream(inputFile);
 293                  DictZipOutputStream dout = new DictZipOutputStream(new RandomAccessOutputStream(raf),
 294                          BUF_LEN, inputFile.length(), defLevel)) {
 295                 while ((len = ins.read(buf, 0, BUF_LEN)) > 0) {
 296                     dout.write(buf, 0, len);
 297                 }
 298                 dout.finish();
 299             }
 300             raf.seek(0);
 301             // read dictZip archive
 302             try (DictZipInputStream din = new DictZipInputStream(new RandomAccessInputStream(raf))) {
 303                 for (int i = 0; i < cases; i++) {
 304                     System.out.printf("seek position: %d%n", positions[i]);
 305                     din.seek(positions[i]);
 306                     len = din.read(buf, 0, 10);
 307                     assertTrue(len > 0);
 308                     assertEquals(expected[i], buf[0], String.format("Read data invalid at position %d", positions[i]));
 309                 }
 310             }
 311         }
 312     }
 313
 314     private static class StreamGobbler implements Runnable {
 315         private final InputStream inputStream;
 316         private final Consumer<String> consumer;
 317
 318         public StreamGobbler(InputStream inputStream, Consumer<String> consumer) {
 319             this.inputStream = inputStream;
 320             this.consumer = consumer;
 321         }
 322
 323         @Override
 324         public void run() {
 325             new BufferedReader(new InputStreamReader(inputStream)).lines()
 326               .forEach(consumer);
 327         }
 328     }
 329 }