OSDN Git Service

[add] initialize master
author User <taka0125.biz@gmail.com>
Sat, 23 Jan 2010 17:56:42 +0000 (02:56 +0900)
committer User <taka0125.biz@gmail.com>
Sat, 23 Jan 2010 17:56:42 +0000 (02:56 +0900)
lib/Converter/Factory.class.php [new file with mode: 0644]
lib/Converter/Pdf.class.php [new file with mode: 0644]
lib/Converter/Template.class.php [new file with mode: 0644]
lib/Converter/require.php [new file with mode: 0644]
test/Converter/data/test.pdf [new file with mode: 0644]
test/Converter/pdf_test.php [new file with mode: 0644]

diff --git a/lib/Converter/Factory.class.php b/lib/Converter/Factory.class.php
new file mode 100644 (file)
index 0000000..6780b94
--- /dev/null
@@ -0,0 +1,28 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+// +----------------------------------------------------------------------+
+// | PHP Version 5                                                        |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 2010 Takahiro Ooishi                                   |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 3.0 of the PHP license,       |
+// | that is bundled with this package in the file LICENSE, and is        |
+// | available through the world-wide-web at the following url:           |
+// | http://www.php.net/license/3_0.txt.                                  |
+// | If you did not receive a copy of the PHP license and are unable to   |
+// | obtain it through the world-wide-web, please send a note to          |
+// | license@php.net so we can mail you a copy immediately.               |
+// +----------------------------------------------------------------------+
+// | Author: Takahiro Ooishi <taka0125.biz@gmail.com>                     |
+// +----------------------------------------------------------------------+
+//
+// $Id$
+
+/**
+ * Factory that maps a file extension to the converter able to handle it.
+ */
+class Converter_Factory
+{
+    /**
+     * Creates the converter registered for the given file extension.
+     *
+     * The extension is compared case-insensitively, so 'pdf' and 'PDF' both
+     * yield a PDF converter.
+     *
+     * @param string $extension File extension without the leading dot (e.g. 'pdf').
+     * @return Converter_Template Converter instance for that extension.
+     * @throws Exception When no converter exists for the extension.
+     */
+    public static function create($extension)
+    {
+        if (is_string($extension) && strtolower($extension) === 'pdf') {
+            return new Converter_Pdf();
+        }
+
+        throw new Exception("Converter not found. [extension = $extension]");
+    }
+}
diff --git a/lib/Converter/Pdf.class.php b/lib/Converter/Pdf.class.php
new file mode 100644 (file)
index 0000000..93518d0
--- /dev/null
@@ -0,0 +1,71 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+// +----------------------------------------------------------------------+
+// | PHP Version 5                                                        |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 2010 Takahiro Ooishi                                   |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 3.0 of the PHP license,       |
+// | that is bundled with this package in the file LICENSE, and is        |
+// | available through the world-wide-web at the following url:           |
+// | http://www.php.net/license/3_0.txt.                                  |
+// | If you did not receive a copy of the PHP license and are unable to   |
+// | obtain it through the world-wide-web, please send a note to          |
+// | license@php.net so we can mail you a copy immediately.               |
+// +----------------------------------------------------------------------+
+// | Author: Takahiro Ooishi <taka0125.biz@gmail.com>                     |
+// +----------------------------------------------------------------------+
+//
+// $Id$
+
+/**
+ * Converts PDF data to plain text or HTML by shelling out to the
+ * `pdftotext` command.
+ *
+ * The PDF bytes are written to a temporary input file, pdftotext writes its
+ * result to a temporary output file, and both files are removed when the
+ * object is destroyed.
+ */
+class Converter_Pdf extends Converter_Template
+{
+    /** @var string Path of the temporary file holding the PDF handed to pdftotext. */
+    protected $inputFile;
+
+    /** @var string Path of the temporary file pdftotext writes its result to. */
+    protected $outputFile;
+
+    /**
+     * Verifies that pdftotext is available and allocates the temporary files.
+     *
+     * @throws Exception When pdftotext is not installed or a temporary file
+     *                   cannot be created.
+     */
+    public function __construct()
+    {
+        // PHP does not call a parent constructor implicitly; without this call
+        // the isInstalledUseCommand() check in Converter_Template never runs.
+        parent::__construct();
+
+        $this->inputFile = $this->createTempFile('convert_pdf_input_');
+        try {
+            $this->outputFile = $this->createTempFile('convert_pdf_output_');
+        } catch (Exception $e) {
+            // Do not leave the first temporary file behind when the second
+            // one cannot be created.
+            @unlink($this->inputFile);
+            throw $e;
+        }
+    }
+
+    /**
+     * Removes the temporary files (best effort).
+     */
+    public function __destruct()
+    {
+        foreach (array($this->inputFile, $this->outputFile) as $path) {
+            if (is_string($path)) {
+                // Deliberately silenced: cleanup must never raise from a destructor.
+                @unlink($path);
+            }
+        }
+    }
+
+    /**
+     * Converts PDF data to plain text.
+     *
+     * @param string $string Raw bytes of the PDF document.
+     * @return string UTF-8 text produced by pdftotext.
+     * @throws Exception When the conversion fails.
+     */
+    public function toText($string)
+    {
+        return $this->convert($string);
+    }
+
+    /**
+     * Converts PDF data to HTML (pdftotext is run with `-htmlmeta`).
+     *
+     * @param string $string Raw bytes of the PDF document.
+     * @return string UTF-8 HTML produced by pdftotext.
+     * @throws Exception When the conversion fails.
+     */
+    public function toHtml($string)
+    {
+        return $this->convert($string, true);
+    }
+
+    /**
+     * Reports whether the `pdftotext` command can be found via `which`.
+     *
+     * @return bool True when `which pdftotext` exits with status 0.
+     */
+    protected function isInstalledUseCommand()
+    {
+        $output = array();
+        exec('which pdftotext 1>/dev/null 2>/dev/null', $output, $status);
+
+        return $status === 0;
+    }
+
+    /**
+     * Runs pdftotext over the given PDF data and returns what it produced.
+     *
+     * @param string $string  Raw bytes of the PDF document.
+     * @param bool   $to_html When true, `-htmlmeta` is passed to pdftotext.
+     * @return string Contents of the file written by pdftotext.
+     * @throws Exception When a temporary file cannot be written or read, or
+     *                   when pdftotext exits with a non-zero status.
+     */
+    protected function convert($string, $to_html = false)
+    {
+        if (file_put_contents($this->inputFile, $string) === false) {
+            throw new Exception("failed to write input file. [{$this->inputFile}]");
+        }
+
+        $option = $to_html ? '-htmlmeta ' : '';
+        // Paths are escaped so that an unusual temp directory cannot break
+        // (or inject into) the shell command.
+        $command = 'pdftotext ' . $option . '-enc UTF-8 -nopgbrk -eol unix '
+            . escapeshellarg($this->inputFile) . ' '
+            . escapeshellarg($this->outputFile) . ' 2>&1';
+
+        // exec() appends to an existing array, so start from an empty one.
+        $output = array();
+        exec($command, $output, $status);
+        if ($status !== 0) {
+            throw new Exception("command error.[$command]\nstatus={$status}\noutput=" . implode("\n", $output));
+        }
+
+        $result = file_get_contents($this->outputFile);
+        if ($result === false) {
+            throw new Exception("failed to read output file. [{$this->outputFile}]");
+        }
+
+        return $result;
+    }
+
+    /**
+     * Creates an empty temporary file in the system temp directory.
+     *
+     * @param string $prefix Prefix for the generated file name.
+     * @return string Path of the created file.
+     * @throws Exception When the file cannot be created.
+     */
+    private function createTempFile($prefix)
+    {
+        $path = tempnam(sys_get_temp_dir(), $prefix);
+        if ($path === false) {
+            throw new Exception("failed to create temporary file. [prefix = $prefix]");
+        }
+
+        return $path;
+    }
+}
diff --git a/lib/Converter/Template.class.php b/lib/Converter/Template.class.php
new file mode 100644 (file)
index 0000000..4f55b53
--- /dev/null
@@ -0,0 +1,34 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+// +----------------------------------------------------------------------+
+// | PHP Version 5                                                        |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 2010 Takahiro Ooishi                                   |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 3.0 of the PHP license,       |
+// | that is bundled with this package in the file LICENSE, and is        |
+// | available through the world-wide-web at the following url:           |
+// | http://www.php.net/license/3_0.txt.                                  |
+// | If you did not receive a copy of the PHP license and are unable to   |
+// | obtain it through the world-wide-web, please send a note to          |
+// | license@php.net so we can mail you a copy immediately.               |
+// +----------------------------------------------------------------------+
+// | Author: Takahiro Ooishi <taka0125.biz@gmail.com>                     |
+// +----------------------------------------------------------------------+
+//
+// $Id$
+
+/**
+ * Base class for converters that depend on an external command.
+ *
+ * Subclasses provide the text/HTML conversions and report whether the
+ * command they rely on is installed; the constructor refuses to build a
+ * converter whose command is missing.
+ */
+abstract class Converter_Template
+{
+    /**
+     * Converts the given data to text.
+     *
+     * @param string $string Data to convert.
+     */
+    abstract public function toText($string);
+
+    /**
+     * Converts the given data to HTML.
+     *
+     * @param string $string Data to convert.
+     */
+    abstract public function toHtml($string);
+
+    /**
+     * Reports whether the command required by the converter is installed.
+     *
+     * The result is evaluated for truthiness by the constructor.
+     */
+    abstract protected function isInstalledUseCommand();
+
+    /**
+     * Ensures the required command is available.
+     *
+     * @throws Exception When isInstalledUseCommand() reports a missing command.
+     */
+    public function __construct()
+    {
+        // This check runs every time a converter is created, which adds
+        // overhead. The command is unlikely to become unavailable, so
+        // skipping the check would probably be fine operationally.
+        $installed = $this->isInstalledUseCommand();
+        if ($installed) {
+            return;
+        }
+
+        throw new Exception('essential command is not installed.');
+    }
+}
diff --git a/lib/Converter/require.php b/lib/Converter/require.php
new file mode 100644 (file)
index 0000000..69a4b25
--- /dev/null
@@ -0,0 +1,23 @@
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+// +----------------------------------------------------------------------+
+// | PHP Version 5                                                        |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 2010 Takahiro Ooishi                                   |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 3.0 of the PHP license,       |
+// | that is bundled with this package in the file LICENSE, and is        |
+// | available through the world-wide-web at the following url:           |
+// | http://www.php.net/license/3_0.txt.                                  |
+// | If you did not receive a copy of the PHP license and are unable to   |
+// | obtain it through the world-wide-web, please send a note to          |
+// | license@php.net so we can mail you a copy immediately.               |
+// +----------------------------------------------------------------------+
+// | Author: Takahiro Ooishi <taka0125.biz@gmail.com>                     |
+// +----------------------------------------------------------------------+
+//
+// $Id$
+
+require_once dirname(__FILE__) . '/Template.class.php';
+require_once dirname(__FILE__) . '/Pdf.class.php';
+require_once dirname(__FILE__) . '/Factory.class.php';
diff --git a/test/Converter/data/test.pdf b/test/Converter/data/test.pdf
new file mode 100644 (file)
index 0000000..d097335
Binary files /dev/null and b/test/Converter/data/test.pdf differ
diff --git a/test/Converter/pdf_test.php b/test/Converter/pdf_test.php
new file mode 100644 (file)
index 0000000..0e36603
--- /dev/null
@@ -0,0 +1,21 @@
+<?php
+require_once dirname(__FILE__) . '/../../lib/Converter/require.php';
+
+// Manual smoke test: converts the bundled PDF fixture to text and to HTML
+// and dumps both results. Exits with status 1 on any failure.
+$file = dirname(__FILE__) . '/data/test.pdf';
+
+try {
+
+    // Fail early with a clear message when the fixture is missing, instead
+    // of letting the problem surface as a pdftotext error.
+    if (!is_readable($file)) {
+        throw new Exception("test data is not readable. [file = $file]");
+    }
+
+    $data = file_get_contents($file);
+    if ($data === false) {
+        throw new Exception("failed to read test data. [file = $file]");
+    }
+
+    $converter = Converter_Factory::create('pdf');
+
+    $string = $converter->toText($data);
+    $html = $converter->toHtml($data);
+
+    var_dump($string);
+    var_dump($html);
+
+} catch (Exception $e) {
+    echo $e->getMessage() . "\n";
+    exit(1);
+}
+