--- /dev/null
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+// +----------------------------------------------------------------------+
+// | PHP Version 5 |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 2010 Takahiro Ooishi |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 3.0 of the PHP license, |
+// | that is bundled with this package in the file LICENSE, and is |
+// | available through the world-wide-web at the following url: |
+// | http://www.php.net/license/3_0.txt. |
+// | If you did not receive a copy of the PHP license and are unable to |
+// | obtain it through the world-wide-web, please send a note to |
+// | license@php.net so we can mail you a copy immediately. |
+// +----------------------------------------------------------------------+
+// | Author: Takahiro Ooishi <taka0125.biz@gmail.com> |
+// +----------------------------------------------------------------------+
+//
+// $Id$
+
+/**
+ * Factory that maps a file extension to the converter that handles it.
+ */
+class Converter_Factory
+{
+    /**
+     * Build the converter for the given extension.
+     *
+     * Only 'pdf' is recognised; the comparison is strict, so the value must
+     * be exactly the lowercase string 'pdf'.
+     *
+     * @param string $extension File extension without a leading dot.
+     * @return Converter_Pdf
+     * @throws Exception When no converter exists for the extension.
+     */
+    public static function create($extension)
+    {
+        if ($extension === 'pdf') {
+            return new Converter_Pdf();
+        }
+
+        throw new Exception("Converter not found. [extension = $extension]");
+    }
+}
--- /dev/null
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+// +----------------------------------------------------------------------+
+// | PHP Version 5 |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 2010 Takahiro Ooishi |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 3.0 of the PHP license, |
+// | that is bundled with this package in the file LICENSE, and is |
+// | available through the world-wide-web at the following url: |
+// | http://www.php.net/license/3_0.txt. |
+// | If you did not receive a copy of the PHP license and are unable to |
+// | obtain it through the world-wide-web, please send a note to |
+// | license@php.net so we can mail you a copy immediately. |
+// +----------------------------------------------------------------------+
+// | Author: Takahiro Ooishi <taka0125.biz@gmail.com> |
+// +----------------------------------------------------------------------+
+//
+// $Id$
+
+/**
+ * Converts PDF data to plain text or simple HTML by shelling out to the
+ * pdftotext command.
+ *
+ * The PDF bytes are written to a temporary input file, pdftotext writes its
+ * result to a temporary output file, and both files are removed when the
+ * object is destroyed.
+ */
+class Converter_Pdf extends Converter_Template
+{
+    /** @var string Path of the temporary file that receives the PDF bytes. */
+    protected $inputFile;
+
+    /** @var string Path of the temporary file pdftotext writes its result to. */
+    protected $outputFile;
+
+    /**
+     * Verify that pdftotext is available and allocate the temporary files.
+     *
+     * @throws Exception When the command is missing or a temporary file
+     *                   cannot be created.
+     */
+    public function __construct()
+    {
+        // PHP does not invoke the parent constructor implicitly; without this
+        // call the "command installed" guard would never run. It is done
+        // first so that no temporary file exists yet if the guard throws.
+        parent::__construct();
+
+        $input = tempnam('/tmp', 'convert_pdf_input_');
+        if ($input === false) {
+            throw new Exception('failed to create temporary input file.');
+        }
+
+        $output = tempnam('/tmp', 'convert_pdf_output_');
+        if ($output === false) {
+            // Do not leave the already-created input file behind.
+            @unlink($input);
+            throw new Exception('failed to create temporary output file.');
+        }
+
+        $this->inputFile = $input;
+        $this->outputFile = $output;
+    }
+
+    /**
+     * Remove the temporary files. Cleanup is best-effort, so failures are
+     * deliberately ignored.
+     */
+    public function __destruct()
+    {
+        foreach (array($this->inputFile, $this->outputFile) as $path) {
+            if (is_string($path) && is_file($path)) {
+                @unlink($path);
+            }
+        }
+    }
+
+    /**
+     * Convert PDF data to plain text.
+     *
+     * @param string $string Raw PDF file contents.
+     * @return string pdftotext output.
+     * @throws Exception When the conversion fails.
+     */
+    public function toText($string)
+    {
+        return $this->convert($string);
+    }
+
+    /**
+     * Convert PDF data to HTML (pdftotext run with -htmlmeta).
+     *
+     * @param string $string Raw PDF file contents.
+     * @return string pdftotext output.
+     * @throws Exception When the conversion fails.
+     */
+    public function toHtml($string)
+    {
+        return $this->convert($string, true);
+    }
+
+    /**
+     * Report whether `which pdftotext` exits with status 0.
+     *
+     * @return bool
+     */
+    protected function isInstalledUseCommand()
+    {
+        $check_command = 'which pdftotext 1>/dev/null 2>/dev/null';
+        $output = array();
+        exec($check_command, $output, $status);
+
+        return $status === 0;
+    }
+
+    /**
+     * Write the PDF data to the input file, run pdftotext and return the
+     * contents of the output file.
+     *
+     * @param string $string  Raw PDF file contents.
+     * @param bool   $to_html When true, pass -htmlmeta to pdftotext.
+     * @return string Contents of the output file.
+     * @throws Exception When a file cannot be written or read, or when
+     *                   pdftotext exits with a non-zero status.
+     */
+    protected function convert($string, $to_html = false)
+    {
+        if (file_put_contents($this->inputFile, $string) === false) {
+            throw new Exception("failed to write input file. [{$this->inputFile}]");
+        }
+
+        $option = $to_html ? '-htmlmeta ' : '';
+        // Paths are escaped so an unusual temp directory name cannot break
+        // or alter the shell command.
+        $command = 'pdftotext ' . $option . '-enc UTF-8 -nopgbrk -eol unix '
+            . escapeshellarg($this->inputFile) . ' '
+            . escapeshellarg($this->outputFile) . ' 2>&1';
+
+        // exec() appends to an existing array, so start from an empty one.
+        $output = array();
+        exec($command, $output, $status);
+        if ($status !== 0) {
+            throw new Exception("command error.[$command]\nstatus={$status}\noutput=" . implode("\n", $output));
+        }
+
+        $result = file_get_contents($this->outputFile);
+        if ($result === false) {
+            throw new Exception("failed to read output file. [{$this->outputFile}]");
+        }
+
+        return $result;
+    }
+}
--- /dev/null
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+// +----------------------------------------------------------------------+
+// | PHP Version 5 |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 2010 Takahiro Ooishi |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 3.0 of the PHP license, |
+// | that is bundled with this package in the file LICENSE, and is |
+// | available through the world-wide-web at the following url: |
+// | http://www.php.net/license/3_0.txt. |
+// | If you did not receive a copy of the PHP license and are unable to |
+// | obtain it through the world-wide-web, please send a note to |
+// | license@php.net so we can mail you a copy immediately. |
+// +----------------------------------------------------------------------+
+// | Author: Takahiro Ooishi <taka0125.biz@gmail.com> |
+// +----------------------------------------------------------------------+
+//
+// $Id$
+
+/**
+ * Base class for converters that depend on an external command.
+ *
+ * Subclasses provide the text/HTML conversions and a check that reports
+ * whether the command they rely on is installed.
+ */
+abstract class Converter_Template
+{
+    /**
+     * Refuse to build the converter when its required command is missing.
+     *
+     * This check runs every time a converter is created for a conversion, so
+     * it adds overhead. The command is unlikely to become unavailable, so
+     * skipping the check might be acceptable in operation.
+     *
+     * @throws Exception When isInstalledUseCommand() reports false.
+     */
+    public function __construct()
+    {
+        $available = $this->isInstalledUseCommand();
+        if ($available) {
+            return;
+        }
+
+        throw new Exception('essential command is not installed.');
+    }
+
+    /**
+     * Convert the given data to plain text.
+     *
+     * @param string $string Source data to convert.
+     */
+    abstract public function toText($string);
+
+    /**
+     * Convert the given data to HTML.
+     *
+     * @param string $string Source data to convert.
+     */
+    abstract public function toHtml($string);
+
+    /**
+     * Report whether the command this converter needs is installed.
+     *
+     * A falsy return value makes the constructor throw.
+     */
+    abstract protected function isInstalledUseCommand();
+}
--- /dev/null
+<?php
+/* vim: set expandtab tabstop=4 shiftwidth=4: */
+// +----------------------------------------------------------------------+
+// | PHP Version 5 |
+// +----------------------------------------------------------------------+
+// | Copyright (c) 2010 Takahiro Ooishi |
+// +----------------------------------------------------------------------+
+// | This source file is subject to version 3.0 of the PHP license, |
+// | that is bundled with this package in the file LICENSE, and is |
+// | available through the world-wide-web at the following url: |
+// | http://www.php.net/license/3_0.txt. |
+// | If you did not receive a copy of the PHP license and are unable to |
+// | obtain it through the world-wide-web, please send a note to |
+// | license@php.net so we can mail you a copy immediately. |
+// +----------------------------------------------------------------------+
+// | Author: Takahiro Ooishi <taka0125.biz@gmail.com> |
+// +----------------------------------------------------------------------+
+//
+// $Id$
+
+require_once dirname(__FILE__) . '/Template.class.php';
+require_once dirname(__FILE__) . '/Pdf.class.php';
+require_once dirname(__FILE__) . '/Factory.class.php';
--- /dev/null
+<?php
+require_once dirname(__FILE__) . '/../../lib/Converter/require.php';
+
+// Sample run: convert the PDF at data/test.pdf (relative to this script) to
+// text and to HTML, and dump both results.
+$file = dirname(__FILE__) . '/data/test.pdf';
+
+try {
+
+    $converter = Converter_Factory::create('pdf');
+
+    // Fail with a clear message when the PDF cannot be read, instead of
+    // passing false on to the converter and getting a confusing command
+    // error from it.
+    $data = is_readable($file) ? file_get_contents($file) : false;
+    if ($data === false) {
+        throw new Exception("failed to read file. [$file]");
+    }
+
+    $string = $converter->toText($data);
+    $html = $converter->toHtml($data);
+
+    var_dump($string);
+    var_dump($html);
+
+} catch (Exception $e) {
+    // Print the failure and signal it through the exit status.
+    echo $e->getMessage() . "\n";
+    exit(1);
+}
+