From e4731ba1cc7014a3072114a05be0f6d2373792c7 Mon Sep 17 00:00:00 2001 From: wordring Date: Tue, 1 Sep 2015 06:14:27 +0900 Subject: [PATCH] =?utf8?q?HTTP=E3=83=AA=E3=82=AF=E3=82=A8=E3=82=B9?= =?utf8?q?=E3=83=88=E3=83=91=E3=83=BC=E3=82=B5=E3=81=8C=E3=83=95=E3=82=A3?= =?utf8?q?=E3=83=BC=E3=83=AB=E3=83=89=E5=80=A4=E3=81=AE=E3=82=AF=E3=82=A9?= =?utf8?q?=E3=83=BC=E3=83=88=E3=81=A7=E9=81=B7=E7=A7=BB=E3=82=92=E9=96=93?= =?utf8?q?=E9=81=95=E3=81=88=E3=81=A6=E3=81=84=E3=81=9F=E5=95=8F=E9=A1=8C?= =?utf8?q?=E3=82=92=E4=BF=AE=E6=AD=A3=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- http/httprequest.cpp | 4 +- proxy/image/edit-find.png | Bin 0 -> 1517 bytes proxy/main.cpp | 48 ++- proxy/mainwindow.cpp | 6 + proxy/mainwindow.h | 2 + proxy/proxy.qrc | 1 + proxy/tmcandidate.h | 16 +- proxy/tmcandidatewidget.cpp | 93 +++-- proxy/tmcandidatewidget.h | 33 +- proxy/tmdatabase.cpp | 259 ++++++++----- proxy/tmdatabase.h | 23 +- proxy/tmeditorwidget.cpp | 56 +-- proxy/tmeditorwidget.h | 15 +- proxy/tmservice.cpp | 87 ++++- proxy/tmservice.h | 26 +- proxy/tmsocket.cpp | 47 ++- proxy/tmsocket.h | 15 +- proxy/tmtext.cpp | 42 ++- proxy/tmtext.h | 30 +- third-party/include/dtl/Diff.hpp | 692 ++++++++++++++++++++++++++++++++++ third-party/include/dtl/Diff3.hpp | 245 ++++++++++++ third-party/include/dtl/Lcs.hpp | 55 +++ third-party/include/dtl/Sequence.hpp | 65 ++++ third-party/include/dtl/Ses.hpp | 132 +++++++ third-party/include/dtl/dtl.hpp | 47 +++ third-party/include/dtl/functors.hpp | 137 +++++++ third-party/include/dtl/variables.hpp | 142 +++++++ 27 files changed, 2057 insertions(+), 261 deletions(-) create mode 100644 proxy/image/edit-find.png create mode 100644 third-party/include/dtl/Diff.hpp create mode 100644 third-party/include/dtl/Diff3.hpp create mode 100644 third-party/include/dtl/Lcs.hpp create mode 100644 third-party/include/dtl/Sequence.hpp create mode 100644 third-party/include/dtl/Ses.hpp create mode 100644 
third-party/include/dtl/dtl.hpp create mode 100644 third-party/include/dtl/functors.hpp create mode 100644 third-party/include/dtl/variables.hpp diff --git a/http/httprequest.cpp b/http/httprequest.cpp index bb671e5..424c1ff 100644 --- a/http/httprequest.cpp +++ b/http/httprequest.cpp @@ -282,7 +282,7 @@ int HttpRequestParser::parse(char ch) case 221: if(ch == '\r') goto s260; if(ch == ' ' || ch == '\t') break; - case 230: s230: LOOPCHECK m_state0 = 230; + case 230: LOOPCHECK m_state0 = 230; if(ch == '"') goto s240; if(ch == '\r') goto s260; if(isCTL(ch)) goto sError; @@ -292,7 +292,7 @@ int HttpRequestParser::parse(char ch) case 240: s240: LOOPCHECK m_state0 = 241; break; case 241: - if(ch == '"') goto s230; + if(ch == '"') m_state0 = 230; break; if(ch == '\\') goto s250; if(isCTL(ch)) goto sError; m_request->m_attributes.last().second.append(ch); diff --git a/proxy/image/edit-find.png b/proxy/image/edit-find.png new file mode 100644 index 0000000000000000000000000000000000000000..8cc0349cabc8a2eac103f627a7b0b3aaba7b7ead GIT binary patch literal 1517 zcmV2#(;X$y3xg|Jjqpa}$J z3(~}d2StcH@FI~{6NwQe#upQLaY0Bx7J-N&i71QOv_PYv7%Y~SE~cF>Gjz(#w0G`y z7GH{Cso}hwKM} z<)DYb^*hdn6bT>n60&wttk^TFz=y(K9b`!WMF45j6xRkut)csqKRb-S{`aAldrt+| zZ9lzI#^~(A;$q+Y!UE;jKU-m-?*_}{l1P#jh=xmG>9SR5d@R!8NZQEtE0peW84AXmWfQ+W$C56&*$icCTZa8^#EMi#+9&;l;qDlDBrhEh$P6 z_I=krX){*YeQay$aWDn|kOVzg6$pAJC&r-duk+OBG0)5m-v4kxnR@Px?JWZ*4t*Ry z@yWaH#^n|JHPt?|_oH`cB9Va23#vR4cfXv;SUT1jU9BRtXK_(f|LOO0PEgC8ZYWbP zR9Dv~=glimZQ9gQT~;=GQ#2ZDTCse2nJ(Gqdj=At9XD?@Y+SKH9!REUy?>+;EuMnT&8SzVEm$F^l5yiC8R}jYhn;(zz5SK)|JTP5Fj>=n-Sa9KL*&PP?va3U5{>vgBY&2=jV979om52m#x6K)4PPf+GO!LhA~bEF)_*Iq(mY?nwpwOE*F$!*}xWY@RVUF zIfFj$(<4eUlN8jk+MmdnbABn28O?h&f;@6<$(5V(&WW*XfH<0zVvIPBqi~*98Dk`! 
zrgA1T<^A?*&pOkF#*%_++?N{2Gfsbe0zA^T|B}O;UwTrVwv7E)rWM=1zI=bz;K)db zb51zVDx6~lp}b-k6aLRnb-Xk;`J*&z2q9FmV%G+Q@Oq5!3NAzq03Zy|$EbDP$xmEC zgBLx7kbSZ8dDtHedFx6Gpt`mO4t#OgHS*?~#Axc;BVdFO0ssU65&$Rwpdc*kfUz#9 zqX2*g04e~eUXQ-e@74GE0zndvM7>KIDj^z+!O`Oport()); - TM::SocketServer *socket = new TM::SocketServer(&settings, service, w.editor_widget(), &w); + TM::SocketServer *socket = new TM::SocketServer(&settings, service, &w); server->install(new TM::DefaultHtmlModule(&settings, server->port(), server)); server->install(new TM::ProxyModule( diff --git a/proxy/mainwindow.cpp b/proxy/mainwindow.cpp index a876411..2e4e7f7 100644 --- a/proxy/mainwindow.cpp +++ b/proxy/mainwindow.cpp @@ -1,6 +1,7 @@ #include "mainwindow.h" #include "tmeditorwidget.h" #include "tmcandidatewidget.h" +#include #include "settings.h" @@ -34,6 +35,9 @@ MainWindow::MainWindow(Settings *settings, TM::Service *service, QWidget *parent // 設定の復帰 restoreGeometry(m_settings->value("MainWindow/geometry").toByteArray()); restoreState(m_settings->value("MainWindow/state").toByteArray()); + + service->set_editor_widget(m_editor_widget); + service->set_candidate_widget(m_candidate_widget); } MainWindow::~MainWindow() @@ -44,6 +48,8 @@ MainWindow::~MainWindow() TM::EditorWidget* MainWindow::editor_widget() { return m_editor_widget; } +TM::CandidateWidget* MainWindow::candidate_widget() { return m_candidate_widget; } + void MainWindow::set_http_port(quint16 http_port) { editor_widget()->set_http_port(http_port); diff --git a/proxy/mainwindow.h b/proxy/mainwindow.h index 1d1d85b..37f892b 100644 --- a/proxy/mainwindow.h +++ b/proxy/mainwindow.h @@ -26,6 +26,8 @@ public: ~MainWindow(); TM::EditorWidget* editor_widget(); + TM::CandidateWidget* candidate_widget(); + void set_http_port(quint16 http_port); signals: diff --git a/proxy/proxy.qrc b/proxy/proxy.qrc index 7691451..216d92e 100644 --- a/proxy/proxy.qrc +++ b/proxy/proxy.qrc @@ -4,5 +4,6 @@ image/insert-link.png image/document-properties.png image/web-browser.png + 
image/edit-find.png diff --git a/proxy/tmcandidate.h b/proxy/tmcandidate.h index 749fc76..b209774 100644 --- a/proxy/tmcandidate.h +++ b/proxy/tmcandidate.h @@ -1,15 +1,19 @@ -#ifndef TMCANDIDATE_H +#ifndef TMCANDIDATE_H #define TMCANDIDATE_H -class tmcandidate +namespace TM { -public: - tmcandidate(); -signals: +/*! + * \brief 候補文のためのデータ構造です。 + */ +class candidate_data_type_ +{ -public slots: }; + +} // namespace TM + #endif // TMCANDIDATE_H diff --git a/proxy/tmcandidatewidget.cpp b/proxy/tmcandidatewidget.cpp index e9cf995..972c732 100644 --- a/proxy/tmcandidatewidget.cpp +++ b/proxy/tmcandidatewidget.cpp @@ -1,42 +1,83 @@ - +#include "tmcandidatewidget.h" #include "settings.h" #include "tmservice.h" -#include "tmcandidatewidget.h" - +#include "tmsocket.h" +#include +#include +#include #include #include TM::CandidateWidget::CandidateWidget(Settings *settings, Service *service, QWidget *parent) - : QListWidget(parent) + : QWidget(parent) + , m_service(service) + , m_settings(settings) + , m_connection(nullptr) { - addItem("test\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r"); - addItem("test\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r"); - addItem("test\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r"); - addItem("test\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r"); - addItem("test\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r"); - /* - setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOn); - setWidgetResizable(false); - //new CandidateArea(this); QVBoxLayout *vlayout = new QVBoxLayout(this); vlayout->setSpacing(4); vlayout->setContentsMargins(0, 0, 0, 0); - vlayout->setSizeConstraint(QLayout::SetMinAndMaxSize); - - QLabel *l = new QLabel("test\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na\r\n\r\na", this); - l->setAlignment(Qt::AlignLeft | Qt::AlignTop); - l->setFrameStyle(QFrame::Panel); - //l->setSizePolicy(QSizePolicy(QSizePolicy::Minimum, QSizePolicy::Expanding)); - vlayout->addWidget(l); - QLabel *l2 = new QLabel("test", this); - 
l2->setAlignment(Qt::AlignLeft | Qt::AlignTop); - vlayout->addWidget(l2); - */ + + m_toolbar = new QToolBar("candidate", this); + + m_search = new QAction(QIcon(":/search.png"), "search", this); + m_toolbar->addAction(m_search); + + m_list_widget = new QListWidget(this); + m_list_widget->setWordWrap(true); + + vlayout->addWidget(m_toolbar); + vlayout->addWidget(m_list_widget); + + connect(m_search, SIGNAL(triggered(bool)), this, SLOT(onSearchTriggered(bool))); } +void TM::CandidateWidget::attach(SocketConnection *connection) +{ + if(m_connection == connection) return; + if(m_connection) detach(connection); -TM::CandidateArea::CandidateArea(QWidget *parent) - : QWidget(parent) + + m_connection = connection; +} + +void TM::CandidateWidget::detach(SocketConnection *connection) { + if(m_connection != connection) return; + + clear(); + m_connection = nullptr; } + +void TM::CandidateWidget::clear() { m_list_widget->clear(); } + +void TM::CandidateWidget::append(QString text) +{ + qDebug() << text; + QListWidgetItem *wi = new QListWidgetItem(m_list_widget); + wi->setText(text); + //QLabel *l = new QLabel(m_list_widget); + //l->setTextFormat(Qt::RichText); + //l->setWordWrap(true); + //l->setText(text); + m_list_widget->addItem(wi); + //m_list_widget->setItemWidget(wi, l); +} + +void TM::CandidateWidget::onSearchTriggered(bool) +{ + clear(); + if(!m_connection) return; + TextSentence::pointer sentence = + m_connection->segment_list()->current_sentence(); + if(!sentence) return; + + m_service->find_candidates(sentence); +} + + + + + + diff --git a/proxy/tmcandidatewidget.h b/proxy/tmcandidatewidget.h index e3ca413..0a2e005 100644 --- a/proxy/tmcandidatewidget.h +++ b/proxy/tmcandidatewidget.h @@ -2,8 +2,13 @@ #define TMCANDIDATEWIDGET_H #include -#include -#include +#include + +QT_BEGIN_NAMESPACE +class QToolBar; +class QAction; +class QListWidget; +QT_END_NAMESPACE class Settings; @@ -11,26 +16,34 @@ namespace TM { class Service; +class SocketConnection; -class 
CandidateWidget : public QListWidget +class CandidateWidget : public QWidget { Q_OBJECT public: CandidateWidget(Settings *settings, Service *service, QWidget *parent = 0); + void attach(SocketConnection *connection); + void detach(SocketConnection *connection); + + void clear(); + void append(QString text); signals: public slots: + void onSearchTriggered(bool); private: - ; -}; + Settings *m_settings; + Service *m_service; -class CandidateArea : public QWidget -{ - Q_OBJECT -public: - CandidateArea(QWidget *parent); + SocketConnection *m_connection; + + QToolBar *m_toolbar; + QAction *m_search; + + QListWidget *m_list_widget; }; } // namespace TM diff --git a/proxy/tmdatabase.cpp b/proxy/tmdatabase.cpp index efddaf0..fc3a5c5 100644 --- a/proxy/tmdatabase.cpp +++ b/proxy/tmdatabase.cpp @@ -2,6 +2,8 @@ #include "tmdatabase.h" #include "tmservice.h" +#include "dtl/dtl.hpp" + #include #include #include @@ -9,6 +11,10 @@ #include #include +#include + +#include + #include "debug.h" // DatabaseBase --------------------------------------------------------------- @@ -263,6 +269,7 @@ TM::SentenceDatabase::SentenceDatabase(Settings *settings, int site_id, QString m_find_sentence_by_crc = prepare("SELECT * FROM sentences WHERE crc=?;", Q_FUNC_INFO); m_find_sentence_by_source_id = prepare("SELECT * FROM sentences WHERE source_id=?;", Q_FUNC_INFO); + m_find_sentence = prepare("SELECT * FROM sentences WHERE sentence_id=?;", Q_FUNC_INFO); m_insert_sentence = prepare( "INSERT OR REPLACE INTO sentences(" "source_id, sentence, json, crc, previous_crc, next_crc, user_id, time) " @@ -308,18 +315,22 @@ quint32 TM::SentenceDatabase::find_sentence_id_with_context( return m_find_sentence_id_with_context.value(0).toUInt(); } +TM::sentence_data_type TM::SentenceDatabase::find_sentence(quint32 sentence_id) +{ + assert(sentence_id); + m_find_sentence.bindValue(0, sentence_id); + exec(m_find_sentence, Q_FUNC_INFO); + return stuff_value(&m_find_sentence); +} + TM::sentence_data_type 
TM::SentenceDatabase::find_sentence_by_source_id(int source_id) { assert(source_id); - sentence_data_type result; - m_find_sentence_by_source_id.bindValue(0, source_id); exec(m_find_sentence_by_source_id, Q_FUNC_INFO); - if(!m_find_sentence_by_source_id.next()) return result; - return stuff_value(&m_find_sentence_by_source_id); } @@ -372,9 +383,10 @@ void TM::SentenceDatabase::remove(quint32 source_id) TM::sentence_data_type TM::SentenceDatabase::stuff_value(QSqlQuery *query) { - assert(query->isValid()); sentence_data_type result; + if(!query->next()) return result; + result.sentence_id = query->value(0).toUInt(); result.source_id = query->value(1).toUInt(); result.sentence = query->value(2).toString(); @@ -404,11 +416,6 @@ TM::index_data_type::index_data_type() { } -TM::index_data_type::pointer TM::index_data_type::create() -{ - return pointer(new index_data_type()); -} - TM::IndexDatabase::IndexDatabase(Settings *settings, QString dbname) { m_index_limit = 20 * 1024; @@ -421,117 +428,66 @@ TM::IndexDatabase::IndexDatabase(Settings *settings, QString dbname) // テーブル作成。 exec("CREATE TABLE IF NOT EXISTS indexes(" "index_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL," - "word_id INTEGER UNIQUE NOT NULL," - "sentence_ids BLOB," - "is_stop_word BOOLEAN);", Q_FUNC_INFO); + "word_id INTEGER NOT NULL," + "source_id INTEGER NOT NULL," + "UNIQUE(word_id, source_id));", Q_FUNC_INFO); exec("CREATE INDEX IF NOT EXISTS word_id_index ON indexes(word_id);", Q_FUNC_INFO); + exec("CREATE INDEX IF NOT EXISTS source_id_index ON indexes(source_id);", Q_FUNC_INFO); // クエリ作成。 m_find_index = prepare( - "SELECT * FROM indexes WHERE word_id=?;", Q_FUNC_INFO); + "SELECT source_id FROM indexes WHERE word_id=?;", Q_FUNC_INFO); m_insert_index = prepare( - "INSERT INTO indexes(word_id, sentence_ids, is_stop_word) " - "VALUES(?, ?, ?);", Q_FUNC_INFO); + "INSERT OR REPLACE INTO indexes(word_id, source_id) VALUES(?, ?);", Q_FUNC_INFO); m_update_index = prepare( - "UPDATE indexes SET " - 
"word_id=?," - "sentence_ids=?," - "is_stop_word=? " - "WHERE index_id=?;", Q_FUNC_INFO); + "UPDATE indexes SET word_id=?, source_id=? " + "WHERE word_id=? AND source_id=?;", Q_FUNC_INFO); + m_remove_index = prepare( + "DELETE FROM indexes WHERE word_id=? AND source_id=?;", Q_FUNC_INFO); } -TM::index_data_type::pointer TM::IndexDatabase::find_index(quint32 word_id) +QSet TM::IndexDatabase::find_index(quint32 word_id) { assert(word_id); m_find_index.bindValue(0, word_id); exec(m_find_index, Q_FUNC_INFO); - index_data_type::pointer result; - - if(m_find_index.next()) + QSet result; + while(m_find_index.next()) { - result = index_data_type::create(); - result->index_id = m_find_index.value(0).toUInt(); - result->word_id = m_find_index.value(1).toUInt(); - QByteArray sentence_ids = m_find_index.value(2).toByteArray(); - QDataStream ds(sentence_ids); - while(!ds.atEnd()) - { - quint32 sentence_id; - ds >> sentence_id; - result->sentence_ids.insert(sentence_id); - } - result->is_stop_word = m_find_index.value(3).toBool(); + quint32 source_id = m_find_index.value(0).toUInt(); + result.insert(source_id); } - return result; } -void TM::IndexDatabase::insert_index(WordDatabase::pointer word_database, - sentence_data_type const &sdata, quint32 target_id) +void TM::IndexDatabase::insert_index(quint32 word_id, quint32 source_id) { - for(QString word : sdata.words) - { - if(word.size() <= 2) continue; - - int word_id = 0; - word_id = word_database->word_id(word); - assert(word_id); - - index_data_type::pointer index_data = find_index(word_id); - if(index_data) - { - index_data->sentence_ids.insert(target_id); - // レコードが大きくなりすぎた場合、ストップワードとする。 - if(m_index_limit < index_data->sentence_ids.size()) - { - index_data->sentence_ids.clear(); - index_data->is_stop_word = true; - } - update_index(index_data); - } - else - { - index_data = index_data_type::create(); - index_data->word_id = word_id; - index_data->sentence_ids.insert(target_id); - index_data->is_stop_word = false; - 
insert_index(index_data); - } - } -} - -void TM::IndexDatabase::insert_index(index_data_type::pointer index_data) -{ - assert(index_data); - - QByteArray sentence_ids; - QDataStream ds(&sentence_ids, QIODevice::WriteOnly); - for(quint32 sentence_id : index_data->sentence_ids) ds << sentence_id; + assert(word_id); + assert(source_id); - m_insert_index.bindValue(0, index_data->word_id); - m_insert_index.bindValue(1, sentence_ids); - m_insert_index.bindValue(2, index_data->is_stop_word); + m_insert_index.bindValue(0, word_id); + m_insert_index.bindValue(1, source_id); exec(m_insert_index, Q_FUNC_INFO); } -void TM::IndexDatabase::update_index(index_data_type::pointer index_data) +void TM::IndexDatabase::remove_index(quint32 word_id, quint32 source_id) { - assert(index_data); + assert(word_id); + assert(source_id); + + qDebug() << "word" << word_id << "source" << source_id; + qDebug() << "before remove" << find_index(word_id); - QByteArray sentence_ids; - QDataStream ds(&sentence_ids, QIODevice::WriteOnly); - for(quint32 sentence_id : index_data->sentence_ids) ds << sentence_id; + m_remove_index.bindValue(0, word_id); + m_remove_index.bindValue(1, source_id); + exec(m_remove_index, Q_FUNC_INFO); - m_update_index.bindValue(0, index_data->word_id); - m_update_index.bindValue(1, sentence_ids); - m_update_index.bindValue(2, index_data->is_stop_word); - m_update_index.bindValue(3, index_data->index_id); + qDebug() << "after remove" << find_index(word_id); - exec(m_update_index, Q_FUNC_INFO); } TM::IndexDatabase::pointer TM::IndexDatabase::create(Settings *settings, QString dbname) @@ -711,12 +667,16 @@ void TM::Database::insert_sentence(sentence_data_type source, // 単語とセンテンスの索引を付ける。 // 単語の登録。 - IndexDatabase::pointer index_database = + IndexDatabase::pointer idb = find_index_database(site_id, target.scode, target.tcode); - assert(index_database); - WordDatabase::pointer word_database = find_word_database(target.scode); - assert(word_database); - 
index_database->insert_index(word_database, source, target_id); + assert(idb); + WordDatabase::pointer wdb = find_word_database(target.scode); + assert(wdb); + for(QString word : source.words) + { + quint32 word_id = wdb->word_id(word); + idb->insert_index(word_id, source_id); + } } /*! @@ -754,8 +714,109 @@ void TM::Database::remove_sentence( SentenceDatabase::pointer tdb = find_sentence_database(site_id, sdata.tcode); assert(tdb); - tdb->remove(source_id); + + // 索引の消去。 + IndexDatabase::pointer idb = find_index_database(site_id, sdata.scode, sdata.tcode); + assert(idb); + WordDatabase::pointer wdb = find_word_database(sdata.scode); + assert(wdb); + for(QString word : sdata.words) + { + quint32 word_id = wdb->word_id(word); + idb->remove_index(word_id, source_id); + } +} + +void TM::Database::find_candidates(sentence_data_type source, + TextSentence::weak_pointer token) +{ + quint32 site_id = find_site_id(source.site); + WordDatabase::pointer wdb = find_word_database(source.scode); + IndexDatabase::pointer idb = + find_index_database(site_id, source.scode, source.tcode); + + QMap source_ids_map; // 原文ID、回数。 + for(QString word : source.words) + { + if(word.size() < 2) continue; + + quint32 word_id = wdb->word_id(word); + QSet ids = idb->find_index(word_id); + if(ids.isEmpty()) continue; + + for(quint32 source_id : ids) source_ids_map[source_id]++; + + if(10000 < source_ids_map.size()) break; //件数制限。 + } + + QList> source_ids; // 原文ID、回数。 + // コピー。 + QMap::const_iterator it1 = source_ids_map.begin(); + QMap::const_iterator it2 = source_ids_map.end(); + while(it1 != it2) + { + source_ids.append(qMakePair(it1.key(), it1.value())); + ++it1; + } + source_ids_map.clear(); + // matchする単語数が多い順にソート。 + qSort(source_ids.begin(), source_ids.end(), + [](QPair const &lhs, QPair const &rhs)->bool{ + return lhs.second > rhs.second; + }); + // 件数削減。 + if(100 < source_ids.size()) + source_ids.erase(source_ids.begin() + 100, source_ids.end()); + + // 編集距離。 + QList candidates; + 
SentenceDatabase::pointer sdb = find_sentence_database(site_id, source.scode); + for(QPair ¤t : source_ids) + { + candidate_data_type candidate; + + quint32 source_id = current.first; + candidate.source = sdb->find_sentence(source_id); + + std::wstring A = source.sentence.toStdWString(); + std::wstring B = candidate.source.sentence.toStdWString(); + + dtl::Diff d(A, B); + d.compose(); + candidate.edit_distance = d.getEditDistance(); + + candidates.append(candidate); + } + // 編集距離の小さい順にソート。 + qSort(candidates.begin(), candidates.end(), + [](candidate_data_type const &lhs, candidate_data_type const &rhs)->bool{ + return lhs.edit_distance < rhs.edit_distance; + }); + // 件数削減。 + if(10 < candidates.size()) + candidates.erase(candidates.begin() + 10, candidates.end()); + + // 訳文付与。 + SentenceDatabase::pointer tdb = find_sentence_database(site_id, source.tcode); + assert(tdb); + for(candidate_data_type &c : candidates) + { + quint32 source_id = c.source.sentence_id; + assert(source_id); + c.target = tdb->find_sentence_by_source_id(source_id); + } + + // サービスに返答。 + for(candidate_data_type &c : candidates) + { + QMetaObject::invokeMethod( + m_service, "candidate_found", + Qt::QueuedConnection, + Q_ARG(candidate_data_type, c), + Q_ARG(TextSentence::weak_pointer, token)); + } + } QString TM::Database::find_language_name(int code) const @@ -808,11 +869,11 @@ TM::IndexDatabase::pointer TM::Database::find_index_database( // データベース名を作成する。 QString dbname, sname, tname, site_name; - sname = QLocale::languageToString(static_cast(scode)); + sname = QLocale::languageToString(static_cast(scode)).toLower(); assert(!sname.isEmpty()); if(!sname.isEmpty()) { - tname = find_language_name(tcode).toLower(); + tname = QLocale::languageToString(static_cast(tcode)).toLower(); assert(!tname.isEmpty()); } if(!tname.isEmpty()) diff --git a/proxy/tmdatabase.h b/proxy/tmdatabase.h index 36c1d6a..0d6074c 100644 --- a/proxy/tmdatabase.h +++ b/proxy/tmdatabase.h @@ -123,6 +123,7 @@ public: bool 
find_sentence_by_crc( quint32 crc, QString *tsentence, QJsonArray *json = nullptr); + sentence_data_type find_sentence(quint32 sentence_id); sentence_data_type find_sentence_by_source_id(int source_id); void insert(sentence_data_type const &sentence_data); @@ -148,15 +149,11 @@ private: struct index_data_type { - typedef std::shared_ptr pointer; - index_data_type(); - static pointer create(); - quint32 index_id; - quint32 word_id; - QSet sentence_ids; + quint32 word_id; /*!< 単語ID */ + QSet source_ids; /*!< 原文IDの集合 */ bool is_stop_word; }; @@ -172,12 +169,10 @@ private: IndexDatabase(Settings *settings, QString dbname); public: - index_data_type::pointer find_index(quint32 word_id); - void insert_index(WordDatabase::pointer word_database, - const sentence_data_type &sdata, quint32 target_id); -private: - void insert_index(index_data_type::pointer index_data); - void update_index(index_data_type::pointer index_data); + QSet find_index(quint32 word_id); + void insert_index(quint32 word_id, quint32 source_id); + void remove_index(quint32 word_id, quint32 source_id); + public: static pointer create(Settings *settings, QString dbname); @@ -185,6 +180,7 @@ private: QSqlQuery m_find_index; QSqlQuery m_insert_index; QSqlQuery m_update_index; + QSqlQuery m_remove_index; int m_index_limit; }; @@ -215,7 +211,8 @@ public slots: void remove_sentence(sentence_data_type sdata, sentence_data_type tdata); - //void insert_index(); + void find_candidates(sentence_data_type source, + TextSentence::weak_pointer token); private: QString find_language_name(int code) const; diff --git a/proxy/tmeditorwidget.cpp b/proxy/tmeditorwidget.cpp index 6de1857..daf44a5 100644 --- a/proxy/tmeditorwidget.cpp +++ b/proxy/tmeditorwidget.cpp @@ -35,7 +35,6 @@ TM::EditorWidget::EditorWidget(Settings *settings, Service *service, QWidget *parent) : QWidget(parent) - , m_mutex(QMutex::Recursive) , m_service(service) , m_settings(settings) , m_http_port(80) @@ -91,9 +90,8 @@ void 
TM::EditorWidget::attach(SocketConnection *socket) if(m_socket == socket) return; if(m_socket) detach(m_socket); - QMutexLocker lock(&m_mutex); m_socket = socket; - connect(this, SIGNAL(editModeChanged(bool)), m_socket, SLOT(changeEditMode(bool))); +// connect(this, SIGNAL(editModeChanged(bool)), m_socket, SLOT(changeEditMode(bool))); set_link_mode_disabled(true); connect(this, SIGNAL(sourceLanguageChanged()), m_socket, SLOT(changeLanguage())); connect(this, SIGNAL(targetLanguageChanged()), m_socket, SLOT(changeLanguage())); @@ -103,10 +101,11 @@ void TM::EditorWidget::attach(SocketConnection *socket) * \brief ブラウザ上でドキュメントがフォーカスを取得することで、 * 現在のSocketConnectionがフォーカスを失った時に呼び出されます。 */ -void TM::EditorWidget::detach(SocketConnection *) +void TM::EditorWidget::detach(SocketConnection *connection) { - QMutexLocker lock(&m_mutex); - disconnect(this, SIGNAL(editModeChanged(bool)), m_socket, SLOT(changeEditMode(bool))); + if(m_socket != connection) return; + +// disconnect(this, SIGNAL(editModeChanged(bool)), m_socket, SLOT(changeEditMode(bool))); disconnect(this, SIGNAL(sourceLanguageChanged()), m_socket, SLOT(changeLanguage())); disconnect(this, SIGNAL(targetLanguageChanged()), m_socket, SLOT(changeLanguage())); @@ -117,23 +116,10 @@ void TM::EditorWidget::detach(SocketConnection *) void TM::EditorWidget::set_http_port(quint16 http_port) { m_http_port = http_port; } /*! - * \brief 編集モードを変更します。 - */ -void TM::EditorWidget::set_edit_mode(bool mode) -{ - QMutexLocker lock(&m_mutex); - bool old_mode = m_edit_mode->isChecked(); - if(old_mode == mode) return; - - m_edit_mode->setChecked(mode); -} - -/*! 
* \brief 編集モードの場合trueを返します。 */ bool TM::EditorWidget::edit_mode() { - QMutexLocker lock(&m_mutex); return m_edit_mode->isChecked(); } @@ -142,7 +128,6 @@ bool TM::EditorWidget::edit_mode() */ void TM::EditorWidget::set_link_mode(bool mode) { - QMutexLocker lock(&m_mutex); bool old_mode = m_link->isChecked(); if(old_mode == mode) return; @@ -154,7 +139,6 @@ void TM::EditorWidget::set_link_mode(bool mode) */ bool TM::EditorWidget::link_mode() { - QMutexLocker lock(&m_mutex); return m_link->isChecked(); } @@ -163,7 +147,6 @@ bool TM::EditorWidget::link_mode() */ void TM::EditorWidget::set_link_mode_disabled(bool disable) { - QMutexLocker lock(&m_mutex); m_link->setDisabled(disable); } @@ -172,7 +155,6 @@ void TM::EditorWidget::set_link_mode_disabled(bool disable) */ int TM::EditorWidget::source_language() { - QMutexLocker lock(&m_mutex); return m_slang->data().toInt(); } @@ -181,13 +163,11 @@ int TM::EditorWidget::source_language() */ int TM::EditorWidget::target_language() { - QMutexLocker lock(&m_mutex); return m_tlang->data().toInt(); } void TM::EditorWidget::set_segment(TextSegment::pointer segment) { - QMutexLocker lock(&m_mutex); m_edit->set_segment(segment); } @@ -217,6 +197,22 @@ void TM::EditorWidget::remove_sentence(TextSentence::pointer text_sentence) m_socket->remove_sentence(text_sentence); } +void TM::EditorWidget::do_panel_entered(SourcePanel *panel) +{ + //assert(m_socket); + if(!m_socket) return; + + m_socket->segment_list()->set_current_sentence(panel->text_sentence()); +} + +void TM::EditorWidget::do_panel_leaved(SourcePanel *) +{ + assert(m_socket); + if(!m_socket) return; + + m_socket->segment_list()->set_current_sentence(TextSentence::pointer()); +} + /*! 
* \brief 言語プラグインが読み込まれるたびに呼び出されます。 * \param code 言語コード。 @@ -260,7 +256,10 @@ void TM::EditorWidget::onEditModeTriggered(bool) m_link->setDisabled(true); //m_slang->setDisabled(!checked); //m_tlang->setDisabled(!checked); - emit editModeChanged(checked); + //emit editModeChanged(checked); + + m_service->change_edit_mode(checked); + if(!checked) m_edit->clear(); m_edit->set_edit_mode(checked); } @@ -835,6 +834,9 @@ void TM::Editor::do_panel_enter(SourcePanel *panel) assert(tp); tp->show(); if(can_link_mode()) parent_editor_widget()->set_link_mode_disabled(false); + + // 編集ウィジェットに通知。 + parent_editor_widget()->do_panel_entered(panel); } /*! @@ -863,6 +865,8 @@ void TM::Editor::do_panel_leave(SourcePanel *panel) parent_editor_widget()->save_sentence(text_sentence); tp->set_text_saved(true); } + // 編集ウィジェットに通知。 + parent_editor_widget()->do_panel_leaved(panel); } /*! diff --git a/proxy/tmeditorwidget.h b/proxy/tmeditorwidget.h index e3ea19e..4fb00a6 100644 --- a/proxy/tmeditorwidget.h +++ b/proxy/tmeditorwidget.h @@ -24,7 +24,6 @@ #include QT_BEGIN_NAMESPACE -class QSettings; class QToolBar; class QAction; @@ -43,6 +42,7 @@ namespace TM class Service; //class SocketConnection; class Editor; +class SourcePanel; class TargetPanel; class EditorWidget : public QWidget @@ -52,11 +52,10 @@ public: EditorWidget(Settings *settings, Service *service, QWidget *parent = 0); void attach(SocketConnection *socket); - void detach(SocketConnection *socket); + void detach(SocketConnection *connection); void set_http_port(quint16 http_port); - void set_edit_mode(bool mode); bool edit_mode(); void set_link_mode(bool mode); bool link_mode(); @@ -69,8 +68,11 @@ public: void save_sentence(TextSentence::pointer text_sentence); void remove_sentence(TextSentence::pointer text_sentence); + void do_panel_entered(SourcePanel *panel); + void do_panel_leaved(SourcePanel *panel); + signals: - void editModeChanged(bool mode_); + //void editModeChanged(bool mode_); void linkModeChanged(bool mode_); void 
sourceLanguageChanged(); void targetLanguageChanged(); @@ -84,7 +86,6 @@ public slots: void onBrowserTriggered(bool); private: - QMutex m_mutex; Service *m_service; Settings *m_settings; quint16 m_http_port; @@ -124,6 +125,10 @@ public: QColor color(int index) const; +signals: + void panelEntered(); + void panelLeaved(); + private: Editor *m_parent_editor; }; diff --git a/proxy/tmservice.cpp b/proxy/tmservice.cpp index 4d50869..629f7ce 100644 --- a/proxy/tmservice.cpp +++ b/proxy/tmservice.cpp @@ -6,6 +6,9 @@ #include "tmsocket.h" +#include "tmeditorwidget.h" +#include "tmcandidatewidget.h" + #include #include @@ -305,11 +308,15 @@ void TM::WordringConnection::sentence_found(QJsonObject json) TM::Service::Service(Settings *settings, QObject *parent) : QObject(parent) , m_settings(settings) + , m_editor_widget(nullptr) + , m_candidate_widget(nullptr) , m_database_thread(new QThread(this)) , m_database(new Database(settings, this)) , m_wordring(new WordringConnection(settings, this)) + , m_current_connection(nullptr) { qRegisterMetaType(); + qRegisterMetaType(); qRegisterMetaType(); setup_crc_table(); @@ -328,6 +335,22 @@ TM::Service::~Service() m_database_thread->wait(); } +TM::EditorWidget* TM::Service::editor_widget() { return m_editor_widget; } + +void TM::Service::set_editor_widget(EditorWidget *editor) { m_editor_widget = editor; } + +void TM::Service::change_edit_mode(bool mode) +{ + for(SocketConnection *connection : m_connections) + { + connection->set_edit_mode(mode); + } +} + +TM::CandidateWidget* TM::Service::candidate_widget() { return m_candidate_widget; } + +void TM::Service::set_candidate_widget(CandidateWidget *candidate) { m_candidate_widget = candidate; } + void TM::Service::attach(SocketConnection *connection) { m_connections.insert(connection); @@ -338,6 +361,11 @@ void TM::Service::detach(SocketConnection *connection) m_connections.remove(connection); } +void TM::Service::set_current_connection(SocketConnection *connection) +{ + 
m_current_connection = connection; +} + void TM::Service::setup_crc_table() { for (quint32 i = 0; i < 256; i++) @@ -393,6 +421,12 @@ void TM::Service::load_languages(QString const &path) } } +Language* TM::Service::find_language(int code) +{ + assert(m_languages.contains(code)); + return m_languages[code]; +} + /*! * \brief 引数として与えられたstringを文に分割します。 * \param code 分割に使用する言語コード。 @@ -436,9 +470,6 @@ quint32 TM::Service::find_site_id(QString host) /*! * \brief 原文から訳文を検索します。 - * \param site_id 翻訳対象サイトを表すID。 - * \param scode 原文の言語コード。 - * \param tcode 訳文の言語コード。 * \param sentence 原文。 */ void TM::Service::find_sentence(TextSentence::pointer sentence) @@ -456,6 +487,27 @@ void TM::Service::find_sentence(TextSentence::pointer sentence) } /*! + * \brief 原文から訳文候補を検索します。 + * \param sentence 原文。 + */ +void TM::Service::find_candidates(TextSentence::pointer sentence) +{ + // 現在の候補をクリア。 + sentence->clear_candidates(); + + sentence_data_type sdata = sentence->ssentence_data(); + // 原文を正規化。 + sdata.sentence = normalize(sdata.scode, sdata.sentence); + + // データベース呼び出し。 + QMetaObject::invokeMethod( + m_database, "find_candidates", + Qt::QueuedConnection, + Q_ARG(sentence_data_type, sdata), + Q_ARG(TextSentence::weak_pointer, sentence)); +} + +/*! * \brief 原文と訳文の対をデータベースとサーバへ登録します。 * \param sentence 挿入する原文、訳文の対。 */ @@ -552,6 +604,27 @@ void TM::Service::sentence_inserted(quint32 source_id, quint32 target_id, sentence->segment_list()->connection()->sentence_inserted(sentence); } +void TM::Service::candidate_found(candidate_data_type candidate, + TextSentence::weak_pointer token) +{ + if(token.expired()) return; + + TextSentence::pointer sentence = token.lock(); + bool ret = sentence->append_candidate(candidate); + + if(m_current_connection != sentence->segment_list()->connection()) + return; + + if(ret) + { + QString s = candidate.source.sentence; + s += "\r\n"; + s += candidate.target.sentence; + s += "\r\n"; + m_candidate_widget->append(s); + } +} + /*! 
* \brief サーバからの検索結果通知。 */ @@ -575,11 +648,17 @@ void TM::Service::sentence_found( // 原文の単語情報を埋める。 assert(m_languages.contains(result.scode)); Language *language = m_languages[result.scode]; + Text::pointer sentences = language->divide_into_sentences(sstring); assert(sentences->size() == 1); Text::pointer words = language->divide_into_words(sentences->begin()); + // 単語を正規化。 for(Text::pointer word = words->begin(); word; word = word->next()) - source.words.append(word->to_string()); + { + QString w = word->to_string(); + w = language->stem(w); + source.words.insert(w); + } // データベースへ保存。 QMetaObject::invokeMethod( diff --git a/proxy/tmservice.h b/proxy/tmservice.h index adea4f7..21227d0 100644 --- a/proxy/tmservice.h +++ b/proxy/tmservice.h @@ -29,6 +29,8 @@ namespace TM class Service; class SocketConnection; +class EditorWidget; +class CandidateWidget; class WordringConnection : public QObject { @@ -83,9 +85,18 @@ public: Service(Settings *settings, QObject *parent = 0); ~Service(); + // エディタ。 + EditorWidget* editor_widget(); + void set_editor_widget(EditorWidget *editor); + CandidateWidget* candidate_widget(); + void set_candidate_widget(CandidateWidget *candidate); + + void change_edit_mode(bool mode); + // SocketConnection void attach(SocketConnection *connection); void detach(SocketConnection *connection); + void set_current_connection(SocketConnection *connection); // CRC32 void setup_crc_table(); @@ -95,6 +106,8 @@ public: // 言語プラグイン void load_languages(QString const &path); + Language* find_language(int code); + Text::pointer divide_into_sentences(int code, QString string); Text::pointer divide_into_words(int code, Text::pointer sentence); @@ -102,11 +115,12 @@ public: // データベース quint32 find_site_id(QString host); - void insert_sentence(TextSentence::pointer sentence); void remove_sentence(TextSentence::pointer sentence); - void find_sentence(TextSentence::pointer sentence); + + void find_candidates(TextSentence::pointer sentence); + private: public: signals: 
@@ -124,6 +138,8 @@ public slots: void sentence_not_found(TextSentence::weak_pointer token); void sentence_inserted(quint32 source_id, quint32 target_id, TextSentence::weak_pointer token); + void candidate_found(candidate_data_type candidate, + TextSentence::weak_pointer token); public: // サーバから void sentence_found(QString sstring, sentence_data_type const &result); @@ -133,16 +149,22 @@ private: QMap m_languages; + EditorWidget *m_editor_widget; + CandidateWidget *m_candidate_widget; + QThread *m_database_thread; Database *m_database; WordringConnection *m_wordring; QSet m_connections; + SocketConnection *m_current_connection; + quint32 m_crc_table[256]; }; Q_DECLARE_METATYPE(sentence_data_type) +Q_DECLARE_METATYPE(candidate_data_type) Q_DECLARE_METATYPE(TextSentence::weak_pointer) } // namespace TM diff --git a/proxy/tmsocket.cpp b/proxy/tmsocket.cpp index 74f7b32..7d37c05 100644 --- a/proxy/tmsocket.cpp +++ b/proxy/tmsocket.cpp @@ -1,6 +1,7 @@ #include "tmsocket.h" -#include "tmeditorwidget.h" #include "tmservice.h" +#include "tmeditorwidget.h" +#include "tmcandidatewidget.h" #include "settings.h" #include "html.h" @@ -50,13 +51,10 @@ TM::HtmlData::pointer TM::HtmlData::create(HtmlNode node_, int begin_, int tail_ // SocketConnection ----------------------------------------------------------- TM::SocketConnection::SocketConnection(Settings *settings, Service *service, - EditorWidget *editor_widget, QWebSocket *socket) + QWebSocket *socket) : QObject(socket) , m_settings(settings) , m_service(service) - , m_mutex(QMutex::Recursive) - , m_editor_widget(editor_widget) - , m_edit_mode(false) , m_segments(TextSegmentList::create(service, this)) { qRegisterMetaType(); @@ -71,7 +69,8 @@ TM::SocketConnection::SocketConnection(Settings *settings, Service *service, TM::SocketConnection::~SocketConnection() { - m_editor_widget->detach(this); + m_service->editor_widget()->detach(this); + m_service->candidate_widget()->detach(this); m_service->detach(this); } @@ -92,11 
+91,11 @@ void TM::SocketConnection::insert_sentence( for(TextSentence::pointer s : sentences) { - TextSegment::pointer current = m_segments->current(); + TextSegment::pointer current = m_segments->current_segment(); if(s->parent() == current) continue; s->set_tsentence(previous_crc, next_crc, tstring, json); - m_editor_widget->set_segment(s->parent()); + m_service->editor_widget()->set_segment(s->parent()); } } @@ -228,9 +227,6 @@ void TM::SocketConnection::changeLanguage() */ void TM::SocketConnection::set_edit_mode(bool edit_mode) { - if(edit_mode == m_edit_mode) return; - - m_edit_mode = edit_mode; QJsonObject json; json["cmd"] = "set_edit_mode"; json["edit_mode"] = edit_mode; @@ -266,7 +262,8 @@ void TM::SocketConnection::do_edit_segment(QJsonObject const &json) if(!segment) return; // 編集中のセグメントに変更があった場合のみ、エディタにセット。 - if(m_segments->set_current(segment)) m_editor_widget->set_segment(segment); + if(m_segments->set_current_segment(segment)) + m_service->editor_widget()->set_segment(segment); } /*! 
@@ -275,10 +272,15 @@ void TM::SocketConnection::do_edit_segment(QJsonObject const &json) */ void TM::SocketConnection::do_focus(QJsonObject const &) { - m_editor_widget->attach(this); + set_edit_mode(m_service->editor_widget()->edit_mode()); - m_editor_widget->set_edit_mode(m_edit_mode); - if(m_segments->current()) m_editor_widget->set_segment(m_segments->current()); + m_service->editor_widget()->attach(this); + m_service->candidate_widget()->attach(this); + + m_service->set_current_connection(this); + + if(m_segments->current_segment()) + m_service->editor_widget()->set_segment(m_segments->current_segment()); } void TM::SocketConnection::do_blur(QJsonObject const &) @@ -288,13 +290,10 @@ void TM::SocketConnection::do_blur(QJsonObject const &) void TM::SocketConnection::do_load(QJsonObject const &json) { - m_editor_widget->attach(this); - - set_edit_mode(m_editor_widget->edit_mode()); + set_edit_mode(m_service->editor_widget()->edit_mode()); - m_segments->set_scode(m_editor_widget->source_language()); - m_segments->set_tcode(m_editor_widget->target_language()); - qDebug() << QUrl(json["url"].toString()); + m_segments->set_scode(m_service->editor_widget()->source_language()); + m_segments->set_tcode(m_service->editor_widget()->target_language()); m_segments->set_url(QUrl(json["url"].toString())); } @@ -339,14 +338,12 @@ void TM::SocketConnection::onBinaryMessageReceived(QByteArray const &message) // SocketServer --------------------------------------------------------------- -TM::SocketServer::SocketServer(Settings *settings, Service* service, - EditorWidget *editor_widget, QObject *parent) +TM::SocketServer::SocketServer(Settings *settings, Service* service, QObject *parent) : QObject(parent) , m_settings(settings) , m_service(service) , m_server(new QWebSocketServer(QStringLiteral("wordring websocket"), QWebSocketServer::NonSecureMode, this)) - , m_editor_widget(editor_widget) { connect(m_server, SIGNAL(newConnection()), this, SLOT(onNewConnection())); @@ -380,7 
+377,7 @@ void TM::SocketServer::onNewConnection() { QWebSocket *socket = m_server->nextPendingConnection(); connect(socket, SIGNAL(disconnected()), this, SLOT(onDisconnected())); - new SocketConnection(m_settings, m_service, m_editor_widget, socket); + new SocketConnection(m_settings, m_service, socket); m_sockets.push_back(socket); } diff --git a/proxy/tmsocket.h b/proxy/tmsocket.h index a2a82c4..8e657f2 100644 --- a/proxy/tmsocket.h +++ b/proxy/tmsocket.h @@ -29,7 +29,6 @@ namespace TM { class Service; -class EditorWidget; class SocketConnection : public QObject { @@ -41,8 +40,7 @@ public: typedef segment_map_type::iterator segment_map_iterator; public: - SocketConnection(Settings *settings, Service *service, - EditorWidget *editor_widget, QWebSocket *socket); + SocketConnection(Settings *settings, Service *service, QWebSocket *socket); virtual ~SocketConnection(); // @@ -93,21 +91,16 @@ private slots: private: Settings *m_settings; Service *m_service; - QMutex m_mutex; - bool m_edit_mode; + TextSegmentList::pointer m_segments; // スレッドを渡るため、共有ポインタの必要がある。 - EditorWidget *m_editor_widget; - - TextSegmentList::pointer m_segments; }; class SocketServer : public QObject { Q_OBJECT public: - SocketServer(Settings *settings, Service* service, - EditorWidget *editor_widget, QObject *parent); + SocketServer(Settings *settings, Service* service, QObject *parent); ~SocketServer(); quint16 port() const; @@ -130,8 +123,6 @@ private: QWebSocketServer *m_server; QList m_sockets; - - EditorWidget *m_editor_widget; }; Q_DECLARE_METATYPE(SocketConnection::pointer) diff --git a/proxy/tmtext.cpp b/proxy/tmtext.cpp index e8bdc95..a5680ee 100644 --- a/proxy/tmtext.cpp +++ b/proxy/tmtext.cpp @@ -516,13 +516,16 @@ TM::sentence_data_type TM::TextSentence::ssentence_data() Service *service = segment_list()->service(); // 原文の単語情報を埋める。 - Text::pointer sentences = service->divide_into_sentences(scode(), sstring); + Language *language = service->find_language(result.scode); + Text::pointer 
sentences = language->divide_into_sentences(sstring); assert(sentences->size() == 1); - Text::pointer words = service->divide_into_words(scode(), sentences->begin()); + Text::pointer words = language->divide_into_words(sentences->begin()); + // 単語を正規化。 for(Text::pointer word = words->begin(); word; word = word->next()) { - QString w = service->normalize(scode(), word->to_string()); - result.words.append(w); + QString w = language->normalize(word->to_string()); + w = language->stem(w); + result.words.insert(w); } return result; @@ -842,19 +845,28 @@ quint32 TM::TextSentence::next_crc() /*! * \brief 訳語候補を追加します。 */ -void TM::TextSentence::append(sentence_data_type const &sentence) +bool TM::TextSentence::append_candidate(candidate_data_type const &candidate) { - m_candidates.append(sentence); + bool result = true; + for(candidate_data_type const &c : m_candidates) + if(c.source.sentence == candidate.source.sentence) result = false; + if(result) m_candidates.append(candidate); + return result; } /*! 
* \brief 訳語候補のリストを返します。 */ -const QList &TM::TextSentence::candidates() const +QList const &TM::TextSentence::candidates() const { return m_candidates; } +void TM::TextSentence::clear_candidates() +{ + m_candidates.clear(); +} + QString TM::TextSentence::debug_dump() const { QString result; @@ -1093,7 +1105,7 @@ TM::TextSegmentList::find_sentences(QString ssentence) return m_source_map.values(ssentence); } -TM::TextSegment::pointer TM::TextSegmentList::current() +TM::TextSegment::pointer TM::TextSegmentList::current_segment() { return m_current_segment; } @@ -1102,14 +1114,24 @@ TM::TextSegment::pointer TM::TextSegmentList::current() * \brief 引数として与えられたセグメントを編集中としてマークします。 * \return 既に編集中とマークされていた場合、falseを返す。 */ -bool TM::TextSegmentList::set_current(TextSegment::pointer segment) +bool TM::TextSegmentList::set_current_segment(TextSegment::pointer segment) { - if(m_current_segment == segment) return false; + if(m_current_segment == segment && m_current_segment) return false; m_current_segment = segment; return true; } +TM::TextSentence::pointer TM::TextSegmentList::current_sentence() +{ + return m_current_sentence; +} + +void TM::TextSegmentList::set_current_sentence(TextSentence::pointer sentence) +{ + m_current_sentence = sentence; +} + TM::SocketConnection* TM::TextSegmentList::connection() { return m_connection; } int TM::TextSegmentList::scode() const { assert(m_scode); return m_scode; } diff --git a/proxy/tmtext.h b/proxy/tmtext.h index f0d8dc8..0a1867a 100644 --- a/proxy/tmtext.h +++ b/proxy/tmtext.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -52,7 +53,7 @@ public: * データベース登録時に、sentenceの単語を示すリスト。 * 原文にのみ必要。 */ - QList words; + QSet words; }; /*! @@ -60,7 +61,10 @@ public: */ class candidate_data_type { - +public: + int edit_distance; + sentence_data_type source; + sentence_data_type target; }; /*! 
@@ -211,8 +215,9 @@ public: quint32 user_id() { return 0; } - void append(sentence_data_type const &sentence); - QList const & candidates() const; + bool append_candidate(candidate_data_type const &candidate); + QList const & candidates() const; + void clear_candidates(); bool is_loaded() const; //void set_loaded(bool loaded); @@ -231,7 +236,7 @@ private: int m_quality; /*!< 訳文の品質 */ - QList m_candidates; /*!< 訳文候補 */ + QList m_candidates; /*!< 訳文候補 */ parent_weak_pointer m_parent; int m_index; /*!< セグメント内における位置 */ @@ -312,7 +317,7 @@ private: Text::pointer m_text; /*!< セグメントのHtmlData付テキスト。 */ // 編集用 - storage_type m_sentences; /*!< 編集用センテンスのリスト。 */ + QList m_sentences; /*!< 編集用センテンスのリスト。 */ // 親子。 parent_weak_pointer m_parent; @@ -344,9 +349,11 @@ public: // 検索。 QList find_sentences(QString ssentence); - // エディタ。 - TextSegment::pointer current(); - bool set_current(TextSegment::pointer segment); + // 編集。 + TextSegment::pointer current_segment(); + bool set_current_segment(TextSegment::pointer segment); + TextSentence::pointer current_sentence(); + void set_current_sentence(TextSentence::pointer sentence); SocketConnection* connection(); @@ -369,13 +376,14 @@ public: private: Service *m_service; - SocketConnection *m_connection; + SocketConnection *m_connection; /*!< このリストのオーナーとなるコネクション */ int m_scode; /*!< 原文言語コード。*/ int m_tcode; /*!< 訳文言語コード。 */ QUrl m_url; QMap m_segments; /*!< HTML内のdata-wordring-segmentの値とセグメントのマップ */ - TextSegment::pointer m_current_segment; /*!< 編集中のセグメントを表す。 */ + TextSegment::pointer m_current_segment; /*!< 編集中のセグメントを表す。(ブラウザから変更)*/ + TextSentence::pointer m_current_sentence; /*!< 編集中のセンテンスを表す。(エディタから変更) */ /*! 
正規化原文と編集文のマルチ・マップ。 */ QMap m_source_map; diff --git a/third-party/include/dtl/Diff.hpp b/third-party/include/dtl/Diff.hpp new file mode 100644 index 0000000..ee16494 --- /dev/null +++ b/third-party/include/dtl/Diff.hpp @@ -0,0 +1,692 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2013 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. 
*/ + +#ifndef DTL_DIFF_H +#define DTL_DIFF_H + +namespace dtl { + + /** + * diff class template + * sequence must support random_access_iterator. + */ + template , typename comparator = Compare< elem > > + class Diff + { + private : + dtl_typedefs(elem, sequence) + sequence A; + sequence B; + size_t M; + size_t N; + size_t delta; + size_t offset; + long long *fp; + long long editDistance; + Lcs< elem > lcs; + Ses< elem > ses; + editPath path; + editPathCordinates pathCordinates; + bool swapped; + bool huge; + bool trivial; + bool editDistanceOnly; + uniHunkVec uniHunks; + comparator cmp; + public : + Diff () {} + + Diff (const sequence& a, + const sequence& b) : A(a), B(b), ses(false) { + init(); + } + + Diff (const sequence& a, + const sequence& b, + bool deletesFirst) : A(a), B(b), ses(deletesFirst) { + init(); + } + + Diff (const sequence& a, + const sequence& b, + const comparator& comp) : A(a), B(b), ses(false), cmp(comp) { + init(); + } + + Diff (const sequence& a, + const sequence& b, + bool deleteFirst, + const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) { + init(); + } + + ~Diff() {} + + long long getEditDistance () const { + return editDistance; + } + + Lcs< elem > getLcs () const { + return lcs; + } + + elemVec getLcsVec () const { + return lcs.getSequence(); + } + + Ses< elem > getSes () const { + return ses; + } + + uniHunkVec getUniHunks () const { + return uniHunks; + } + + /* These should be deprecated */ + bool isHuge () const { + return huge; + } + + void onHuge () { + this->huge = true; + } + + void offHuge () { + this->huge = false; + } + + bool isUnserious () const { + return trivial; + } + + void onUnserious () { + this->trivial = true; + } + + void offUnserious () { + this->trivial = false; + } + + void onOnlyEditDistance () { + this->editDistanceOnly = true; + } + + /* These are the replacements for the above */ + bool hugeEnabled () const { + return huge; + } + + void enableHuge () { + this->huge = true; + } + + void 
disableHuge () { + this->huge = false; + } + + bool trivialEnabled () const { + return trivial; + } + + void enableTrivial () const { + this->trivial = true; + } + + void disableTrivial () { + this->trivial = false; + } + + void editDistanceOnlyEnabled () { + this->editDistanceOnly = true; + } + + /** + * patching with Unified Format Hunks + */ + sequence uniPatch (const sequence& seq) { + elemList seqLst(seq.begin(), seq.end()); + sesElemVec shunk; + sesElemVec_iter vsesIt; + elemList_iter lstIt = seqLst.begin(); + long long inc_dec_total = 0; + long long gap = 1; + for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) { + joinSesVec(shunk, it->common[0]); + joinSesVec(shunk, it->change); + joinSesVec(shunk, it->common[1]); + it->a += inc_dec_total; + inc_dec_total += it->inc_dec_count; + for (long long i=0;ia - gap;++i) { + ++lstIt; + } + gap = it->a + it->b + it->inc_dec_count; + vsesIt = shunk.begin(); + while (vsesIt!=shunk.end()) { + switch (vsesIt->second.type) { + case SES_ADD : + seqLst.insert(lstIt, vsesIt->first); + break; + case SES_DELETE : + if (lstIt != seqLst.end()) { + lstIt = seqLst.erase(lstIt); + } + break; + case SES_COMMON : + if (lstIt != seqLst.end()) { + ++lstIt; + } + break; + default : + // no fall-through + break; + } + ++vsesIt; + } + shunk.clear(); + } + + sequence patchedSeq(seqLst.begin(), seqLst.end()); + return patchedSeq; + } + + /** + * patching with Shortest Edit Script (SES) + */ + sequence patch (const sequence& seq) const { + sesElemVec sesSeq = ses.getSequence(); + elemList seqLst(seq.begin(), seq.end()); + elemList_iter lstIt = seqLst.begin(); + for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) { + switch (sesIt->second.type) { + case SES_ADD : + seqLst.insert(lstIt, sesIt->first); + break; + case SES_DELETE : + lstIt = seqLst.erase(lstIt); + break; + case SES_COMMON : + ++lstIt; + break; + default : + // no through + break; + } + } + sequence patchedSeq(seqLst.begin(), seqLst.end()); + 
return patchedSeq; + } + + /** + * compose Longest Common Subsequence and Shortest Edit Script. + * The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm" + * described by Sun Wu, Udi Manber and Gene Myers + */ + void compose() { + + if (isHuge()) { + pathCordinates.reserve(MAX_CORDINATES_SIZE); + } + + long long p = -1; + fp = new long long[M + N + 3]; + fill(&fp[0], &fp[M + N + 3], -1); + path = editPath(M + N + 3); + fill(path.begin(), path.end(), -1); + ONP: + do { + ++p; + for (long long k=-p;k<=static_cast(delta)-1;++k) { + fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]); + } + for (long long k=static_cast(delta)+p;k>=static_cast(delta)+1;--k) { + fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]); + } + fp[delta+offset] = snake(static_cast(delta), fp[delta-1+offset]+1, fp[delta+1+offset]); + } while (fp[delta+offset] != static_cast(N) && pathCordinates.size() < MAX_CORDINATES_SIZE); + + editDistance += static_cast(delta) + 2 * p; + long long r = path[delta+offset]; + P cordinate; + editPathCordinates epc(0); + + // recording edit distance only + if (editDistanceOnly) { + delete[] this->fp; + return; + } + + while(r != -1) { + cordinate.x = pathCordinates[(size_t)r].x; + cordinate.y = pathCordinates[(size_t)r].y; + epc.push_back(cordinate); + r = pathCordinates[(size_t)r].k; + } + + // record Longest Common Subsequence & Shortest Edit Script + if (!recordSequence(epc)) { + pathCordinates.resize(0); + epc.resize(0); + p = -1; + goto ONP; + } + delete[] this->fp; + } + + /** + * print difference between A and B as an SES + */ + template < typename stream > + void printSES (stream& out) const { + sesElemVec ses_v = ses.getSequence(); + for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out)); + } + + void printSES (ostream& out = cout) const { + printSES< ostream >(out); + } + + /** + * print differences given an SES + */ + template < typename stream > + static void printSES (const Ses< elem >& s, 
stream& out) { + sesElemVec ses_v = s.getSequence(); + for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out)); + } + + static void printSES (const Ses< elem >& s, ostream& out = cout) { + printSES< ostream >(s, out); + } + + /** + * print difference between A and B as an SES with custom printer + */ + template < typename stream, template < typename SEET, typename STRT > class PT > + void printSES (stream& out) const { + sesElemVec ses_v = ses.getSequence (); + for_each (ses_v.begin (), ses_v.end(), PT < sesElem, stream > (out)); + } + + /** + * print difference between A and B in the Unified Format + */ + template < typename stream > + void printUnifiedFormat (stream& out) const { + for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out)); + } + + void printUnifiedFormat (ostream& out = cout) const { + printUnifiedFormat< ostream >(out); + } + + /** + * print unified format difference with given unified format hunks + */ + template < typename stream > + static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) { + for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out)); + } + + static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) { + printUnifiedFormat< ostream >(hunks, out); + } + + /** + * compose Unified Format Hunks from Shortest Edit Script + */ + void composeUnifiedHunks () { + sesElemVec common[2]; + sesElemVec change; + sesElemVec ses_v = ses.getSequence(); + long long l_cnt = 1; + long long length = distance(ses_v.begin(), ses_v.end()); + long long middle = 0; + bool isMiddle, isAfter; + elemInfo einfo; + long long a, b, c, d; // @@ -a,b +c,d @@ + long long inc_dec_count = 0; + uniHunk< sesElem > hunk; + sesElemVec adds; + sesElemVec deletes; + + isMiddle = isAfter = false; + a = b = c = d = 0; + + for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) { + einfo = it->second; + switch (einfo.type) { + case SES_ADD : + middle = 0; + 
++inc_dec_count; + adds.push_back(*it); + if (!isMiddle) isMiddle = true; + if (isMiddle) ++d; + if (l_cnt >= length) { + joinSesVec(change, deletes); + joinSesVec(change, adds); + isAfter = true; + } + break; + case SES_DELETE : + middle = 0; + --inc_dec_count; + deletes.push_back(*it); + if (!isMiddle) isMiddle = true; + if (isMiddle) ++b; + if (l_cnt >= length) { + joinSesVec(change, deletes); + joinSesVec(change, adds); + isAfter = true; + } + break; + case SES_COMMON : + ++b;++d; + if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) { + if (static_cast(common[0].size()) < DTL_CONTEXT_SIZE) { + if (a == 0 && c == 0) { + if (!wasSwapped()) { + a = einfo.beforeIdx; + c = einfo.afterIdx; + } else { + a = einfo.afterIdx; + c = einfo.beforeIdx; + } + } + common[0].push_back(*it); + } else { + rotate(common[0].begin(), common[0].begin() + 1, common[0].end()); + common[0].pop_back(); + common[0].push_back(*it); + ++a;++c; + --b;--d; + } + } + if (isMiddle && !isAfter) { + ++middle; + joinSesVec(change, deletes); + joinSesVec(change, adds); + change.push_back(*it); + if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) { + isAfter = true; + } + adds.clear(); + deletes.clear(); + } + break; + default : + // no through + break; + } + // compose unified format hunk + if (isAfter && !change.empty()) { + sesElemVec_iter cit = it; + long long cnt = 0; + for (long long i=0;isecond.type == SES_COMMON) { + ++cnt; + } + } + if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) { + middle = 0; + isAfter = false; + continue; + } + if (static_cast(common[0].size()) >= DTL_SEPARATE_SIZE) { + long long c0size = static_cast(common[0].size()); + rotate(common[0].begin(), + common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE, + common[0].end()); + for (long long i=0;i + static Ses< elem > composeSesFromStream (stream& st) + { + elem line; + Ses< elem > ret; + long long x_idx, y_idx; + x_idx = y_idx = 1; + while (getline(st, line)) { + elem mark(line.begin(), 
line.begin() + 1); + elem e(line.begin() + 1, line.end()); + if (mark == SES_MARK_DELETE) { + ret.addSequence(e, x_idx, 0, SES_DELETE); + ++x_idx; + } else if (mark == SES_MARK_ADD) { + ret.addSequence(e, y_idx, 0, SES_ADD); + ++y_idx; + } else if (mark == SES_MARK_COMMON) { + ret.addSequence(e, x_idx, y_idx, SES_COMMON); + ++x_idx; + ++y_idx; + } + } + return ret; + } + + private : + /** + * initialize + */ + void init () { + M = distance(A.begin(), A.end()); + N = distance(B.begin(), B.end()); + if (M < N) { + swapped = false; + } else { + swap(A, B); + swap(M, N); + swapped = true; + } + editDistance = 0; + delta = N - M; + offset = M + 1; + huge = false; + trivial = false; + editDistanceOnly = false; + fp = NULL; + } + + /** + * search shortest path and record the path + */ + long long snake(const long long& k, const long long& above, const long long& below) { + long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset]; + long long y = max(above, below); + long long x = y - k; + while ((size_t)x < M && (size_t)y < N && (swapped ? 
cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) { + ++x;++y; + } + + path[(size_t)k+offset] = static_cast(pathCordinates.size()); + if (!editDistanceOnly) { + P p; + p.x = x;p.y = y;p.k = r; + pathCordinates.push_back(p); + } + return y; + } + + /** + * record SES and LCS + */ + bool recordSequence (const editPathCordinates& v) { + sequence_const_iter x(A.begin()); + sequence_const_iter y(B.begin()); + long long x_idx, y_idx; // line number for Unified Format + long long px_idx, py_idx; // cordinates + bool complete = false; + x_idx = y_idx = 1; + px_idx = py_idx = 0; + for (size_t i=v.size()-1;!complete;--i) { + while(px_idx < v[i].x || py_idx < v[i].y) { + if (v[i].y - v[i].x > py_idx - px_idx) { + if (!wasSwapped()) { + ses.addSequence(*y, 0, y_idx, SES_ADD); + } else { + ses.addSequence(*y, y_idx, 0, SES_DELETE); + } + ++y; + ++y_idx; + ++py_idx; + } else if (v[i].y - v[i].x < py_idx - px_idx) { + if (!wasSwapped()) { + ses.addSequence(*x, x_idx, 0, SES_DELETE); + } else { + ses.addSequence(*x, 0, x_idx, SES_ADD); + } + ++x; + ++x_idx; + ++px_idx; + } else { + if (!wasSwapped()) { + lcs.addSequence(*x); + ses.addSequence(*x, x_idx, y_idx, SES_COMMON); + } else { + lcs.addSequence(*y); + ses.addSequence(*y, y_idx, x_idx, SES_COMMON); + } + ++x; + ++y; + ++x_idx; + ++y_idx; + ++px_idx; + ++py_idx; + } + } + if (i == 0) complete = true; + } + + if (x_idx > static_cast(M) && y_idx > static_cast(N)) { + // all recording succeeded + } else { + // trivial difference + if (trivialEnabled()) { + if (!wasSwapped()) { + recordOddSequence(x_idx, M, x, SES_DELETE); + recordOddSequence(y_idx, N, y, SES_ADD); + } else { + recordOddSequence(x_idx, M, x, SES_ADD); + recordOddSequence(y_idx, N, y, SES_DELETE); + } + return true; + } + + // nontrivial difference + sequence A_(A.begin() + (size_t)x_idx - 1, A.end()); + sequence B_(B.begin() + (size_t)y_idx - 1, B.end()); + A = A_; + B = B_; + M = distance(A.begin(), A.end()); + N = distance(B.begin(), 
B.end()); + delta = N - M; + offset = M + 1; + delete[] fp; + fp = new long long[M + N + 3]; + fill(&fp[0], &fp[M + N + 3], -1); + fill(path.begin(), path.end(), -1); + return false; + } + return true; + } + + /** + * record odd sequence in SES + */ + void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) { + while(idx < length){ + ses.addSequence(*it, idx, 0, et); + ++it; + ++idx; + ++editDistance; + } + ses.addSequence(*it, idx, 0, et); + ++editDistance; + } + + /** + * join SES vectors + */ + void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const { + if (!s2.empty()) { + for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) { + s1.push_back(*vit); + } + } + } + + /** + * check if the sequences have been swapped + */ + bool inline wasSwapped () const { + return swapped; + } + + }; +} + +#endif // DTL_DIFF_H diff --git a/third-party/include/dtl/Diff3.hpp b/third-party/include/dtl/Diff3.hpp new file mode 100644 index 0000000..7813210 --- /dev/null +++ b/third-party/include/dtl/Diff3.hpp @@ -0,0 +1,245 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2013 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. */ + +#ifndef DTL_DIFF3_H +#define DTL_DIFF3_H + +namespace dtl { + + /** + * diff3 class template + * sequence must support random_access_iterator. + */ + template , typename comparator = Compare< elem > > + class Diff3 + { + private: + dtl_typedefs(elem, sequence) + sequence A; + sequence B; + sequence C; + sequence S; + Diff< elem, sequence, comparator > diff_ba; + Diff< elem, sequence, comparator > diff_bc; + bool conflict; + elem csepabegin; + elem csepa; + elem csepaend; + public : + Diff3 () {} + Diff3 (const sequence& a, + const sequence& b, + const sequence& c) : A(a), B(b), C(c), + diff_ba(b, a), diff_bc(b, c), + conflict(false) {} + + ~Diff3 () {} + + bool isConflict () const { + return conflict; + } + + sequence getMergedSequence () const { + return S; + } + + /** + * merge changes B and C into A + */ + bool merge () { + if (diff_ba.getEditDistance() == 0) { // A == B + if (diff_bc.getEditDistance() == 0) { // A == B == C + S = B; + return true; + } + S = C; + return true; + } else { // A != B + if (diff_bc.getEditDistance() == 0) { // A != B == C + S = A; + return true; + } else { // A != B != C + S = merge_(); + if 
(isConflict()) { // conflict occurred
+                        return false;
+                    }
+                }
+            }
+            return true;
+        }
+
+        /**
+         * compose differences
+         */
+        void compose () {
+            diff_ba.compose();
+            diff_bc.compose();
+        }
+
+    private :
+        /**
+         * merge implementation: walks the edit scripts of diff_ba and diff_bc in lockstep; on a conflicting pair it sets the conflict flag and returns B
+         */
+        sequence merge_ () {
+            elemVec seq;
+            Ses< elem > ses_ba = diff_ba.getSes();
+            Ses< elem > ses_bc = diff_bc.getSes();
+            sesElemVec ses_ba_v = ses_ba.getSequence();
+            sesElemVec ses_bc_v = ses_bc.getSequence();
+            sesElemVec_iter ba_it = ses_ba_v.begin();
+            sesElemVec_iter bc_it = ses_bc_v.begin();
+            sesElemVec_iter ba_end = ses_ba_v.end();
+            sesElemVec_iter bc_end = ses_bc_v.end();
+
+            while (!isEnd(ba_end, ba_it) || !isEnd(bc_end, bc_it)) {
+                while (true) {
+                    if (!isEnd(ba_end, ba_it) &&
+                        !isEnd(bc_end, bc_it) &&
+                        ba_it->first == bc_it->first &&
+                        ba_it->second.type == SES_COMMON &&
+                        bc_it->second.type == SES_COMMON) {
+                        // do nothing
+                    } else {
+                        break;
+                    }
+                    if (!isEnd(ba_end, ba_it)) seq.push_back(ba_it->first);
+                    else if (!isEnd(bc_end, bc_it)) seq.push_back(bc_it->first);
+                    forwardUntilEnd(ba_end, ba_it);
+                    forwardUntilEnd(bc_end, bc_it);
+                }
+                if (isEnd(ba_end, ba_it) || isEnd(bc_end, bc_it)) break;
+                if ( ba_it->second.type == SES_COMMON
+                    && bc_it->second.type == SES_DELETE) {
+                    forwardUntilEnd(ba_end, ba_it);
+                    forwardUntilEnd(bc_end, bc_it);
+                } else if (ba_it->second.type == SES_COMMON &&
+                           bc_it->second.type == SES_ADD) {
+                    seq.push_back(bc_it->first);
+                    forwardUntilEnd(bc_end, bc_it);
+                } else if (ba_it->second.type == SES_DELETE &&
+                           bc_it->second.type == SES_COMMON) {
+                    forwardUntilEnd(ba_end, ba_it);
+                    forwardUntilEnd(bc_end, bc_it);
+                } else if (ba_it->second.type == SES_DELETE &&
+                           bc_it->second.type == SES_DELETE) {
+                    if (ba_it->first == bc_it->first) {
+                        forwardUntilEnd(ba_end, ba_it);
+                        forwardUntilEnd(bc_end, bc_it);
+                    } else {
+                        // conflict
+                        conflict = true;
+                        return B;
+                    }
+                } else if (ba_it->second.type == SES_DELETE &&
+                           bc_it->second.type == SES_ADD) {
+                    // conflict
+                    conflict = true;
+                    return B;
+                } else if (ba_it->second.type == SES_ADD &&
+                           bc_it->second.type == SES_COMMON) {
+                    seq.push_back(ba_it->first);
+                    forwardUntilEnd(ba_end, ba_it);
+                } else if (ba_it->second.type == SES_ADD &&
+                           bc_it->second.type == SES_DELETE) {
+                    // conflict
+                    conflict = true;
+                    return B;
+                } else if (ba_it->second.type == SES_ADD &&
+                           bc_it->second.type == SES_ADD) {
+                    if (ba_it->first == bc_it->first) {
+                        seq.push_back(ba_it->first);
+                        forwardUntilEnd(ba_end, ba_it);
+                        forwardUntilEnd(bc_end, bc_it);
+                    } else {
+                        // conflict
+                        conflict = true;
+                        return B;
+                    }
+                }
+            }
+
+            if (isEnd(ba_end, ba_it)) {
+                addDecentSequence(bc_end, bc_it, seq);
+            } else if (isEnd(bc_end, bc_it)) {
+                addDecentSequence(ba_end, ba_it, seq);
+            }
+
+            sequence mergedSeq(seq.begin(), seq.end());
+            return mergedSeq;
+        }
+
+        /**
+         * append every element of s2 to s1
+         */
+        void inline joinElemVec (elemVec& s1, elemVec& s2) const {
+            if (!s2.empty()) {
+                for (elemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
+                    s1.push_back(*vit);
+                }
+            }
+        }
+
+        /**
+         * check whether iterator it has reached end
+         */
+        template <typename T_iter>
+        bool inline isEnd (const T_iter& end, const T_iter& it) const {
+            return it == end ? true : false;
+        }
+
+        /**
+         * advance the iterator by one step unless it is already at end
+         */
+        template <typename T_iter>
+        void inline forwardUntilEnd (const T_iter& end, T_iter& it) const {
+            if (!isEnd(end, it)) ++it;
+        }
+
+        /**
+         * append the remaining elements whose SES type is ADD to seq
+         */
+        void inline addDecentSequence (const sesElemVec_iter& end, sesElemVec_iter& it, elemVec& seq) const {
+            while (!isEnd(end, it)) {
+                if (it->second.type == SES_ADD) seq.push_back(it->first);
+                ++it;
+            }
+        }
+
+    };
+}
+
+#endif // DTL_DIFF3_H
diff --git a/third-party/include/dtl/Lcs.hpp b/third-party/include/dtl/Lcs.hpp
new file mode 100644
index 0000000..7cc0bc8
--- /dev/null
+++ b/third-party/include/dtl/Lcs.hpp
@@ -0,0 +1,55 @@
+/**
+   dtl -- Diff Template Library
+
+   In short, Diff Template Library is distributed under so called "BSD license",
+
+   Copyright (c) 2013 Tatsuhiko Kubo
+   All rights reserved.
+ + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. 
*/
+
+#ifndef DTL_LCS_H
+#define DTL_LCS_H
+
+namespace dtl {
+
+    /**
+     * Longest Common Subsequence template class
+     */
+    template <typename elem>
+    class Lcs : public Sequence< elem >
+    {
+    public :
+        Lcs () {}
+        ~Lcs () {}
+    };
+}
+
+#endif // DTL_LCS_H
diff --git a/third-party/include/dtl/Sequence.hpp b/third-party/include/dtl/Sequence.hpp
new file mode 100644
index 0000000..6ff7a6a
--- /dev/null
+++ b/third-party/include/dtl/Sequence.hpp
@@ -0,0 +1,65 @@
+/**
+   dtl -- Diff Template Library
+
+   In short, Diff Template Library is distributed under so called "BSD license",
+
+   Copyright (c) 2013 Tatsuhiko Kubo
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification,
+   are permitted provided that the following conditions are met:
+
+   * Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+   * Redistributions in binary form must reproduce the above copyright notice,
+     this list of conditions and the following disclaimer in the documentation
+     and/or other materials provided with the distribution.
+
+   * Neither the name of the authors nor the names of its contributors
+     may be used to endorse or promote products derived from this software
+     without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_SEQUENCE_H
+#define DTL_SEQUENCE_H
+
+namespace dtl {
+
+    /**
+     * sequence class template: stores elements in a vector; getSequence returns a copy of it
+     */
+    template <typename elem>
+    class Sequence
+    {
+    public :
+        typedef vector< elem > elemVec;
+        Sequence () {}
+        virtual ~Sequence () {}
+
+        elemVec getSequence () const {
+            return sequence;
+        }
+        void addSequence (elem e) {
+            sequence.push_back(e);
+        }
+    protected :
+        elemVec sequence;
+    };
+}
+
+#endif // DTL_SEQUENCE_H
diff --git a/third-party/include/dtl/Ses.hpp b/third-party/include/dtl/Ses.hpp
new file mode 100644
index 0000000..864ddb3
--- /dev/null
+++ b/third-party/include/dtl/Ses.hpp
@@ -0,0 +1,132 @@
+/**
+   dtl -- Diff Template Library
+
+   In short, Diff Template Library is distributed under so called "BSD license",
+
+   Copyright (c) 2013 Tatsuhiko Kubo
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification,
+   are permitted provided that the following conditions are met:
+
+   * Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+   * Redistributions in binary form must reproduce the above copyright notice,
+     this list of conditions and the following disclaimer in the documentation
+     and/or other materials provided with the distribution.
+ + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* If you use this library, you must include dtl.hpp only. 
*/
+
+#ifndef DTL_SES_H
+#define DTL_SES_H
+
+namespace dtl {
+
+    /**
+     * Shortest Edit Script template class: records (element, elemInfo) pairs and tracks whether only adds, only deletes or only copies were seen
+     */
+    template <typename elem>
+    class Ses : public Sequence< elem >
+    {
+    private :
+        typedef pair< elem, elemInfo > sesElem;
+        typedef vector< sesElem > sesElemVec;
+    public :
+
+        Ses () : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(false) {
+            nextDeleteIdx = 0;
+        }
+        Ses (bool moveDel) : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(moveDel) {
+            nextDeleteIdx = 0;
+        }
+        ~Ses () {}
+
+        bool isOnlyAdd () const {
+            return onlyAdd;
+        }
+
+        bool isOnlyDelete () const {
+            return onlyDelete;
+        }
+
+        bool isOnlyCopy () const {
+            return onlyCopy;
+        }
+
+        bool isOnlyOneOperation () const {
+            return isOnlyAdd() || isOnlyDelete() || isOnlyCopy();
+        }
+
+        bool isChange () const {
+            return !onlyCopy;
+        }
+
+        using Sequence< elem >::addSequence;
+        void addSequence (elem e, long long beforeIdx, long long afterIdx, const edit_t type) {
+            elemInfo info;
+            info.beforeIdx = beforeIdx;
+            info.afterIdx = afterIdx;
+            info.type = type;
+            sesElem pe(e, info);
+            if (!deletesFirst) {
+                sequence.push_back(pe);
+            }
+            switch (type) {
+            case SES_DELETE:
+                onlyCopy = false;
+                onlyAdd = false;
+                if (deletesFirst) {
+                    sequence.insert(sequence.begin() + nextDeleteIdx, pe);
+                    nextDeleteIdx++;
+                }
+                break;
+            case SES_COMMON:
+                onlyAdd = false;
+                onlyDelete = false;
+                if (deletesFirst) {
+                    sequence.push_back(pe);
+                    nextDeleteIdx = sequence.size();
+                }
+                break;
+            case SES_ADD:
+                onlyDelete = false;
+                onlyCopy = false;
+                if (deletesFirst) {
+                    sequence.push_back(pe);
+                }
+                break;
+            }
+        }
+
+        sesElemVec getSequence () const {
+            return sequence;
+        }
+    private :
+        sesElemVec sequence;
+        bool onlyAdd;
+        bool onlyDelete;
+        bool onlyCopy;
+        bool deletesFirst;
+        size_t nextDeleteIdx;
+    };
+}
+
+#endif // DTL_SES_H
diff --git a/third-party/include/dtl/dtl.hpp b/third-party/include/dtl/dtl.hpp
new file mode 100644
index 0000000..9e4e773
--- /dev/null
+++
b/third-party/include/dtl/dtl.hpp @@ -0,0 +1,47 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2013 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef DTL_H +#define DTL_H + +#include "variables.hpp" +#include "functors.hpp" +#include "Sequence.hpp" +#include "Lcs.hpp" +#include "Ses.hpp" +#include "Diff.hpp" +#include "Diff3.hpp" + +#endif // DTL_H diff --git a/third-party/include/dtl/functors.hpp b/third-party/include/dtl/functors.hpp new file mode 100644 index 0000000..4bfe726 --- /dev/null +++ b/third-party/include/dtl/functors.hpp @@ -0,0 +1,137 @@ +/** + dtl -- Diff Template Library + + In short, Diff Template Library is distributed under so called "BSD license", + + Copyright (c) 2013 Tatsuhiko Kubo + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of the authors nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_FUNCTORS_H
+#define DTL_FUNCTORS_H
+
+namespace dtl {
+
+    /**
+     * printer class template (abstract base; the default constructor writes to cout)
+     */
+    template <typename sesElem, typename stream = ostream >
+    class Printer
+    {
+    public :
+        Printer () : out_(cout) {}
+        Printer (stream& out) : out_(out) {}
+        virtual ~Printer () {}
+        virtual void operator() (const sesElem& se) const = 0;
+    protected :
+        stream& out_;
+    };
+
+    /**
+     * common element printer class template (prints SES_MARK_COMMON followed by the element)
+     */
+    template <typename sesElem, typename stream = ostream >
+    class CommonPrinter : public Printer < sesElem, stream >
+    {
+    public :
+        CommonPrinter () : Printer < sesElem, stream > () {}
+        CommonPrinter (stream& out) : Printer < sesElem, stream > (out) {}
+        ~CommonPrinter () {}
+        void operator() (const sesElem& se) const {
+            this->out_ << SES_MARK_COMMON << se.first << endl;
+        }
+    };
+
+    /**
+     * ses element printer class template (prints the mark matching the element's edit type, then the element)
+     */
+    template <typename sesElem, typename stream = ostream >
+    class ChangePrinter : public Printer < sesElem, stream >
+    {
+    public :
+        ChangePrinter () : Printer < sesElem, stream > () {}
+        ChangePrinter (stream& out) : Printer < sesElem, stream > (out) {}
+        ~ChangePrinter () {}
+        void operator() (const sesElem& se) const {
+            switch (se.second.type) {
+            case SES_ADD:
+                this->out_ << SES_MARK_ADD << se.first << endl;
+                break;
+            case SES_DELETE:
+                this->out_ << SES_MARK_DELETE << se.first << endl;
+                break;
+            case SES_COMMON:
+                this->out_ << SES_MARK_COMMON << se.first << endl;
+                break;
+            }
+        }
+    };
+
+    /**
+     * unified format element printer class template
+     */
+    template <typename sesElem, typename stream = ostream >
+    class UniHunkPrinter
+    {
+    public :
+        UniHunkPrinter () : out_(cout) {}
+        UniHunkPrinter (stream& out) : out_(out) {}
+        ~UniHunkPrinter () {}
+        void operator() (const uniHunk< sesElem >& hunk) const {
+            out_ << "@@"
+                 << " -" << hunk.a << "," << hunk.b
+                 << " +" << hunk.c << "," << hunk.d
+                 << " @@" << endl;
+
+            for_each(hunk.common[0].begin(), hunk.common[0].end(), CommonPrinter< sesElem, stream >(out_));
+            for_each(hunk.change.begin(), hunk.change.end(), ChangePrinter< sesElem, stream >(out_));
+            for_each(hunk.common[1].begin(), hunk.common[1].end(), CommonPrinter< sesElem, stream >(out_));
+        }
+    private :
+        stream& out_;
+    };
+
+    /**
+     * compare class template (impl defaults to operator==; virtual so it can be overridden)
+     */
+    template <typename elem>
+    class Compare
+    {
+    public :
+        Compare () {}
+        virtual ~Compare () {}
+        virtual inline bool impl (const elem& e1, const elem& e2) const {
+            return e1 == e2;
+        }
+    };
+}
+
+#endif // DTL_FUNCTORS_H
diff --git a/third-party/include/dtl/variables.hpp b/third-party/include/dtl/variables.hpp
new file mode 100644
index 0000000..18415f5
--- /dev/null
+++ b/third-party/include/dtl/variables.hpp
@@ -0,0 +1,142 @@
+/**
+   dtl -- Diff Template Library
+
+   In short, Diff Template Library is distributed under so called "BSD license",
+
+   Copyright (c) 2013 Tatsuhiko Kubo
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification,
+   are permitted provided that the following conditions are met:
+
+   * Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+   * Redistributions in binary form must reproduce the above copyright notice,
+     this list of conditions and the following disclaimer in the documentation
+     and/or other materials provided with the distribution.
+
+   * Neither the name of the authors nor the names of its contributors
+     may be used to endorse or promote products derived from this software
+     without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* If you use this library, you must include dtl.hpp only. */
+
+#ifndef DTL_VARIABLES_H
+#define DTL_VARIABLES_H
+
+#include <vector>
+#include <list>
+#include <string>
+#include <algorithm>
+#include <iostream>
+
+namespace dtl {
+
+    using std::vector;
+    using std::string;
+    using std::pair;
+    using std::ostream;
+    using std::list;
+    using std::for_each;
+    using std::distance;
+    using std::fill;
+    using std::cout;
+    using std::endl;
+    using std::rotate;
+    using std::swap;
+    using std::max;
+
+    /**
+     * version string
+     */
+    const string version = "1.18";
+
+    /**
+     * type of edit for SES
+     */
+    typedef int edit_t;
+    const edit_t SES_DELETE = -1;
+    const edit_t SES_COMMON = 0;
+    const edit_t SES_ADD = 1;
+
+    /**
+     * mark of SES
+     */
+#define SES_MARK_DELETE "-"
+#define SES_MARK_COMMON " "
+#define SES_MARK_ADD "+"
+
+    /**
+     * info for Unified Format
+     */
+    typedef struct eleminfo {
+        long long beforeIdx; // index of prev sequence
+        long long afterIdx; // index of after sequence
+        edit_t type; // type of edit(Add, Delete, Common)
+        bool operator==(const eleminfo& other) const{
+            return (this->beforeIdx == other.beforeIdx && this->afterIdx == other.afterIdx && this->type == other.type);
+        }
+    } elemInfo;
+
+    const long long DTL_SEPARATE_SIZE = 3;
+    const long long DTL_CONTEXT_SIZE = 3;
+
+    /**
+     * coordinate for registering route
+     */
+    typedef struct Point {
+        long long x; // x coordinate
+        long long y; // y coordinate
+        long long k; // vertex
+    } P;
+
+    /**
+     * limit of coordinate size
+     */
+    const unsigned long long MAX_CORDINATES_SIZE = 2000000;
+
+    typedef vector< long long > editPath;
+    typedef vector< P > editPathCordinates;
+
+    /**
+     * Structure of Unified Format Hunk
+     */
+    template <typename sesElem>
+    struct uniHunk {
+        long long a, b, c, d; // @@ -a,b +c,d @@
+        vector< sesElem > common[2]; // common elements: [0] and [1] hold the two runs of unchanged context stored with the changes
+        vector< sesElem > change; // changes
+        long long inc_dec_count; // count of increase and decrease
+    };
+
+#define dtl_typedefs(elem, sequence) \
+    typedef pair< elem, elemInfo > sesElem; \
+    typedef vector< sesElem > sesElemVec; \
+    typedef vector< uniHunk< sesElem > > uniHunkVec; \
+    typedef list< elem > elemList; \
+    typedef vector< elem > elemVec; \
+    typedef typename uniHunkVec::iterator uniHunkVec_iter; \
+    typedef typename sesElemVec::iterator sesElemVec_iter; \
+    typedef typename elemList::iterator elemList_iter; \
+    typedef typename sequence::iterator sequence_iter; \
+    typedef typename sequence::const_iterator sequence_const_iter; \
+    typedef typename elemVec::iterator elemVec_iter;
+
+
+}
+
+#endif // DTL_VARIABLES_H
-- 
2.11.0