From 68e7fe21146876cfa7e30d18fa589637cf8c7ad2 Mon Sep 17 00:00:00 2001 From: komutan Date: Fri, 13 Sep 2019 11:11:18 +0900 Subject: [PATCH] =?utf8?q?MMF=E3=81=B8=E3=81=AE=E3=82=A2=E3=82=AF=E3=82=BB?= =?utf8?q?=E3=82=B9=E3=82=92=E3=82=A2=E3=83=B3=E3=83=9E=E3=83=8D=E3=83=BC?= =?utf8?q?=E3=82=B8=E3=83=89=E3=83=9D=E3=82=A4=E3=83=B3=E3=82=BF=E7=B5=8C?= =?utf8?q?=E7=94=B1=E3=81=A8=E3=81=97=E3=81=A6=E9=AB=98=E9=80=9F=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- src/LibNMeCab/Core/CharInfo.cs | 4 +- src/LibNMeCab/Core/CharProperty.cs | 4 +- src/LibNMeCab/Core/Connector.cs | 56 ++++++------- src/LibNMeCab/Core/DoubleArray.cs | 152 ++++++++++------------------------ src/LibNMeCab/Core/MeCabDictionary.cs | 118 +++++++++++++++++--------- src/LibNMeCab/Core/StrUtils.cs | 65 +++++---------- src/LibNMeCab/Core/Token.cs | 12 +-- src/LibNMeCab/Core/Tokenizer.cs | 59 +++++++------ 8 files changed, 211 insertions(+), 259 deletions(-) diff --git a/src/LibNMeCab/Core/CharInfo.cs b/src/LibNMeCab/Core/CharInfo.cs index dfdab57..789207d 100644 --- a/src/LibNMeCab/Core/CharInfo.cs +++ b/src/LibNMeCab/Core/CharInfo.cs @@ -10,9 +10,9 @@ namespace NMeCab.Core { public struct CharInfo { - #region Const/Field/Property + #region Const/Field/Property - private readonly uint bits; + private uint bits; /// /// 互換カテゴリ diff --git a/src/LibNMeCab/Core/CharProperty.cs b/src/LibNMeCab/Core/CharProperty.cs index 6c89a81..5973b7a 100644 --- a/src/LibNMeCab/Core/CharProperty.cs +++ b/src/LibNMeCab/Core/CharProperty.cs @@ -32,8 +32,8 @@ namespace NMeCab.Core { string fileName = Path.Combine(dicDir, CharPropertyFile); - using (FileStream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read)) - using (BinaryReader reader = new BinaryReader(stream)) + using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read)) + using (var reader = new BinaryReader(stream)) { this.Open(reader, fileName); } diff --git a/src/LibNMeCab/Core/Connector.cs b/src/LibNMeCab/Core/Connector.cs index e34ebcc..70843b2 100644 --- a/src/LibNMeCab/Core/Connector.cs +++ b/src/LibNMeCab/Core/Connector.cs @@ -20,7 +20,8 @@ namespace NMeCab.Core #if MMF_MTX private MemoryMappedFile mmf; - private MemoryMappedViewAccessor matrix; + private MemoryMappedViewAccessor mmva; + private unsafe short* matrix; #else private short[] matrix; #endif @@ -40,36 +41,33 @@ namespace NMeCab.Core } #if MMF_MTX - - public void Open(string fileName) + public unsafe void Open(string fileName) { - //MMFインスタンスを生成するが、後でDisposeするために保持しておく - this.mmf = MemoryMappedFile.CreateFromFile(fileName, FileMode.Open, - null, 0L, MemoryMappedFileAccess.Read); - this.Open(this.mmf); - } + this.mmf = MemoryMappedFile.CreateFromFile(fileName, FileMode.Open, null, 0L, MemoryMappedFileAccess.Read); + this.mmva = this.mmf.CreateViewAccessor(0L, 0L, MemoryMappedFileAccess.Read); - public void Open(MemoryMappedFile mmf) - { - using (MemoryMappedViewStream stream = mmf.CreateViewStream( - 0L, 0L, MemoryMappedFileAccess.Read)) - using (BinaryReader reader = new BinaryReader(stream)) + byte* ptr = null; + this.mmva.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr); + + using (var stream = mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read)) + using (var reader = new BinaryReader(stream)) { this.LSize = reader.ReadUInt16(); this.RSize = reader.ReadUInt16(); - long offset = stream.Position; - long size = this.LSize * this.RSize * sizeof(short); - this.matrix = mmf.CreateViewAccessor(offset, size, MemoryMappedFileAccess.Read); + long fSize = stream.Position + sizeof(short) * this.LSize * this.RSize; + if (this.mmva.Capacity < fSize) + throw new MeCabInvalidFileException("file size is invalid", fileName); + + ptr += stream.Position; + this.matrix = (short*)ptr; } } - #else - public void Open(string fileName) { - using (FileStream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read)) - using (BinaryReader reader = new BinaryReader(stream)) + using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read)) + using (var reader = new BinaryReader(stream)) { this.Open(reader, fileName); } @@ -89,22 +87,16 @@ namespace NMeCab.Core if (reader.BaseStream.ReadByte() != -1) throw new MeCabInvalidFileException("file size is invalid", fileName); } - #endif #endregion #region Cost - public int Cost(MeCabNode lNode, MeCabNode rNode) + public unsafe int Cost(MeCabNode lNode, MeCabNode rNode) { int pos = lNode.RCAttr + this.LSize * rNode.LCAttr; - -#if MMF_MTX - return this.matrix.ReadInt16(pos * sizeof(short)) + rNode.WCost; -#else return this.matrix[pos] + rNode.WCost; -#endif } #endregion @@ -129,8 +121,14 @@ namespace NMeCab.Core if (disposing) { #if MMF_MTX - if (this.mmf != null) this.mmf.Dispose(); - if (this.matrix != null) this.matrix.Dispose(); + if (this.mmva != null) + { + this.mmva.SafeMemoryMappedViewHandle.ReleasePointer(); + this.mmva.Dispose(); + } + + if (this.mmf != null) + this.mmf.Dispose(); #endif } diff --git a/src/LibNMeCab/Core/DoubleArray.cs b/src/LibNMeCab/Core/DoubleArray.cs index ee133ff..56fb04e 100644 --- a/src/LibNMeCab/Core/DoubleArray.cs +++ b/src/LibNMeCab/Core/DoubleArray.cs @@ -5,9 +5,8 @@ using System; using System.Collections.Generic; using System.Text; +#if !MMF_DIC using System.IO; -#if MMF_DIC -using System.IO.MemoryMappedFiles; #endif namespace NMeCab.Core @@ -15,37 +14,30 @@ namespace NMeCab.Core /// /// Double-Array Trie の実装 /// - public class DoubleArray : IDisposable + public class DoubleArray { #region Array private struct Unit { - public readonly int Base; - public readonly uint Check; - - public Unit(int b, uint c) - { - this.Base = b; - this.Check = c; - } +#pragma warning disable 0649 + public int Base; + public uint Check; +#pragma warning restore 0649 } public const int UnitSize = sizeof(int) + sizeof(uint); #if MMF_DIC - private MemoryMappedViewAccessor accessor; + private unsafe Unit* array; public int Size { - get { return (int)(this.accessor.Capacity) / UnitSize; } + get { return this.TotalSize / UnitSize; } } - public int TotalSize - { - get { return (int)(this.accessor.Capacity); } - } + public int TotalSize { get; private set; } #else @@ -69,20 +61,25 @@ namespace NMeCab.Core #if MMF_DIC - public void Open(MemoryMappedFile mmf, long offset, long size) + public unsafe void Open(byte* ptr, int size) { - this.accessor = mmf.CreateViewAccessor(offset, size, MemoryMappedFileAccess.Read); + this.array = (Unit*)ptr; + this.TotalSize = size; } #else - public void Open(BinaryReader reader, uint size) + public void Open(BinaryReader reader, int size) { this.array = new Unit[size / UnitSize]; for (int i = 0; i < array.Length; i++) { - this.array[i] = new Unit(reader.ReadInt32(), reader.ReadUInt32()); + this.array[i] = new Unit() + { + Base = reader.ReadInt32(), + Check = reader.ReadUInt32() + }; } } @@ -95,71 +92,60 @@ namespace NMeCab.Core public struct ResultPair { public int Value; - public int Length; - - public ResultPair(int r, int t) - { - this.Value = r; - this.Length = t; - } - } - - public unsafe void ExactMatchSearch(byte* key, ResultPair* result, int len, int nodePos) - { - *result = this.ExactMatchSearch(key, len, nodePos); } public unsafe ResultPair ExactMatchSearch(byte* key, int len, int nodePos) { - int b = this.ReadBase(nodePos); - Unit p; + int b = this.array[nodePos].Base; + int p; for (int i = 0; i < len; i++) { - this.ReadUnit(b + key[i] + 1, out p); - if (b == p.Check) + p = b + key[i] + 1; + if (b == this.array[p].Check) { - b = p.Base; + b = this.array[p].Base; } else { - return new ResultPair(-1, 0); + return new ResultPair() { Value = -1, Length = 0 }; } } - this.ReadUnit(b, out p); - int n = p.Base; - if (b == p.Check && n < 0) + p = b; + int n = this.array[b].Base; + if (b == this.array[p].Check && n < 0) { - return new ResultPair(-n - 1, len); + return new ResultPair() { Value = -n - 1, Length = 0 }; } - return new ResultPair(-1, 0); + return new ResultPair() { Value = -1, Length = 0 }; } public unsafe int CommonPrefixSearch(byte* key, ResultPair* result, int resultLen, int len, int nodePos = 0) { - int b = this.ReadBase(nodePos); + int b = this.array[nodePos].Base; int num = 0; int n; - Unit p; + int p; for (int i = 0; i < len; i++) { - this.ReadUnit(b, out p); - n = p.Base; + p = b; + n = this.array[p].Base; - if (b == p.Check && n < 0) + if (b == this.array[p].Check && n < 0) { - if (num < resultLen) result[num] = new ResultPair(-n - 1, i); + if (num < resultLen) + result[num] = new ResultPair() { Value = -n - 1, Length = i }; num++; } - this.ReadUnit(b + key[i] + 1, out p); - if (b == p.Check) + p = b + key[i] + 1; + if (b == this.array[p].Check) { - b = p.Base; + b = this.array[p].Base; } else { @@ -167,69 +153,19 @@ namespace NMeCab.Core } } - this.ReadUnit(b, out p); - n = p.Base; + p = b; + n = this.array[p].Base; - if (b == p.Check && n < 0) + if (b == this.array[p].Check && n < 0) { - if (num < resultLen) result[num] = new ResultPair(-n - 1, len); + if (num < resultLen) + result[num] = new ResultPair() { Value = -n - 1, Length = len }; num++; } return num; } - - - private int ReadBase(int pos) - { -#if MMF_DIC - return this.accessor.ReadInt32(pos * UnitSize); -#else - return this.array[pos].Base; -#endif - } - - private void ReadUnit(int pos, out Unit unit) - { -#if MMF_DIC - this.accessor.Read(pos * UnitSize, out unit); -#else - unit = this.array[pos]; -#endif - } - - #endregion - - #region Dispose - - private bool disposed; - - public void Dispose() - { - this.Dispose(true); - GC.SuppressFinalize(this); - } - - protected virtual void Dispose(bool disposing) - { - if (disposed) return; - - if (disposing) - { -#if MMF_DIC - if (this.accessor != null) this.accessor.Dispose(); -#endif - } - - this.disposed = true; - } - - ~DoubleArray() - { - this.Dispose(false); - } - #endregion } } diff --git a/src/LibNMeCab/Core/MeCabDictionary.cs b/src/LibNMeCab/Core/MeCabDictionary.cs index 77158c2..8dbd609 100644 --- a/src/LibNMeCab/Core/MeCabDictionary.cs +++ b/src/LibNMeCab/Core/MeCabDictionary.cs @@ -21,14 +21,15 @@ namespace NMeCab.Core #if MMF_DIC private MemoryMappedFile mmf; - private MemoryMappedViewAccessor tokens; - private MemoryMappedViewAccessor features; + private MemoryMappedViewAccessor mmva; + private unsafe Token* tokens; + private unsafe byte* features; #else private Token[] tokens; private byte[] features; #endif - private DoubleArray da = new DoubleArray(); + private readonly DoubleArray da = new DoubleArray(); private Encoding encoding; @@ -73,28 +74,26 @@ namespace NMeCab.Core #if MMF_DIC - public void Open(string filePath) + public unsafe void Open(string fileName) { - this.mmf = MemoryMappedFile.CreateFromFile(filePath, FileMode.Open, - null, 0L, MemoryMappedFileAccess.Read); - this.Open(this.mmf, filePath); - } + this.FileName = fileName; - public void Open(MemoryMappedFile mmf, string filePath = null) - { - this.FileName = filePath; + this.mmf = MemoryMappedFile.CreateFromFile(fileName, FileMode.Open, null, 0L, MemoryMappedFileAccess.Read); + this.mmva = mmf.CreateViewAccessor(0L, 0L, MemoryMappedFileAccess.Read); - using (MemoryMappedViewStream stream = mmf.CreateViewStream( - 0L, 0L, MemoryMappedFileAccess.Read)) - using (BinaryReader reader = new BinaryReader(stream)) + byte* ptr = null; + this.mmva.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr); + + using (var stream = this.mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read)) + using (var reader = new BinaryReader(stream)) { uint magic = reader.ReadUInt32(); - if (stream.CanSeek && stream.Length < (magic ^ DictionaryMagicID)) //正確なサイズ取得ができないので不等号で代用 - throw new MeCabInvalidFileException("dictionary file is broken", filePath); + if (this.mmva.Capacity < (magic ^ DictionaryMagicID)) + throw new MeCabInvalidFileException("dictionary file is broken", fileName); this.Version = reader.ReadUInt32(); if (this.Version != DicVersion) - throw new MeCabInvalidFileException("incompatible version", filePath); + throw new MeCabInvalidFileException("incompatible version", fileName); this.Type = (DictionaryType)reader.ReadUInt32(); this.LexSize = reader.ReadUInt32(); @@ -108,22 +107,25 @@ namespace NMeCab.Core string charSet = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII); this.encoding = StrUtils.GetEncoding(charSet); - long offset = stream.Position; - this.da.Open(mmf, offset, dSize); - offset += dSize; - this.tokens = mmf.CreateViewAccessor(offset, tSize, MemoryMappedFileAccess.Read); - offset += tSize; - this.features = mmf.CreateViewAccessor(offset, fSize, MemoryMappedFileAccess.Read); + ptr += stream.Position; + + this.da.Open(ptr, (int)dSize); + ptr += dSize; + + this.tokens = (Token*)ptr; + ptr += tSize; + + this.features = ptr; } } #else - public void Open(string filePath) + public void Open(string fileName) { - this.FileName = filePath; - - using (FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read)) + this.FileName = fileName; + + using (FileStream fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read)) using (BinaryReader reader = new BinaryReader(fileStream)) { this.Open(reader); @@ -153,7 +155,7 @@ namespace NMeCab.Core string charSet = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII); this.encoding = StrUtils.GetEncoding(charSet); - this.da.Open(reader, dSize); + this.da.Open(reader, (int)dSize); this.tokens = new Token[tSize / sizeof(Token)]; for (int i = 0; i < this.tokens.Length; i++) @@ -187,7 +189,7 @@ namespace NMeCab.Core byte* bytes = stackalloc byte[maxByteCount]; int bytesLen = this.encoding.GetBytes(key, len, bytes, maxByteCount); - DoubleArray.ResultPair result = this.da.ExactMatchSearch(bytes, bytesLen, nodePos); + var result = this.da.ExactMatchSearch(bytes, bytesLen, nodePos); //文字数をデコードしたものに変換 result.Length = this.encoding.GetCharCount(bytes, result.Length); @@ -218,19 +220,49 @@ namespace NMeCab.Core #region Get Infomation - public unsafe Token[] GetToken(DoubleArray.ResultPair n) + public int GetTokenSize(int value) { - Token[] dist = new Token[0xFF & n.Value]; - int tokenPos = n.Value >> 8; + return 0xFF & value; + } + + public int GetTokenPos(int value) + { + return value >> 8; + } + #if MMF_DIC - this.tokens.ReadArray(tokenPos * sizeof(Token), dist, 0, dist.Length); + public unsafe Token* GetTokens(int value) + { + return this.tokens + this.GetTokenPos(value); + } + + public unsafe Token[] GetTokensArray(int value) + { + var ret = new Token[this.GetTokenSize(value)]; + var t = this.GetTokens(value); + + for (int i = 0; i < ret.Length; i++) + { + ret[i] = t[i]; + } + + return ret; + } #else - Array.Copy(this.tokens, tokenPos, dist, 0, dist.Length); -#endif - return dist; + public ArraySegment GetTokens(int value) + { + return new ArraySegment(this.tokens, this.GetTokenPos(value), this.GetTokenSize(value)); + } + + public Token[] GetTokensArray(int value) + { + var ret = new Token[this.GetTokenSize(value)]; + Array.Copy(this.tokens, this.GetTokenPos(value), ret, 0, ret.Length); + return ret; } +#endif - public string GetFeature(uint featurePos) + public unsafe string GetFeature(uint featurePos) { return StrUtils.GetString(this.features, (long)featurePos, this.encoding); } @@ -268,11 +300,15 @@ namespace NMeCab.Core if (disposing) { - if (this.da != null) this.da.Dispose(); #if MMF_DIC - if (this.mmf != null) this.mmf.Dispose(); - if (this.tokens != null) this.tokens.Dispose(); - if (this.features != null) this.features.Dispose(); + if (this.mmva != null) + { + this.mmva.SafeMemoryMappedViewHandle.ReleasePointer(); + this.mmva.Dispose(); + } + + if (this.mmf != null) + this.mmf.Dispose(); #endif } diff --git a/src/LibNMeCab/Core/StrUtils.cs b/src/LibNMeCab/Core/StrUtils.cs index da7a6d2..22c276e 100644 --- a/src/LibNMeCab/Core/StrUtils.cs +++ b/src/LibNMeCab/Core/StrUtils.cs @@ -38,7 +38,7 @@ namespace NMeCab.Core public unsafe static string GetString(byte[] bytes, long offset, Encoding enc) { fixed (byte* pBytes = bytes) - return StrUtils.GetString(pBytes + offset, enc); + return StrUtils.GetString(pBytes, offset, enc); } /// @@ -47,6 +47,20 @@ namespace NMeCab.Core /// /// バイト配列の長さはInt32.MaxValueを超えていても良い。 /// + /// デコードするバイトへのポインタ + /// オフセット位置 + /// 文字エンコーディング + /// 文字列(\0は含まない) + public unsafe static string GetString(byte* bytes, long offset, Encoding enc) + { + return StrUtils.GetString(bytes + offset, enc); + } + /// + /// バイト配列の中から終端が\0で表された文字列を取り出す。 + /// + /// + /// バイト配列の長さはInt32.MaxValueを超えていても良い。 + /// /// デコードする最初のバイトへのポインタ /// 文字エンコーディング /// 文字列(\0は含まない) @@ -54,12 +68,13 @@ namespace NMeCab.Core { //バイト長のカウント int byteCount = 0; - while (*bytes != Nul) //終端\0に到達するまでシーク + while (bytes[byteCount] != Nul) //終端\0に到達するまでシーク { checked { byteCount++; } //文字列のバイト長がInt32.MaxValueを超えたならエラー - bytes++; } - bytes -= byteCount; + + if (byteCount == 0) + return ""; //生成されうる最大文字数のバッファを確保 int maxCharCount = enc.GetMaxCharCount(byteCount); @@ -71,48 +86,6 @@ namespace NMeCab.Core } } -#if MMF_DIC - - /// - /// MemoryMappedViewAccessorから終端が\0で表された文字列を取り出す。 - /// - /// - /// MemoryMappedViewAccessorの容量はInt32.MaxValueを超えていても良い。 - /// - /// MemoryMappedViewAccessor - /// オフセット位置 - /// 文字エンコーディング - /// 内部で使用するバッファの初期サイズ - /// 文字列(\0は含まない) - public static string GetString(MemoryMappedViewAccessor accessor, long offset, Encoding enc, - int buffSize = 128) - { - byte[] buff = new byte[buffSize]; //IO回数削減のためのバッファ配列 - accessor.ReadArray(offset, buff, 0, buffSize); //初期読込 - - //バイト長のカウント - int byteCount = 0; - while (buff[byteCount] != Nul) //終端\0に到達するまでシーク - { - byteCount++; - - if (byteCount == buffSize) //バッファ配列の終端 - { - //バッファ配列の拡張と追加読込 - checked { buffSize *= 2; } //Int32.MaxValueを超えたならエラー - byte[] newBuff = new byte[buffSize]; - Buffer.BlockCopy(buff, 0, newBuff, 0, byteCount); - accessor.ReadArray(offset + byteCount, newBuff, byteCount, buffSize - byteCount); - buff = newBuff; - } - } - - //バッファ配列を文字列にデコード - return enc.GetString(buff, 0, byteCount); - } - -#endif - /// /// 指定の名前に対応するエンコーディングを取得する(.NET FWが対応していない名前にもアドホックに対応) /// diff --git a/src/LibNMeCab/Core/Token.cs b/src/LibNMeCab/Core/Token.cs index 29cba0f..2d9a316 100644 --- a/src/LibNMeCab/Core/Token.cs +++ b/src/LibNMeCab/Core/Token.cs @@ -16,32 +16,32 @@ namespace NMeCab.Core /// /// 右文脈 id /// - public ushort LcAttr { get; set; } + public ushort LcAttr; /// /// 左文脈 id /// - public ushort RcAttr { get; set; } + public ushort RcAttr; /// /// 形態素 ID /// - public ushort PosId { get; set; } + public ushort PosId; /// /// 単語生起コスト /// - public short WCost { get; set; } + public short WCost; /// /// 素性情報の位置 /// - public uint Feature { get; set; } + public uint Feature; /// /// reserved for noun compound /// - public uint Compound { get; set; } + public uint Compound; #endregion diff --git a/src/LibNMeCab/Core/Tokenizer.cs b/src/LibNMeCab/Core/Tokenizer.cs index a03cd96..a857be4 100644 --- a/src/LibNMeCab/Core/Tokenizer.cs +++ b/src/LibNMeCab/Core/Tokenizer.cs @@ -55,7 +55,7 @@ namespace NMeCab.Core if (this.unkDic.Type != DictionaryType.Unk) throw new MeCabInvalidFileException("not a unk dictionary", this.unkDic.FileName); - MeCabDictionary sysDic = new MeCabDictionary(); + var sysDic = new MeCabDictionary(); sysDic.Open(Path.Combine(prefix, SysDicFile)); if (sysDic.Type != DictionaryType.Sys) throw new MeCabInvalidFileException("not a system dictionary", sysDic.FileName); @@ -63,7 +63,7 @@ namespace NMeCab.Core for (int i = 0; i < param.UserDic.Length; i++) { - MeCabDictionary d = new MeCabDictionary(); + var d = new MeCabDictionary(); d.Open(Path.Combine(prefix, param.UserDic[i])); if (d.Type != DictionaryType.Usr) throw new MeCabInvalidFileException("not a user dictionary", d.FileName); @@ -76,10 +76,11 @@ namespace NMeCab.Core for (int i = 0; i < this.unkTokens.Length; i++) { string key = this.property.Name(i); - DoubleArray.ResultPair n = this.unkDic.ExactMatchSearch(key); + var n = this.unkDic.ExactMatchSearch(key); if (n.Value == -1) throw new MeCabInvalidFileException("cannot find UNK category: " + key, this.unkDic.FileName); - this.unkTokens[i] = this.unkDic.GetToken(n); + + this.unkTokens[i] = this.unkDic.GetTokensArray(n.Value); } this.space = this.property.GetCharInfo(' '); @@ -111,28 +112,34 @@ namespace NMeCab.Core if (end - begin > ushort.MaxValue) end = begin + ushort.MaxValue; char* begin2 = property.SeekToOtherType(begin, end, this.space, &cInfo, &cLen); - DoubleArray.ResultPair* daResults = stackalloc DoubleArray.ResultPair[DAResultSize]; + var daResults = stackalloc DoubleArray.ResultPair[DAResultSize]; foreach (MeCabDictionary it in this.dic) { int n = it.CommonPrefixSearch(begin2, (int)(end - begin2), daResults, DAResultSize); - for (int i = 0; i < n; i++) { - Token[] token = it.GetToken(daResults[i]); - for (int j = 0; j < token.Length; j++) +#if MMF_DIC + var tokenSize = it.GetTokenSize(daResults->Value); + var tokens = it.GetTokens(daResults->Value); + for (int j = 0; j < tokenSize; j++) +#else + var seg = it.GetTokens(daResults->Value); + var tokens = seg.Array; + for (int j = seg.Offset; j < seg.Offset + seg.Count; j++) +#endif { - MeCabNode newNode = this.GetNewNode(); - this.ReadNodeInfo(it, token[j], newNode); - //newNode.Token = token[j]; - newNode.Length = daResults[i].Length; - newNode.RLength = (int)(begin2 - begin) + daResults[i].Length; - newNode.Surface = new string(begin2, 0, daResults[i].Length); + var newNode = this.GetNewNode(); + this.ReadNodeInfo(it, tokens[j], newNode); + newNode.Length = daResults->Length; + newNode.RLength = (int)(begin2 - begin) + daResults->Length; + newNode.Surface = new string(begin2, 0, newNode.Length); newNode.Stat = MeCabNodeStat.Nor; newNode.CharType = cInfo.DefaultType; newNode.BNext = resultNode; resultNode = newNode; } + daResults++; } } @@ -186,18 +193,18 @@ namespace NMeCab.Core private unsafe void AddUnknown(ref MeCabNode resultNode, CharInfo cInfo, char* begin, char* begin2, char* begin3) { - Token[] token = this.unkTokens[cInfo.DefaultType]; + var token = this.unkTokens[cInfo.DefaultType]; for (int i = 0; i < token.Length; i++) { - MeCabNode newNode = this.GetNewNode(); + var newNode = this.GetNewNode(); this.ReadNodeInfo(this.unkDic, token[i], newNode); - newNode.CharType = cInfo.DefaultType; - newNode.Surface = new string(begin2, 0, (int)(begin3 - begin2)); newNode.Length = (int)(begin3 - begin2); newNode.RLength = (int)(begin3 - begin); - newNode.BNext = resultNode; + newNode.Surface = new string(begin2, 0, newNode.Length); + newNode.CharType = cInfo.DefaultType; newNode.Stat = MeCabNodeStat.Unk; if (this.unkFeature != null) newNode.Feature = this.unkFeature; + newNode.BNext = resultNode; resultNode = newNode; } } @@ -208,7 +215,7 @@ namespace NMeCab.Core public MeCabNode GetBosNode() { - MeCabNode bosNode = this.GetNewNode(); + var bosNode = this.GetNewNode(); bosNode.Surface = BosKey; // dummy bosNode.Feature = this.bosFeature; bosNode.IsBest = true; @@ -218,14 +225,14 @@ namespace NMeCab.Core public MeCabNode GetEosNode() { - MeCabNode eosNode = this.GetBosNode(); // same + var eosNode = this.GetBosNode(); // same eosNode.Stat = MeCabNodeStat.Eos; return eosNode; } public MeCabNode GetNewNode() { - MeCabNode node = new MeCabNode(); + var node = new MeCabNode(); #if NeedId node.Id = Tokenizer.id++; #endif @@ -251,10 +258,12 @@ namespace NMeCab.Core if (disposing) { if (this.dic != null) - foreach (MeCabDictionary d in this.dic) - if (d != null) d.Dispose(); + foreach (var d in this.dic) + if (d != null) + d.Dispose(); - if (this.unkDic != null) this.unkDic.Dispose(); + if (this.unkDic != null) + this.unkDic.Dispose(); } this.disposed = true; -- 2.11.0