// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package precis import "errors" // This file contains tables and code related to context rules. type catBitmap uint16 const ( // These bits, once set depending on the current value, are never unset. bJapanese catBitmap = 1 << iota bArabicIndicDigit bExtendedArabicIndicDigit // These bits are set on each iteration depending on the current value. bJoinStart bJoinMid bJoinEnd bVirama bLatinSmallL bGreek bHebrew // These bits indicated which of the permanent bits need to be set at the // end of the checks. bMustHaveJapn permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn ) const finalShift = 10 var errContext = errors.New("precis: contextual rule violated") func init() { // Programmatically set these required bits as, manually setting them seems // too error prone. for i, ct := range categoryTransitions { categoryTransitions[i].keep |= permanent categoryTransitions[i].accept |= ct.term } } var categoryTransitions = []struct { keep catBitmap // mask selecting which bits to keep from the previous state set catBitmap // mask for which bits to set for this transition // These bitmaps are used for rules that require lookahead. // term&accept == term must be true, which is enforced programmatically. term catBitmap // bits accepted as termination condition accept catBitmap // bits that pass, but not sufficient as termination // The rule function cannot take a *context as an argument, as it would // cause the context to escape, adding significant overhead. rule func(beforeBits catBitmap) (doLookahead bool, err error) }{ joiningL: {set: bJoinStart}, joiningD: {set: bJoinStart | bJoinEnd}, joiningT: {keep: bJoinStart, set: bJoinMid}, joiningR: {set: bJoinEnd}, viramaModifier: {set: bVirama}, viramaJoinT: {set: bVirama | bJoinMid}, latinSmallL: {set: bLatinSmallL}, greek: {set: bGreek}, greekJoinT: {set: bGreek | bJoinMid}, hebrew: {set: bHebrew}, hebrewJoinT: {set: bHebrew | bJoinMid}, japanese: {set: bJapanese}, katakanaMiddleDot: {set: bMustHaveJapn}, zeroWidthNonJoiner: { term: bJoinEnd, accept: bJoinMid, rule: func(before catBitmap) (doLookAhead bool, err error) { if before&bVirama != 0 { return false, nil } if before&bJoinStart == 0 { return false, errContext } return true, nil }, }, zeroWidthJoiner: { rule: func(before catBitmap) (doLookAhead bool, err error) { if before&bVirama == 0 { err = errContext } return false, err }, }, middleDot: { term: bLatinSmallL, rule: func(before catBitmap) (doLookAhead bool, err error) { if before&bLatinSmallL == 0 { return false, errContext } return true, nil }, }, greekLowerNumeralSign: { set: bGreek, term: bGreek, rule: func(before catBitmap) (doLookAhead bool, err error) { return true, nil }, }, hebrewPreceding: { set: bHebrew, rule: func(before catBitmap) (doLookAhead bool, err error) { if before&bHebrew == 0 { err = errContext } return false, err }, }, arabicIndicDigit: { set: bArabicIndicDigit, rule: func(before catBitmap) (doLookAhead bool, err error) { if before&bExtendedArabicIndicDigit != 0 { err = errContext } return false, err }, }, extendedArabicIndicDigit: { set: bExtendedArabicIndicDigit, rule: func(before catBitmap) (doLookAhead bool, err error) { if before&bArabicIndicDigit != 0 { err = errContext } return false, err }, }, }