1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 //go:generate stringer -type=Kind
6 //go:generate go run gen.go gen_common.go gen_trieval.go
8 // Package width provides functionality for handling different widths in text.
10 // Wide characters behave like ideographs; they tend to allow line breaks after
11 // each character and remain upright in vertical text layout. Narrow characters
12 // are kept together in words or runs that are rotated sideways in vertical text
15 // For more information, see http://unicode.org/reports/tr11/.
16 package width // import "golang.org/x/text/width"
21 "golang.org/x/text/transform"
25 // 1) Reduce table size by compressing blocks.
26 // 2) API proposition for computing display length
27 // (approximation, fixed pitch only).
28 // 3) Implement display length.
30 // Kind indicates the type of width property as defined in http://unicode.org/reports/tr11/.
34 // Neutral characters do not occur in legacy East Asian character sets.
// EastAsianAmbiguous characters can be sometimes wide and sometimes
// narrow and require additional information not contained in the character
// code to further resolve their width.
// EastAsianWide characters are wide in their usual form. They occur only in
// the context of East Asian typography. These runes may have explicit
// halfwidth counterparts.
// EastAsianNarrow characters are narrow in their usual form. They often have
// fullwidth counterparts.
51 // Note: there exist Narrow runes that do not have fullwidth or wide
52 // counterparts, despite what the definition says (e.g. U+27E6).
// EastAsianFullwidth characters have a compatibility decomposition of type
// wide that maps to a narrow counterpart.
58 // EastAsianHalfwidth characters have a compatibility decomposition of type
59 // narrow that map to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
// Note: there exist runes that have a halfwidth counterpart but that are
// classified as Ambiguous, rather than wide (e.g. U+2190).
67 // TODO: the generated tries need to return size 1 for invalid runes for the
68 // width to be computed correctly (each byte should render width 1)
70 var trie = newWidthTrie(0)
72 // Lookup reports the Properties of the first rune in b and the number of bytes
73 // of its UTF-8 encoding.
74 func Lookup(b []byte) (p Properties, size int) {
75 v, sz := trie.lookup(b)
76 return Properties{elem(v), b[sz-1]}, sz
79 // LookupString reports the Properties of the first rune in s and the number of
80 // bytes of its UTF-8 encoding.
81 func LookupString(s string) (p Properties, size int) {
82 v, sz := trie.lookupString(s)
83 return Properties{elem(v), s[sz-1]}, sz
86 // LookupRune reports the Properties of rune r.
87 func LookupRune(r rune) Properties {
89 n := utf8.EncodeRune(buf[:], r)
90 v, _ := trie.lookup(buf[:n])
92 if r >= utf8.RuneSelf {
93 last = 0x80 + byte(r&0x3f)
95 return Properties{elem(v), last}
98 // Properties provides access to width properties of a rune.
99 type Properties struct {
104 func (e elem) kind() Kind {
105 return Kind(e >> typeShift)
108 // Kind returns the Kind of a rune as defined in Unicode TR #11.
109 // See http://unicode.org/reports/tr11/ for more details.
110 func (p Properties) Kind() Kind {
114 // Folded returns the folded variant of a rune or 0 if the rune is canonical.
115 func (p Properties) Folded() rune {
116 if p.elem&tagNeedsFold != 0 {
117 buf := inverseData[byte(p.elem)]
118 buf[buf[0]] ^= p.last
119 r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
125 // Narrow returns the narrow variant of a rune or 0 if the rune is already
126 // narrow or doesn't have a narrow variant.
127 func (p Properties) Narrow() rune {
128 if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
129 buf := inverseData[byte(p.elem)]
130 buf[buf[0]] ^= p.last
131 r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
137 // Wide returns the wide variant of a rune or 0 if the rune is already
138 // wide or doesn't have a wide variant.
139 func (p Properties) Wide() rune {
140 if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
141 buf := inverseData[byte(p.elem)]
142 buf[buf[0]] ^= p.last
143 r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
149 // TODO for Properties:
150 // - Add Fullwidth/Halfwidth or Inverted methods for computing variants
152 // - Add width information (including information on non-spacing runes).
154 // Transformer implements the transform.Transformer interface.
155 type Transformer struct {
156 t transform.SpanningTransformer
159 // Reset implements the transform.Transformer interface.
160 func (t Transformer) Reset() { t.t.Reset() }
162 // Transform implements the transform.Transformer interface.
163 func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
164 return t.t.Transform(dst, src, atEOF)
167 // Span implements the transform.SpanningTransformer interface.
168 func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
169 return t.t.Span(src, atEOF)
172 // Bytes returns a new byte slice with the result of applying t to b.
173 func (t Transformer) Bytes(b []byte) []byte {
174 b, _, _ = transform.Bytes(t, b)
178 // String returns a string with the result of applying t to s.
179 func (t Transformer) String(s string) string {
180 s, _, _ = transform.String(t, s)
185 // Fold is a transform that maps all runes to their canonical width.
187 // Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
188 // provide a more generic folding mechanism.
189 Fold Transformer = Transformer{foldTransform{}}
191 // Widen is a transform that maps runes to their wide variant, if
193 Widen Transformer = Transformer{wideTransform{}}
195 // Narrow is a transform that maps runes to their narrow variant, if
197 Narrow Transformer = Transformer{narrowTransform{}}
200 // TODO: Consider the following options:
201 // - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
202 // generalized variant of this.
203 // - Consider a wide Won character to be the default width (or some generalized
205 // - Filter the set of characters that gets converted (the preferred approach is
206 // to allow applying filters to transforms).