2 * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
22 #ifndef BidiResolver_h
23 #define BidiResolver_h
25 #include "BidiContext.h"
26 #include <wtf/Noncopyable.h>
27 #include <wtf/PassRefPtr.h>
28 #include <wtf/Vector.h>
32 template <class Iterator> struct MidpointState {
42 betweenMidpoints = false;
45 // The goal is to reuse the line state across multiple
46 // lines so we just keep an array around for midpoints and never clear it across multiple
47 // lines. We track the number of items and position using the two other variables.
48 Vector<Iterator> midpoints;
49 unsigned numMidpoints;
50 unsigned currentMidpoint;
51 bool betweenMidpoints;
54 // The BidiStatus at a given position (typically the end of a line) can
55 // be cached and then used to restart bidi resolution at that position.
58 : eor(WTF::Unicode::OtherNeutral)
59 , lastStrong(WTF::Unicode::OtherNeutral)
60 , last(WTF::Unicode::OtherNeutral)
64 BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
66 , lastStrong(lastStrongDir)
68 , context(bidiContext)
72 WTF::Unicode::Direction eor;
73 WTF::Unicode::Direction lastStrong;
74 WTF::Unicode::Direction last;
75 RefPtr<BidiContext> context;
78 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
80 return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
83 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
85 return !(status1 == status2);
88 struct BidiCharacterRun {
89 BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir)
92 , m_override(context->override())
95 if (dir == WTF::Unicode::OtherNeutral)
98 m_level = context->level();
100 // add level of run (cases I1 & I2)
102 if (dir == WTF::Unicode::LeftToRight || dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
105 if (dir == WTF::Unicode::RightToLeft)
107 else if (dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
112 void destroy() { delete this; }
114 int start() const { return m_start; }
115 int stop() const { return m_stop; }
116 unsigned char level() const { return m_level; }
117 bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
118 bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
120 BidiCharacterRun* next() const { return m_next; }
122 unsigned char m_level;
126 BidiCharacterRun* m_next;
129 template <class Iterator, class Run> class BidiResolver {
130 WTF_MAKE_NONCOPYABLE(BidiResolver);
133 : m_direction(WTF::Unicode::OtherNeutral)
134 , reachedEndOfLine(false)
138 , m_logicallyLastRun(0)
143 const Iterator& position() const { return current; }
144 void setPosition(const Iterator& position) { current = position; }
146 void increment() { current.increment(); }
148 BidiContext* context() const { return m_status.context.get(); }
149 void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
151 void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
152 void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
153 void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
155 WTF::Unicode::Direction dir() const { return m_direction; }
156 void setDir(WTF::Unicode::Direction d) { m_direction = d; }
158 const BidiStatus& status() const { return m_status; }
159 void setStatus(const BidiStatus s) { m_status = s; }
161 MidpointState<Iterator>& midpointState() { return m_midpointState; }
163 void embed(WTF::Unicode::Direction);
164 bool commitExplicitEmbedding();
166 void createBidiRunsForLine(const Iterator& end, bool visualOrder = false, bool hardLineBreak = false);
168 Run* firstRun() const { return m_firstRun; }
169 Run* lastRun() const { return m_lastRun; }
170 Run* logicallyLastRun() const { return m_logicallyLastRun; }
171 unsigned runCount() const { return m_runCount; }
174 void prependRun(Run*);
176 void moveRunToEnd(Run*);
177 void moveRunToBeginning(Run*);
183 void reverseRuns(unsigned start, unsigned end);
190 WTF::Unicode::Direction m_direction;
192 bool reachedEndOfLine;
193 Iterator lastBeforeET;
198 Run* m_logicallyLastRun;
200 MidpointState<Iterator> m_midpointState;
203 void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to);
204 void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from);
205 void checkDirectionInLowerRaiseEmbeddingLevel();
207 Vector<WTF::Unicode::Direction, 8> m_currentExplicitEmbeddingSequence;
210 template <class Iterator, class Run>
211 inline void BidiResolver<Iterator, Run>::addRun(Run* run)
216 m_lastRun->m_next = run;
221 template <class Iterator, class Run>
222 inline void BidiResolver<Iterator, Run>::prependRun(Run* run)
224 ASSERT(!run->m_next);
229 run->m_next = m_firstRun;
234 template <class Iterator, class Run>
235 inline void BidiResolver<Iterator, Run>::moveRunToEnd(Run* run)
242 Run* next = m_firstRun;
243 while (next != run) {
245 next = current->next();
249 m_firstRun = run->next();
251 current->m_next = run->m_next;
254 m_lastRun->m_next = run;
258 template <class Iterator, class Run>
259 inline void BidiResolver<Iterator, Run>::moveRunToBeginning(Run* run)
263 ASSERT(run != m_firstRun);
265 Run* current = m_firstRun;
266 Run* next = current->next();
267 while (next != run) {
269 next = current->next();
272 current->m_next = run->m_next;
273 if (run == m_lastRun)
276 run->m_next = m_firstRun;
280 template <class Iterator, class Run>
281 void BidiResolver<Iterator, Run>::appendRun()
283 if (!emptyRun && !eor.atEnd()) {
284 unsigned startOffset = sor.offset();
285 unsigned endOffset = eor.offset();
287 if (!endOfLine.atEnd() && endOffset >= endOfLine.offset()) {
288 reachedEndOfLine = true;
289 endOffset = endOfLine.offset();
292 if (endOffset >= startOffset)
293 addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
299 m_direction = WTF::Unicode::OtherNeutral;
300 m_status.eor = WTF::Unicode::OtherNeutral;
303 template <class Iterator, class Run>
304 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction d)
306 using namespace WTF::Unicode;
308 ASSERT(d == PopDirectionalFormat || d == LeftToRightEmbedding || d == LeftToRightOverride || d == RightToLeftEmbedding || d == RightToLeftOverride);
309 m_currentExplicitEmbeddingSequence.append(d);
312 template <class Iterator, class Run>
313 void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
315 using namespace WTF::Unicode;
317 ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
318 ASSERT(m_status.last != NonSpacingMark
319 && m_status.last != BoundaryNeutral
320 && m_status.last != RightToLeftEmbedding
321 && m_status.last != LeftToRightEmbedding
322 && m_status.last != RightToLeftOverride
323 && m_status.last != LeftToRightOverride
324 && m_status.last != PopDirectionalFormat);
325 if (m_direction == OtherNeutral)
326 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
329 template <class Iterator, class Run>
330 void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from)
332 using namespace WTF::Unicode;
334 if (!emptyRun && eor != last) {
335 checkDirectionInLowerRaiseEmbeddingLevel();
336 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
337 if (from == LeftToRight) {
338 // bidi.sor ... bidi.eor ... bidi.last L
339 if (m_status.eor == EuropeanNumber) {
340 if (m_status.lastStrong != LeftToRight) {
341 m_direction = EuropeanNumber;
344 } else if (m_status.eor == ArabicNumber) {
345 m_direction = ArabicNumber;
347 } else if (m_status.lastStrong != LeftToRight) {
349 m_direction = LeftToRight;
351 } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
353 m_direction = RightToLeft;
359 // sor for the new run is determined by the higher level (rule X10)
361 setLastStrongDir(from);
365 template <class Iterator, class Run>
366 void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to)
368 using namespace WTF::Unicode;
370 if (!emptyRun && eor != last) {
371 checkDirectionInLowerRaiseEmbeddingLevel();
372 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
373 if (to == LeftToRight) {
374 // bidi.sor ... bidi.eor ... bidi.last L
375 if (m_status.eor == EuropeanNumber) {
376 if (m_status.lastStrong != LeftToRight) {
377 m_direction = EuropeanNumber;
380 } else if (m_status.eor == ArabicNumber) {
381 m_direction = ArabicNumber;
383 } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
385 m_direction = LeftToRight;
387 } else if (m_status.eor == ArabicNumber
388 || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
389 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
391 m_direction = RightToLeft;
398 setLastStrongDir(to);
402 template <class Iterator, class Run>
403 bool BidiResolver<Iterator, Run>::commitExplicitEmbedding()
405 using namespace WTF::Unicode;
407 unsigned char fromLevel = context()->level();
408 RefPtr<BidiContext> toContext = context();
410 for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
411 Direction embedding = m_currentExplicitEmbeddingSequence[i];
412 if (embedding == PopDirectionalFormat) {
413 if (BidiContext* parentContext = toContext->parent())
414 toContext = parentContext;
416 Direction direction = (embedding == RightToLeftEmbedding || embedding == RightToLeftOverride) ? RightToLeft : LeftToRight;
417 bool override = embedding == LeftToRightOverride || embedding == RightToLeftOverride;
418 unsigned char level = toContext->level();
419 if (direction == RightToLeft) {
420 // Go to the least greater odd integer
424 // Go to the least greater even integer
429 toContext = BidiContext::create(level, direction, override, toContext.get());
433 unsigned char toLevel = toContext->level();
435 if (toLevel > fromLevel)
436 raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
437 else if (toLevel < fromLevel)
438 lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight);
440 setContext(toContext);
442 m_currentExplicitEmbeddingSequence.clear();
444 return fromLevel != toLevel;
447 template <class Iterator, class Run>
448 void BidiResolver<Iterator, Run>::deleteRuns()
454 Run* curr = m_firstRun;
456 Run* s = curr->next();
466 template <class Iterator, class Run>
467 void BidiResolver<Iterator, Run>::reverseRuns(unsigned start, unsigned end)
472 ASSERT(end < m_runCount);
474 // Get the item before the start of the runs to reverse and put it in
475 // |beforeStart|. |curr| should point to the first run to reverse.
476 Run* curr = m_firstRun;
477 Run* beforeStart = 0;
485 Run* startRun = curr;
491 Run* afterEnd = curr->next();
495 Run* newNext = afterEnd;
498 Run* next = curr->next();
499 curr->m_next = newNext;
505 // Now hook up beforeStart and afterEnd to the startRun and endRun.
507 beforeStart->m_next = endRun;
511 startRun->m_next = afterEnd;
513 m_lastRun = startRun;
516 template <class Iterator, class Run>
517 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, bool visualOrder, bool hardLineBreak)
519 using namespace WTF::Unicode;
521 ASSERT(m_direction == OtherNeutral);
528 bool pastEnd = false;
529 BidiResolver<Iterator, Run> stateAtEnd;
532 Direction dirCurrent;
533 if (pastEnd && (hardLineBreak || current.atEnd())) {
534 BidiContext* c = context();
537 dirCurrent = c->dir();
539 // A deviation from the Unicode Bidi Algorithm in order to match
540 // Mac OS X text and WinIE: a hard line break resets bidi state.
541 stateAtEnd.setContext(c);
542 stateAtEnd.setEorDir(dirCurrent);
543 stateAtEnd.setLastDir(dirCurrent);
544 stateAtEnd.setLastStrongDir(dirCurrent);
547 dirCurrent = current.direction();
548 if (context()->override()
549 && dirCurrent != RightToLeftEmbedding
550 && dirCurrent != LeftToRightEmbedding
551 && dirCurrent != RightToLeftOverride
552 && dirCurrent != LeftToRightOverride
553 && dirCurrent != PopDirectionalFormat)
554 dirCurrent = context()->dir();
555 else if (dirCurrent == NonSpacingMark)
556 dirCurrent = m_status.last;
559 ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
560 switch (dirCurrent) {
562 // embedding and overrides (X1-X9 in the Bidi specs)
563 case RightToLeftEmbedding:
564 case LeftToRightEmbedding:
565 case RightToLeftOverride:
566 case LeftToRightOverride:
567 case PopDirectionalFormat:
569 commitExplicitEmbedding();
574 switch(m_status.last) {
576 case RightToLeftArabic:
579 if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
584 case EuropeanNumberSeparator:
585 case EuropeanNumberTerminator:
586 case CommonNumberSeparator:
587 case BoundaryNeutral:
589 case SegmentSeparator:
590 case WhiteSpaceNeutral:
592 if (m_status.eor == EuropeanNumber) {
593 if (m_status.lastStrong != LeftToRight) {
594 // the numbers need to be on a higher embedding level, so let's close that run
595 m_direction = EuropeanNumber;
597 if (context()->dir() != LeftToRight) {
598 // the neutrals take the embedding direction, which is R
600 m_direction = RightToLeft;
604 } else if (m_status.eor == ArabicNumber) {
605 // Arabic numbers are always on a higher embedding level, so let's close that run
606 m_direction = ArabicNumber;
608 if (context()->dir() != LeftToRight) {
609 // the neutrals take the embedding direction, which is R
611 m_direction = RightToLeft;
614 } else if (m_status.lastStrong != LeftToRight) {
615 //last stuff takes embedding dir
616 if (context()->dir() == RightToLeft) {
618 m_direction = RightToLeft;
626 m_status.eor = LeftToRight;
627 m_status.lastStrong = LeftToRight;
628 m_direction = LeftToRight;
630 case RightToLeftArabic:
632 switch (m_status.last) {
638 case RightToLeftArabic:
640 case EuropeanNumberSeparator:
641 case EuropeanNumberTerminator:
642 case CommonNumberSeparator:
643 case BoundaryNeutral:
645 case SegmentSeparator:
646 case WhiteSpaceNeutral:
648 if (m_status.eor == EuropeanNumber) {
649 if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
652 } else if (m_status.eor == ArabicNumber)
654 else if (m_status.lastStrong == LeftToRight) {
655 if (context()->dir() == LeftToRight)
663 m_status.eor = RightToLeft;
664 m_status.lastStrong = dirCurrent;
665 m_direction = RightToLeft;
671 if (m_status.lastStrong != RightToLeftArabic) {
672 // if last strong was AL change EN to AN
673 switch (m_status.last) {
678 case RightToLeftArabic:
682 m_direction = EuropeanNumber;
684 case EuropeanNumberSeparator:
685 case CommonNumberSeparator:
686 if (m_status.eor == EuropeanNumber)
688 case EuropeanNumberTerminator:
689 case BoundaryNeutral:
691 case SegmentSeparator:
692 case WhiteSpaceNeutral:
694 if (m_status.eor == EuropeanNumber) {
695 if (m_status.lastStrong == RightToLeft) {
696 // ENs on both sides behave like Rs, so the neutrals should be R.
697 // Terminate the EN run.
700 eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
701 m_direction = RightToLeft;
703 // Begin a new EN run.
704 m_direction = EuropeanNumber;
706 } else if (m_status.eor == ArabicNumber) {
707 // Terminate the AN run.
709 if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
711 eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
712 m_direction = RightToLeft;
714 // Begin a new EN run.
715 m_direction = EuropeanNumber;
717 } else if (m_status.lastStrong == RightToLeft) {
718 // Extend the R run to include the neutrals.
719 eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
720 m_direction = RightToLeft;
722 // Begin a new EN run.
723 m_direction = EuropeanNumber;
729 m_status.eor = EuropeanNumber;
730 if (m_direction == OtherNeutral)
731 m_direction = LeftToRight;
735 dirCurrent = ArabicNumber;
736 switch (m_status.last) {
738 if (context()->dir() == LeftToRight)
744 case RightToLeftArabic:
749 case CommonNumberSeparator:
750 if (m_status.eor == ArabicNumber)
752 case EuropeanNumberSeparator:
753 case EuropeanNumberTerminator:
754 case BoundaryNeutral:
756 case SegmentSeparator:
757 case WhiteSpaceNeutral:
759 if (m_status.eor == ArabicNumber
760 || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
761 || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
762 // Terminate the run before the neutrals.
764 // Begin an R run for the neutrals.
765 m_direction = RightToLeft;
766 } else if (m_direction == OtherNeutral)
767 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
774 m_status.eor = ArabicNumber;
775 if (m_direction == OtherNeutral)
776 m_direction = ArabicNumber;
778 case EuropeanNumberSeparator:
779 case CommonNumberSeparator:
781 case EuropeanNumberTerminator:
782 if (m_status.last == EuropeanNumber) {
783 dirCurrent = EuropeanNumber;
785 m_status.eor = dirCurrent;
786 } else if (m_status.last != EuropeanNumberTerminator)
787 lastBeforeET = emptyRun ? eor : last;
790 // boundary neutrals should be ignored
791 case BoundaryNeutral:
797 // ### what do we do with newline and paragraph seperators that come to here?
799 case SegmentSeparator:
800 // ### implement rule L1
802 case WhiteSpaceNeutral:
810 if (pastEnd && eor == current) {
811 if (!reachedEndOfLine) {
813 switch (m_status.eor) {
817 m_direction = m_status.eor;
820 m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
828 m_status = stateAtEnd.m_status;
829 sor = stateAtEnd.sor;
830 eor = stateAtEnd.eor;
831 last = stateAtEnd.last;
832 reachedEndOfLine = stateAtEnd.reachedEndOfLine;
833 lastBeforeET = stateAtEnd.lastBeforeET;
834 emptyRun = stateAtEnd.emptyRun;
835 m_direction = OtherNeutral;
839 // set m_status.last as needed.
840 switch (dirCurrent) {
841 case EuropeanNumberTerminator:
842 if (m_status.last != EuropeanNumber)
843 m_status.last = EuropeanNumberTerminator;
845 case EuropeanNumberSeparator:
846 case CommonNumberSeparator:
847 case SegmentSeparator:
848 case WhiteSpaceNeutral:
850 switch(m_status.last) {
853 case RightToLeftArabic:
856 m_status.last = dirCurrent;
859 m_status.last = OtherNeutral;
863 case BoundaryNeutral:
864 case RightToLeftEmbedding:
865 case LeftToRightEmbedding:
866 case RightToLeftOverride:
867 case LeftToRightOverride:
868 case PopDirectionalFormat:
874 m_status.last = dirCurrent;
885 if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
886 bool committed = commitExplicitEmbedding();
887 if (committed && pastEnd) {
889 m_status = stateAtEnd.m_status;
890 sor = stateAtEnd.sor;
891 eor = stateAtEnd.eor;
892 last = stateAtEnd.last;
893 reachedEndOfLine = stateAtEnd.reachedEndOfLine;
894 lastBeforeET = stateAtEnd.lastBeforeET;
895 emptyRun = stateAtEnd.emptyRun;
896 m_direction = OtherNeutral;
901 if (!pastEnd && (current == end || current.atEnd())) {
904 stateAtEnd.m_status = m_status;
905 stateAtEnd.sor = sor;
906 stateAtEnd.eor = eor;
907 stateAtEnd.last = last;
908 stateAtEnd.reachedEndOfLine = reachedEndOfLine;
909 stateAtEnd.lastBeforeET = lastBeforeET;
910 stateAtEnd.emptyRun = emptyRun;
916 m_logicallyLastRun = m_lastRun;
918 // reorder line according to run structure...
919 // do not reverse for visually ordered web sites
922 // first find highest and lowest levels
923 unsigned char levelLow = 128;
924 unsigned char levelHigh = 0;
927 if (r->m_level > levelHigh)
928 levelHigh = r->m_level;
929 if (r->m_level < levelLow)
930 levelLow = r->m_level;
934 // implements reordering of the line (L2 according to Bidi spec):
935 // L2. From the highest level found in the text to the lowest odd level on each line,
936 // reverse any contiguous sequence of characters that are at that level or higher.
938 // reversing is only done up to the lowest odd level
942 unsigned count = runCount() - 1;
944 while (levelHigh >= levelLow) {
946 Run* currRun = firstRun();
948 while (i < count && currRun && currRun->m_level < levelHigh) {
950 currRun = currRun->next();
953 while (i <= count && currRun && currRun->m_level >= levelHigh) {
955 currRun = currRun->next();
957 unsigned end = i - 1;
958 reverseRuns(start, end);
963 endOfLine = Iterator();
966 } // namespace WebCore
968 #endif // BidiResolver_h