2 * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "TextCheckingHelper.h"
30 #include "DocumentMarkerController.h"
32 #include "TextCheckerClient.h"
33 #include "TextIterator.h"
34 #include "VisiblePosition.h"
35 #include "visible_units.h"
// Returns a clone of |range| whose start has been moved to the start of the paragraph
// containing the range's start position and whose end has been moved to the end of the
// paragraph containing the range's end position. The adjustments are applied to the clone.
// NOTE(review): the declaration of |ec| is not visible in this extract.
39 static PassRefPtr<Range> expandToParagraphBoundary(PassRefPtr<Range> range)
42 RefPtr<Range> paragraphRange = range->cloneRange(ec);
// Widen the clone outwards to the enclosing paragraph boundaries.
43 setStart(paragraphRange.get(), startOfParagraph(range->startPosition()));
44 setEnd(paragraphRange.get(), endOfParagraph(range->endPosition()));
45 return paragraphRange;
// Builds a paragraph helper around |checkingRange|, which is stored as m_checkingRange.
48 TextCheckingParagraph::TextCheckingParagraph(PassRefPtr<Range> checkingRange)
49 : m_checkingRange(checkingRange)
// -1 is the "not computed yet" sentinel that checkingLength() tests for.
52 , m_checkingLength(-1)
// Destructor. NOTE(review): its body is not visible in this extract.
56 TextCheckingParagraph::~TextCheckingParagraph()
// Extends the paragraph range so that it ends at the end of the paragraph that follows the
// paragraph range's start, then invalidates the values cached for the previous extent.
60 void TextCheckingParagraph::expandRangeToNextEnd()
62 ASSERT(m_checkingRange);
// New end = end of the paragraph that begins at startOfNextParagraph(current start).
63 setEnd(paragraphRange().get(), endOfParagraph(startOfNextParagraph(paragraphRange()->startPosition())));
64 invalidateParagraphRangeValues();
// Resets the cached checking start/end offsets to -1, the sentinel that checkingStart() and
// checkingEnd() treat as "needs recomputing".
// NOTE(review): any further resets in this function are not visible in this extract.
67 void TextCheckingParagraph::invalidateParagraphRangeValues()
69 m_checkingStart = m_checkingEnd = -1;
// Returns the length of the paragraph range as measured by TextIterator::rangeLength().
74 int TextCheckingParagraph::rangeLength() const
76 ASSERT(m_checkingRange);
77 return TextIterator::rangeLength(paragraphRange().get());
// Returns the checking range expanded to paragraph boundaries. The expanded range is built
// on first use and kept in m_paragraphRange for subsequent calls.
80 PassRefPtr<Range> TextCheckingParagraph::paragraphRange() const
82 ASSERT(m_checkingRange);
// Lazily create the expanded range the first time it is requested.
83 if (!m_paragraphRange)
84 m_paragraphRange = expandToParagraphBoundary(checkingRange());
85 return m_paragraphRange;
// Returns the part of the paragraph range selected by |characterOffset| and |characterCount|,
// as computed by TextIterator::subrange().
88 PassRefPtr<Range> TextCheckingParagraph::subrange(int characterOffset, int characterCount) const
90 ASSERT(m_checkingRange);
91 return TextIterator::subrange(paragraphRange().get(), characterOffset, characterCount);
94 int TextCheckingParagraph::offsetTo(const Position& position, ExceptionCode& ec) const
96 ASSERT(m_checkingRange);
97 RefPtr<Range> range = offsetAsRange();
98 range->setEnd(position.containerNode(), position.computeOffsetInContainerNode(), ec);
101 return TextIterator::rangeLength(range.get());
// Returns true when either the range-based or the text-based emptiness predicate holds.
104 bool TextCheckingParagraph::isEmpty() const
106 // Both predicates should have the same result, but we check both just to be sure.
107 // We need to investigate whether this redundancy can be removed.
108 return isRangeEmpty() || isTextEmpty();
// Returns the range that runs from the start of the paragraph range to the start of the
// checking range. It is created on first use and kept in m_offsetAsRange. Callers receive
// that cached object, so modifying the returned range also modifies the cache.
111 PassRefPtr<Range> TextCheckingParagraph::offsetAsRange() const
113 ASSERT(m_checkingRange);
114 if (!m_offsetAsRange) {
115 ExceptionCode ec = 0;
// The range is created in the document that owns the paragraph range's start container.
116 m_offsetAsRange = Range::create(paragraphRange()->startContainer(ec)->document(), paragraphRange()->startPosition(), checkingRange()->startPosition());
119 return m_offsetAsRange;
// Accessor for the plain text of the paragraph range. m_text is filled from plainText()
// whenever it is currently empty, so a paragraph whose text really is empty is re-extracted
// on each call.
// NOTE(review): the return statement is not visible in this extract.
122 const String& TextCheckingParagraph::text() const
124 ASSERT(m_checkingRange);
125 if (m_text.isEmpty())
126 m_text = plainText(paragraphRange().get());
// Returns the offset of the checking range's start within the paragraph, measured as the
// TextIterator length of offsetAsRange(). The value is cached; -1 means "not computed yet".
130 int TextCheckingParagraph::checkingStart() const
132 ASSERT(m_checkingRange);
133 if (m_checkingStart == -1)
134 m_checkingStart = TextIterator::rangeLength(offsetAsRange().get());
135 return m_checkingStart;
// Returns the offset of the checking range's end within the paragraph: checkingStart() plus
// the TextIterator length of the checking range. The value is cached; -1 means "not computed yet".
138 int TextCheckingParagraph::checkingEnd() const
140 ASSERT(m_checkingRange);
141 if (m_checkingEnd == -1)
142 m_checkingEnd = checkingStart() + TextIterator::rangeLength(checkingRange().get());
143 return m_checkingEnd;
// Returns the TextIterator length of the checking range. The value is cached; -1 means
// "not computed yet".
146 int TextCheckingParagraph::checkingLength() const
148 ASSERT(m_checkingRange);
149 if (-1 == m_checkingLength)
150 m_checkingLength = TextIterator::rangeLength(checkingRange().get());
151 return m_checkingLength;
// Constructs a helper for |client| and |range|.
// NOTE(review): the member initializers are not visible in this extract; presumably they
// store |client| in m_client and |range| in m_range - confirm against the full file.
154 TextCheckingHelper::TextCheckingHelper(EditorClient* client, PassRefPtr<Range> range)
// Both members are asserted to be non-null after construction.
158 ASSERT_ARG(m_client, m_client);
159 ASSERT_ARG(m_range, m_range);
// Destructor. NOTE(review): its body is not visible in this extract.
162 TextCheckingHelper::~TextCheckingHelper()
// Walks m_range chunk by chunk with a WordAwareIterator, asks the client's text checker to
// spell-check each chunk, and records the first misspelling it reports. A Spelling marker is
// added for misspelled ranges.
//
// firstMisspellingOffset: set to 0 on entry; receives the offset of the first misspelling,
//     computed as the running chunk offset plus the location reported by the checker.
// markAll: per the comment near the end of the loop, when false the search stops after the
//     first misspelling. NOTE(review): the statement that does so is not visible in this extract.
// firstMisspellingRange: receives the range of the first misspelling.
//
// Returns the text of the first misspelling, or a default-constructed String if none was recorded.
166 String TextCheckingHelper::findFirstMisspelling(int& firstMisspellingOffset, bool markAll, RefPtr<Range>& firstMisspellingRange)
168 WordAwareIterator it(m_range.get());
169 firstMisspellingOffset = 0;
171 String firstMisspelling;
// Number of characters in the chunks already visited; added to chunk-relative locations.
172 int currentChunkOffset = 0;
174 while (!it.atEnd()) {
175 const UChar* chars = it.characters();
176 int len = it.length();
178 // Skip some work for one-space-char hunks
179 if (!(len == 1 && chars[0] == ' ')) {
// -1 / 0 are the values left in place when the checker reports nothing.
181 int misspellingLocation = -1;
182 int misspellingLength = 0;
183 m_client->textChecker()->checkSpellingOfString(chars, len, &misspellingLocation, &misspellingLength);
185 // 5490627 shows that there was some code path here where the String constructor below crashes.
186 // We don't know exactly what combination of bad input caused this, so we're making this much
187 // more robust against bad input on release builds.
188 ASSERT(misspellingLength >= 0);
189 ASSERT(misspellingLocation >= -1);
190 ASSERT(!misspellingLength || misspellingLocation >= 0);
191 ASSERT(misspellingLocation < len);
192 ASSERT(misspellingLength <= len);
193 ASSERT(misspellingLocation + misspellingLength <= len);
// Runtime re-check of the conditions asserted above, so that an out-of-bounds result from the
// checker is ignored rather than used to build a String.
195 if (misspellingLocation >= 0 && misspellingLength > 0 && misspellingLocation < len && misspellingLength <= len && misspellingLocation + misspellingLength <= len) {
197 // Compute range of misspelled word
198 RefPtr<Range> misspellingRange = TextIterator::subrange(m_range.get(), currentChunkOffset + misspellingLocation, misspellingLength);
200 // Remember first-encountered misspelling and its offset.
201 if (!firstMisspelling) {
202 firstMisspellingOffset = currentChunkOffset + misspellingLocation;
203 firstMisspelling = String(chars + misspellingLocation, misspellingLength);
204 firstMisspellingRange = misspellingRange;
207 // Store marker for misspelled word.
208 ExceptionCode ec = 0;
209 misspellingRange->startContainer(ec)->document()->markers()->addMarker(misspellingRange.get(), DocumentMarker::Spelling);
212 // Bail out if we're marking only the first misspelling, and not all instances.
// Advance the running offset past the chunk just examined.
218 currentChunkOffset += len;
222 return firstMisspelling;
// Searches m_range one paragraph at a time, using the client's unified paragraph checker, for
// a misspelled word or (when |checkGrammar| is true) a bad grammar phrase.
//
// checkGrammar: when true, grammar results are requested in addition to spelling results.
// outIsSpelling: set to true when the reported item is a misspelling, false when it is a
//     bad grammar phrase. Initialized to true.
// outFirstFoundOffset: receives the offset of the reported item relative to the start of
//     m_range. Initialized to 0.
// outGrammarDetail: reset on entry; receives a copy of the selected grammar detail.
//
// Returns the text of the reported item, or a default-constructed String if nothing was found.
// When UNIFIED_TEXT_CHECKING is not in use, only the ASSERT_NOT_REACHED branch is compiled.
//
// NOTE(review): braces and early exits (break/continue) are not visible in this extract, so
// the exact nesting of the statements below should be confirmed against the full file.
225 String TextCheckingHelper::findFirstMisspellingOrBadGrammar(bool checkGrammar, bool& outIsSpelling, int& outFirstFoundOffset, GrammarDetail& outGrammarDetail)
227 #if USE(UNIFIED_TEXT_CHECKING)
228 String firstFoundItem;
229 String misspelledWord;
230 String badGrammarPhrase;
231 ExceptionCode ec = 0;
233 // Initialize out parameters; these will be updated if we find something to return.
234 outIsSpelling = true;
235 outFirstFoundOffset = 0;
236 outGrammarDetail.location = -1;
237 outGrammarDetail.length = 0;
238 outGrammarDetail.guesses.clear();
239 outGrammarDetail.userDescription = "";
241 // Expand the search range to encompass entire paragraphs, since text checking needs that much context.
242 // Determine the character offset from the start of the paragraph to the start of the original search range,
243 // since we will want to ignore results in this area.
244 RefPtr<Range> paragraphRange = m_range->cloneRange(ec);
245 setStart(paragraphRange.get(), startOfParagraph(m_range->startPosition()));
// Measured after moving the start but before moving the end: the length from the start of the
// first paragraph to the end of the original search range.
246 int totalRangeLength = TextIterator::rangeLength(paragraphRange.get());
247 setEnd(paragraphRange.get(), endOfParagraph(m_range->startPosition()));
249 RefPtr<Range> offsetAsRange = Range::create(paragraphRange->startContainer(ec)->document(), paragraphRange->startPosition(), m_range->startPosition());
250 int rangeStartOffset = TextIterator::rangeLength(offsetAsRange.get());
251 int totalLengthProcessed = 0;
253 bool firstIteration = true;
254 bool lastIteration = false;
255 while (totalLengthProcessed < totalRangeLength) {
256 // Iterate through the search range by paragraphs, checking each one for spelling and grammar.
257 int currentLength = TextIterator::rangeLength(paragraphRange.get());
// Only the first paragraph has a leading region (before the search range) to ignore.
258 int currentStartOffset = firstIteration ? rangeStartOffset : 0;
259 int currentEndOffset = currentLength;
260 if (inSameParagraph(paragraphRange->startPosition(), m_range->endPosition())) {
261 // Determine the character offset from the end of the original search range to the end of the paragraph,
262 // since we will want to ignore results in this area.
263 RefPtr<Range> endOffsetAsRange = Range::create(paragraphRange->startContainer(ec)->document(), paragraphRange->startPosition(), m_range->endPosition());
264 currentEndOffset = TextIterator::rangeLength(endOffsetAsRange.get());
265 lastIteration = true;
267 if (currentStartOffset < currentEndOffset) {
268 String paragraphString = plainText(paragraphRange.get());
269 if (paragraphString.length() > 0) {
270 bool foundGrammar = false;
271 int spellingLocation = 0;
272 int grammarPhraseLocation = 0;
273 int grammarDetailLocation = 0;
274 unsigned grammarDetailIndex = 0;
276 Vector<TextCheckingResult> results;
277 TextCheckingTypeMask checkingTypes = checkGrammar ? (TextCheckingTypeSpelling | TextCheckingTypeGrammar) : TextCheckingTypeSpelling;
278 m_client->textChecker()->checkTextOfParagraph(paragraphString.characters(), paragraphString.length(), checkingTypes, results);
280 for (unsigned i = 0; i < results.size(); i++) {
281 const TextCheckingResult* result = &results[i];
// A spelling result counts only if it lies entirely inside [currentStartOffset, currentEndOffset].
282 if (result->type == TextCheckingTypeSpelling && result->location >= currentStartOffset && result->location + result->length <= currentEndOffset) {
283 ASSERT(result->length > 0 && result->location >= 0);
284 spellingLocation = result->location;
285 misspelledWord = paragraphString.substring(result->location, result->length);
286 ASSERT(misspelledWord.length());
// A grammar result is considered as soon as it overlaps the checked region; its individual
// details are then filtered against the region below.
289 if (checkGrammar && result->type == TextCheckingTypeGrammar && result->location < currentEndOffset && result->location + result->length > currentStartOffset) {
290 ASSERT(result->length > 0 && result->location >= 0);
291 // We can't stop after the first grammar result, since there might still be a spelling result after
292 // it begins but before the first detail in it, but we can stop if we find a second grammar result.
295 for (unsigned j = 0; j < result->details.size(); j++) {
296 const GrammarDetail* detail = &result->details[j];
297 ASSERT(detail->length > 0 && detail->location >= 0);
// Keep the detail that lies inside the checked region and starts earliest.
298 if (result->location + detail->location >= currentStartOffset && result->location + detail->location + detail->length <= currentEndOffset && (!foundGrammar || result->location + detail->location < grammarDetailLocation)) {
299 grammarDetailIndex = j;
300 grammarDetailLocation = result->location + detail->location;
305 grammarPhraseLocation = result->location;
306 outGrammarDetail = result->details[grammarDetailIndex];
307 badGrammarPhrase = paragraphString.substring(result->location, result->length);
308 ASSERT(badGrammarPhrase.length());
// Report the misspelling when grammar is not being checked, no bad grammar phrase was found,
// or the misspelling starts no later than the chosen grammar detail.
313 if (!misspelledWord.isEmpty() && (!checkGrammar || badGrammarPhrase.isEmpty() || spellingLocation <= grammarDetailLocation)) {
314 int spellingOffset = spellingLocation - currentStartOffset;
// For later paragraphs, add the distance from the start of m_range to this paragraph's start.
315 if (!firstIteration) {
316 RefPtr<Range> paragraphOffsetAsRange = Range::create(paragraphRange->startContainer(ec)->document(), m_range->startPosition(), paragraphRange->startPosition());
317 spellingOffset += TextIterator::rangeLength(paragraphOffsetAsRange.get());
319 outIsSpelling = true;
320 outFirstFoundOffset = spellingOffset;
321 firstFoundItem = misspelledWord;
324 if (checkGrammar && !badGrammarPhrase.isEmpty()) {
325 int grammarPhraseOffset = grammarPhraseLocation - currentStartOffset;
326 if (!firstIteration) {
327 RefPtr<Range> paragraphOffsetAsRange = Range::create(paragraphRange->startContainer(ec)->document(), m_range->startPosition(), paragraphRange->startPosition());
328 grammarPhraseOffset += TextIterator::rangeLength(paragraphOffsetAsRange.get());
330 outIsSpelling = false;
331 outFirstFoundOffset = grammarPhraseOffset;
332 firstFoundItem = badGrammarPhrase;
337 if (lastIteration || totalLengthProcessed + currentLength >= totalRangeLength)
// Move the working range on to the next paragraph.
339 VisiblePosition newParagraphStart = startOfNextParagraph(paragraphRange->endPosition());
340 setStart(paragraphRange.get(), newParagraphStart);
341 setEnd(paragraphRange.get(), endOfParagraph(newParagraphStart));
342 firstIteration = false;
343 totalLengthProcessed += currentLength;
345 return firstFoundItem;
347 ASSERT_NOT_REACHED();
348 UNUSED_PARAM(checkGrammar);
349 UNUSED_PARAM(outIsSpelling);
350 UNUSED_PARAM(outFirstFoundOffset);
351 UNUSED_PARAM(outGrammarDetail);
353 #endif // USE(UNIFIED_TEXT_CHECKING)
// Selects, from |grammarDetails|, the detail with the smallest location among those that
// start inside the search range, and returns its index.
//
// grammarDetails: details reported for one bad grammar phrase.
// badGrammarPhraseLocation: offset of that phrase; added to each detail's location to get the
//     detail's start offset within the paragraph.
// startOffset, endOffset: bounds of the original search range within the paragraph. Per the
//     comments below, details starting before startOffset or at/after endOffset are skipped.
// markAll: per the comment below, grammar markers are added optionally.
//     NOTE(review): the condition guarding addMarker is not visible in this extract.
//
// Returns the index of the selected detail, or -1 if no detail qualified.
// When GRAMMAR_CHECKING is not in use, only the ASSERT_NOT_REACHED branch is compiled.
356 int TextCheckingHelper::findFirstGrammarDetail(const Vector<GrammarDetail>& grammarDetails, int badGrammarPhraseLocation, int /*badGrammarPhraseLength*/, int startOffset, int endOffset, bool markAll)
358 #if USE(GRAMMAR_CHECKING)
359 // Found some bad grammar. Find the earliest detail range that starts in our search range (if any).
360 // Optionally add a DocumentMarker for each detail in the range.
361 int earliestDetailLocationSoFar = -1;
362 int earliestDetailIndex = -1;
363 for (unsigned i = 0; i < grammarDetails.size(); i++) {
364 const GrammarDetail* detail = &grammarDetails[i];
365 ASSERT(detail->length > 0 && detail->location >= 0);
367 int detailStartOffsetInParagraph = badGrammarPhraseLocation + detail->location;
369 // Skip this detail if it starts before the original search range
370 if (detailStartOffsetInParagraph < startOffset)
373 // Skip this detail if it starts after the original search range
374 if (detailStartOffsetInParagraph >= endOffset)
// The subrange offset is relative to m_range, hence the subtraction of startOffset.
378 RefPtr<Range> badGrammarRange = TextIterator::subrange(m_range.get(), badGrammarPhraseLocation - startOffset + detail->location, detail->length);
379 ExceptionCode ec = 0;
380 badGrammarRange->startContainer(ec)->document()->markers()->addMarker(badGrammarRange.get(), DocumentMarker::Grammar, detail->userDescription);
384 // Remember this detail only if it's earlier than our current candidate (the details aren't in a guaranteed order)
385 if (earliestDetailIndex < 0 || earliestDetailLocationSoFar > detail->location) {
386 earliestDetailIndex = i;
387 earliestDetailLocationSoFar = detail->location;
391 return earliestDetailIndex;
393 ASSERT_NOT_REACHED();
394 UNUSED_PARAM(grammarDetails);
395 UNUSED_PARAM(badGrammarPhraseLocation);
396 UNUSED_PARAM(startOffset);
397 UNUSED_PARAM(endOffset);
398 UNUSED_PARAM(markAll);
// Repeatedly asks the client's text checker for bad grammar in the paragraph text, starting
// at a moving offset, and records the first bad grammar phrase that has a detail inside the
// search range.
//
// outGrammarDetail: reset on entry; receives a copy of the detail selected by
//     findFirstGrammarDetail() when one is found.
// outGrammarPhraseOffset: set to 0 on entry; receives the phrase location minus the
//     paragraph's checking start.
// markAll: forwarded to findFirstGrammarDetail(); per the comments below, when true the
//     search continues after the first hit so that each instance can be marked.
//
// Returns the first bad grammar phrase, or a default-constructed String if none was recorded.
//
// NOTE(review): the declaration of |startOffset|, the braces and the early exits are not
// visible in this extract; confirm the control flow against the full file.
403 String TextCheckingHelper::findFirstBadGrammar(GrammarDetail& outGrammarDetail, int& outGrammarPhraseOffset, bool markAll)
405 ASSERT(WTF_USE_GRAMMAR_CHECKING);
406 // Initialize out parameters; these will be updated if we find something to return.
407 outGrammarDetail.location = -1;
408 outGrammarDetail.length = 0;
409 outGrammarDetail.guesses.clear();
410 outGrammarDetail.userDescription = "";
411 outGrammarPhraseOffset = 0;
413 String firstBadGrammarPhrase;
415 // Expand the search range to encompass entire paragraphs, since grammar checking needs that much context.
416 // Determine the character offset from the start of the paragraph to the start of the original search range,
417 // since we will want to ignore results in this area.
418 TextCheckingParagraph paragraph(m_range);
420 // Start checking from beginning of paragraph, but skip past results that occur before the start of the original search range.
422 while (startOffset < paragraph.checkingEnd()) {
423 Vector<GrammarDetail> grammarDetails;
424 int badGrammarPhraseLocation = -1;
425 int badGrammarPhraseLength = 0;
// Only the text from startOffset onwards is handed to the checker.
426 m_client->textChecker()->checkGrammarOfString(paragraph.textCharacters() + startOffset, paragraph.textLength() - startOffset, grammarDetails, &badGrammarPhraseLocation, &badGrammarPhraseLength);
428 if (!badGrammarPhraseLength) {
429 ASSERT(badGrammarPhraseLocation == -1);
433 ASSERT(badGrammarPhraseLocation >= 0);
// Convert the checker's location, which is relative to startOffset, into a paragraph offset.
434 badGrammarPhraseLocation += startOffset;
437 // Found some bad grammar. Find the earliest detail range that starts in our search range (if any).
438 int badGrammarIndex = findFirstGrammarDetail(grammarDetails, badGrammarPhraseLocation, badGrammarPhraseLength, paragraph.checkingStart(), paragraph.checkingEnd(), markAll);
439 if (badGrammarIndex >= 0) {
440 ASSERT(static_cast<unsigned>(badGrammarIndex) < grammarDetails.size());
441 outGrammarDetail = grammarDetails[badGrammarIndex];
444 // If we found a detail in range, then we have found the first bad phrase (unless we found one earlier but
445 // kept going so we could mark all instances).
446 if (badGrammarIndex >= 0 && firstBadGrammarPhrase.isEmpty()) {
447 outGrammarPhraseOffset = badGrammarPhraseLocation - paragraph.checkingStart();
448 firstBadGrammarPhrase = paragraph.textSubstring(badGrammarPhraseLocation, badGrammarPhraseLength);
450 // Found one. We're done now, unless we're marking each instance.
455 // These results were all between the start of the paragraph and the start of the search range; look
456 // beyond this phrase.
457 startOffset = badGrammarPhraseLocation + badGrammarPhraseLength;
460 return firstBadGrammarPhrase;
// Checks whether m_range corresponds exactly to a bad grammar detail range, using
// findFirstBadGrammar() without marking all instances. On a match, the client's spelling UI
// is updated with the bad grammar phrase and its detail.
//
// guessesVector: cleared once the range has passed the initial validity check.
//     NOTE(review): whether it is subsequently filled is not visible in this extract.
//
// NOTE(review): the return statements and the declaration of |ec| are not visible in this
// extract; the outcome of each check below is inferred from the accompanying comments.
464 bool TextCheckingHelper::isUngrammatical(Vector<String>& guessesVector) const
466 ASSERT(WTF_USE_GRAMMAR_CHECKING);
// A missing or collapsed range is checked for first.
471 if (!m_range || m_range->collapsed(ec))
474 // Returns true only if the passed range exactly corresponds to a bad grammar detail range. This is analogous
475 // to isSelectionMisspelled. It's not good enough for there to be some bad grammar somewhere in the range,
476 // or overlapping the range; the ranges must exactly match.
477 guessesVector.clear();
478 int grammarPhraseOffset;
480 GrammarDetail grammarDetail;
// findFirstBadGrammar() is non-const, hence the const_cast on |this|.
481 String badGrammarPhrase = const_cast<TextCheckingHelper*>(this)->findFirstBadGrammar(grammarDetail, grammarPhraseOffset, false);
483 // No bad grammar in these parts at all.
484 if (badGrammarPhrase.isEmpty())
487 // Bad grammar, but phrase (e.g. sentence) starts beyond start of range.
488 if (grammarPhraseOffset > 0)
491 ASSERT(grammarDetail.location >= 0 && grammarDetail.length > 0);
493 // Bad grammar, but start of detail (e.g. ungrammatical word) doesn't match start of range
494 if (grammarDetail.location + grammarPhraseOffset)
497 // Bad grammar at start of range, but end of bad grammar is before or after end of range
498 if (grammarDetail.length != TextIterator::rangeLength(m_range.get()))
501 // Update the spelling panel to be displaying this error (whether or not the spelling panel is on screen).
502 // This is necessary to make a subsequent call to [NSSpellChecker ignoreWord:inSpellDocumentWithTag:] work
503 // correctly; that call behaves differently based on whether the spelling panel is displaying a misspelling
504 // or a grammar error.
505 m_client->updateSpellingUIWithGrammarString(badGrammarPhrase, grammarDetail);
// Runs the client's unified paragraph checker over the paragraph containing m_range and
// collects guesses when m_range exactly matches a spelling result or a grammar detail.
//
// checkGrammar: when true, grammar results are requested in addition to spelling results.
// misspelled: NOTE(review): the assignments to this flag are not visible in this extract.
// ungrammatical: set to false on entry, and to true when a grammar detail matches the range.
//
// The client's spelling UI is updated with the matching misspelled word or grammar phrase.
// When UNIFIED_TEXT_CHECKING is not in use, the function asserts and returns an empty vector.
//
// NOTE(review): the return statements of the unified branch and the declaration of |ec| are
// not visible in this extract.
510 Vector<String> TextCheckingHelper::guessesForMisspelledOrUngrammaticalRange(bool checkGrammar, bool& misspelled, bool& ungrammatical) const
512 #if USE(UNIFIED_TEXT_CHECKING)
513 Vector<String> guesses;
516 ungrammatical = false;
518 if (!m_client || !m_range || m_range->collapsed(ec))
521 // Expand the range to encompass entire paragraphs, since text checking needs that much context.
522 TextCheckingParagraph paragraph(m_range);
523 if (paragraph.isEmpty())
526 Vector<TextCheckingResult> results;
527 TextCheckingTypeMask checkingTypes = checkGrammar ? (TextCheckingTypeSpelling | TextCheckingTypeGrammar) : TextCheckingTypeSpelling;
528 m_client->textChecker()->checkTextOfParagraph(paragraph.textCharacters(), paragraph.textLength(), checkingTypes, results);
// First pass: look for a spelling result that matches the checking range exactly.
530 for (unsigned i = 0; i < results.size(); i++) {
531 const TextCheckingResult* result = &results[i];
532 if (result->type == TextCheckingTypeSpelling && paragraph.checkingRangeMatches(result->location, result->length)) {
533 String misspelledWord = paragraph.checkingSubstring();
534 ASSERT(misspelledWord.length());
535 m_client->textChecker()->getGuessesForWord(misspelledWord, String(), guesses);
536 m_client->updateSpellingUIWithMisspelledWord(misspelledWord);
// Second pass: look for a grammar result covering the checking range that has a detail
// matching the range exactly.
545 for (unsigned i = 0; i < results.size(); i++) {
546 const TextCheckingResult* result = &results[i];
547 if (result->type == TextCheckingTypeGrammar && paragraph.isCheckingRangeCoveredBy(result->location, result->length)) {
548 for (unsigned j = 0; j < result->details.size(); j++) {
549 const GrammarDetail* detail = &result->details[j];
550 ASSERT(detail->length > 0 && detail->location >= 0);
551 if (paragraph.checkingRangeMatches(result->location + detail->location, detail->length)) {
552 String badGrammarPhrase = paragraph.textSubstring(result->location, result->length);
553 ASSERT(badGrammarPhrase.length());
// Copy the detail's guesses into the result vector.
554 for (unsigned k = 0; k < detail->guesses.size(); k++)
555 guesses.append(detail->guesses[k]);
556 m_client->updateSpellingUIWithGrammarString(badGrammarPhrase, *detail);
557 ungrammatical = true;
565 ASSERT_NOT_REACHED();
566 UNUSED_PARAM(checkGrammar);
567 UNUSED_PARAM(misspelled);
568 UNUSED_PARAM(ungrammatical);
569 return Vector<String>();
570 #endif // USE(UNIFIED_TEXT_CHECKING)
// Marks every misspelling in the range by calling findFirstMisspelling() with markAll set to
// true. |firstMisspellingRange| is passed through and receives the first misspelling's range.
// NOTE(review): the declaration of |ignoredOffset| is not visible in this extract.
574 void TextCheckingHelper::markAllMisspellings(RefPtr<Range>& firstMisspellingRange)
576 // Use the "markAll" feature of findFirstMisspelling. Ignore the return value and the "out parameter";
577 // all we need to do is mark every instance.
579 findFirstMisspelling(ignoredOffset, true, firstMisspellingRange);
// Marks every instance of bad grammar in the range by calling findFirstBadGrammar() with
// markAll set to true; the result and the out parameters are discarded.
// NOTE(review): the declaration of |ignoredOffset| is not visible in this extract.
582 void TextCheckingHelper::markAllBadGrammar()
584 ASSERT(WTF_USE_GRAMMAR_CHECKING);
585 // Use the "markAll" feature of findFirstBadGrammar. Ignore the return value and "out parameters"; all we need to
586 // do is mark every instance.
587 GrammarDetail ignoredGrammarDetail;
589 findFirstBadGrammar(ignoredGrammarDetail, ignoredOffset, true);