It looks like the word segments don't have valid font info where this occurs anyway, so descending doesn't help.
if (!text)\r
domNode->GetChildCount(&childCount);\r
\r
- if (fontStatus == FontInfo_NoInfo && childCount > 0) {\r
+ long nodeType = 0;\r
+ if (fontStatus == FontInfo_NoInfo && childCount > 0\r
+ // We never want to descend beneath word nodes,\r
+ // as word segments sometimes seem to double characters.\r
+ && domNode->GetType(&nodeType) == S_OK && nodeType != CPDDomNode_Word\r
+ ) {\r
// HACK: #2175: Reader 10.1 and later report FontInfo_NoInfo even when there is mixed font info.\r
// Therefore, we must assume FontInfo_MixedInfo.\r
fontStatus = FontInfo_MixedInfo;\r