3 This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
5 Copyright (C) 2007-2012 by Jin-Hwan Cho and Shunsaku Hirata,
6 the dvipdfmx project team.
8 Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
28 * Unicode and Glyph Names, ver. 2.3., Adobe Solution Network
29 * http://partners.adobe.com/asn/tech/type/unicodegn.jsp
55 static int verbose = 0;
58 agl_set_verbose (void)
68 agln = NEW(1, agl_name);
71 agln->n_components = 0;
72 agln->alternate = NULL;
79 agl_release_name (agl_name *agln)
84 next = agln->alternate;
88 RELEASE(agln->suffix);
96 agl_chop_suffix (const char *glyphname, char **suffix)
101 ASSERT(glyphname && suffix);
103 p = strchr(glyphname, '.');
105 len = strlen(glyphname) - strlen(p);
108 *suffix = NEW(strlen(glyphname), char);
109 strcpy(*suffix, glyphname+1);
112 name = NEW(len + 1, char);
113 strncpy(name, glyphname, len);
118 *suffix = NEW(strlen(p) + 1, char);
123 name = NEW(strlen(glyphname) + 1, char);
124 strcpy(name, glyphname);
131 static const char * const modifiers[] = {
132 "acute", "breve", "caron", "cedilla", "circumflex",
133 "dieresis", "dotaccent", "grave", "hungarumlaut",
134 "macron", "ogonek", "ring", "tilde", "commaaccent",
137 /* The following entries are not accents or anything similar,
138 * but PS fonts may have "small" versions of them...
140 "ampersand", "exclam", "exclamdown",
141 "question","questiondown",
146 skip_capital (const char **p, const char *endptr)
150 len = (long) (endptr - (*p));
153 ((**p == 'A' && *(*p+1) == 'E') ||
154 (**p == 'O' && *(*p+1) == 'E'))) {
157 } else if (len >= 3 &&
163 } else if (len >= 5 &&
171 } else if (len >= 1 &&
172 **p >= 'A' && **p <= 'Z') {
181 skip_modifier (const char **p, const char *endptr)
186 len = (long) (endptr - (*p));
188 for (i = 0; modifiers[i] != NULL; i++) {
189 if ((len >= strlen(modifiers[i]) &&
190 !memcmp(*p, modifiers[i], len))) {
191 slen = strlen(modifiers[i]);
201 is_smallcap (const char *glyphname)
204 const char *p, *endptr;
210 len = strlen(glyphname);
212 strcmp(p + len - 5, "small"))
215 endptr = p + len - 5;
218 slen = skip_modifier(&p, endptr);
220 return 1; /* Acutesmall, Gravesmall, etc */
221 else if (slen > 0) { /* ??? */
225 len -= skip_capital(&p, endptr);
227 return 1; /* Asmall, AEsmall, etc */
230 while (len > 0) { /* allow multiple accent */
231 slen = skip_modifier(&p, endptr);
240 #define SUFFIX_LIST_MAX 16
241 #define AGL_VAR_SMCP_IDX 0
245 const char *suffixes[SUFFIX_LIST_MAX];
247 {"small" , "smcp", {"sc", NULL}},
248 {"swash" , "swsh", {NULL}},
249 {"superior" , "sups", {NULL}},
250 {"inferior" , "sinf", {NULL}},
251 {"numerator" , "numr", {NULL}},
252 {"denominator" , "dnom", {NULL}},
253 {"oldstyle" , "onum", {NULL}},
255 /* The following are only used by TeX; there are no
256 * corresponding OTL feature tags.
258 {"display" , NULL, {NULL}},
259 {"text" , NULL, {NULL}},
260 {"big" , NULL, {NULL}},
261 {"bigg" , NULL, {NULL}},
262 {"Big" , NULL, {NULL}},
263 {"Bigg" , NULL, {NULL}},
268 agl_suffix_to_otltag (const char *suffix)
272 for (i = 0; var_list[i].key; i++) {
273 for (j = 0; var_list[i].suffixes[j]; j++) {
274 if (!strcmp(suffix, var_list[i].suffixes[j]))
275 return var_list[i].otl_tag;
277 if (!strcmp(suffix, var_list[i].key))
278 return var_list[i].otl_tag;
279 if (var_list[i].otl_tag &&
280 !strcmp(suffix, var_list[i].otl_tag))
281 return var_list[i].otl_tag;
288 agl_guess_name (const char *glyphname)
292 if (is_smallcap(glyphname))
293 return AGL_VAR_SMCP_IDX;
295 len = strlen(glyphname);
296 for (i = 1; var_list[i].key != NULL; i++) {
297 if (len > strlen(var_list[i].key) &&
298 !strcmp(glyphname+len-strlen(var_list[i].key), var_list[i].key)
308 agl_normalized_name (char *glyphname)
317 agln = agl_new_name();
318 suffix = strchr(glyphname, '.');
320 n = strlen(glyphname) - strlen(suffix);
321 if (suffix[1] != '\0') {
322 agln->suffix = NEW(strlen(suffix), char);
323 strcpy(agln->suffix, suffix+1);
325 agln->name = NEW(n+1, char);
326 memcpy(agln->name, glyphname, n);
327 agln->name[n] = '\0';
328 } else if (is_smallcap(glyphname)) {
329 n = strlen(glyphname) - 5;
330 agln->suffix = NEW(3, char);
331 strcpy(agln->suffix, "sc");
332 agln->name = NEW(n+1, char);
333 for (i = 0; i < n; i++) {
334 agln->name[i] = isupper(glyphname[i]) ?
335 (glyphname[i] + 32) : glyphname[i];
337 agln->name[n] = '\0';
341 #define SET_STRING(p,s) do {\
342 (p) = NEW(strlen((s))+1, char);\
345 var_idx = agl_guess_name(glyphname);
347 !var_list[var_idx].key) {
348 n = strlen(glyphname);
350 n = strlen(glyphname) - strlen(var_list[var_idx].key);
351 if (var_list[var_idx].suffixes[0])
352 SET_STRING(agln->suffix, var_list[var_idx].suffixes[0]);
354 SET_STRING(agln->suffix, var_list[var_idx].key);
357 agln->name = NEW(n+1, char);
358 memcpy(agln->name, glyphname, n);
359 agln->name[n] = '\0';
365 static struct ht_table aglmap;
368 hval_free (void *hval)
370 agl_release_name((struct agl_name *) hval);
376 ht_init_table(&aglmap, hval_free);
377 agl_load_listfile(AGL_EXTRA_LISTFILE, 0);
378 if (agl_load_listfile(AGL_PREDEF_LISTFILE, 1) < 0) {
379 WARN("Failed to load AGL file \"%s\"...", AGL_PREDEF_LISTFILE);
381 if (agl_load_listfile(AGL_DEFAULT_LISTFILE, 0) < 0) {
382 WARN("Failed to load AGL file \"%s\"...", AGL_DEFAULT_LISTFILE);
389 ht_clear_table(&aglmap);
392 #define WBUF_SIZE 1024
395 agl_load_listfile (const char *filename, int is_predef)
398 const char *p, *endptr;
400 char wbuf[WBUF_SIZE];
406 fp = DPXFOPEN(filename, DPX_RES_TYPE_AGL);
412 MESG("<AGL:%s", filename);
414 while ((p = mfgets(wbuf, WBUF_SIZE, fp)) != NULL) {
415 agl_name *agln, *duplicate;
418 long unicodes[AGL_MAX_UNICODES];
420 endptr = p + strlen(p);
421 skip_white(&p, endptr);
423 /* Need table version check. */
424 if (!p || p[0] == '#' || p >= endptr)
426 nextptr = strchr(p, ';');
427 if (!nextptr || nextptr == p)
430 name = parse_ident(&p, nextptr);
432 skip_white(&p, endptr);
433 if (!name || p[0] != ';') {
434 WARN("Invalid AGL entry: %s", wbuf);
441 skip_white(&p, endptr);
445 ((p[0] >= '0' && p[0] <= '9') ||
446 (p[0] >= 'A' && p[0] <= 'F'))
449 if (n_unicodes >= AGL_MAX_UNICODES) {
450 WARN("Too many Unicode values");
453 unicodes[n_unicodes++] = strtol(p, &nextptr, 16);
456 skip_white(&p, endptr);
459 if (n_unicodes == 0) {
460 WARN("AGL entry ignored (no mapping): %s", wbuf);
465 agln = agl_normalized_name(name);
466 agln->is_predef = is_predef;
467 agln->n_components = n_unicodes;
468 for (i = 0; i < n_unicodes; i++) {
469 agln->unicodes[i] = unicodes[i];
472 duplicate = ht_lookup_table(&aglmap, name, strlen(name));
474 ht_append_table(&aglmap, name, strlen(name), agln);
476 while (duplicate->alternate)
477 duplicate = duplicate->alternate;
478 duplicate->alternate = agln;
483 MESG("agl: %s [%s.%s] -->", name, agln->name, agln->suffix);
485 MESG("agl: %s [%s] -->", name, agln->name);
486 for (i = 0; i < agln->n_components; i++) {
487 if (agln->unicodes[i] > 0xffff) {
488 MESG(" U+%06X", agln->unicodes[i]);
490 MESG(" U+%04X", agln->unicodes[i]);
508 agl_lookup_list (const char *glyphname)
515 agln = ht_lookup_table(&aglmap, glyphname, strlen(glyphname));
521 agl_name_is_unicode (const char *glyphname)
529 suffix = strchr(glyphname, '.');
530 len = (int) (suffix ? suffix - glyphname : strlen(glyphname));
532 * uni02ac is an invalid glyph name and is mapped to the empty string.
534 if (len >= 7 && (len - 3) % 4 == 0 &&
535 !strncmp(glyphname, "uni", 3)) {
538 * Check if the 4th character is an uppercase hexadecimal digit.
539 * "union" should not be treated as Unicode glyph name.
541 if (isdigit(c) || (c >= 'A' && c <= 'F'))
545 } else if (len <= 7 && len >= 5 &&
546 glyphname[0] == 'u') {
547 for (i = 1; i < len - 1; i++) {
549 if (!isdigit(c) && (c < 'A' || c > 'F'))
559 agl_name_convert_unicode (const char *glyphname)
564 if (!agl_name_is_unicode(glyphname))
567 if (strlen(glyphname) > 7 && *(glyphname+7) != '.') {
568 WARN("Mapping to multiple Unicode characters not supported.");
572 if (glyphname[1] == 'n')
577 while (*p != '\0' && *p != '.') {
578 if (!isdigit(*p) && (*p < 'A' || *p > 'F')) {
579 WARN("Invalid char %c in Unicode glyph name %s.", *p, glyphname);
583 ucv += isdigit(*p) ? *p - '0' : *p - 'A' + 10;
587 if (!UC_is_valid(ucv)) {
589 WARN("Invalid Unicode code value U+%04X.", ucv);
591 WARN("Invalid Unicode code value U+%06X.", ucv);
602 xtol (const char *start, int len)
608 if (isdigit(*start)) {
610 } else if (*start >= 'A' && *start <= 'F') {
611 v += *start - 'A' + 10;
621 #define IS_PUA(u) (((u) >= 0x00E000L && (u) <= 0x00F8FFL) || \
622 ((u) >= 0x0F0000L && (u) <= 0x0FFFFDL) || \
623 ((u) >= 0x100000L && (u) <= 0x10FFFDL) \
627 put_unicode_glyph (const char *name,
628 unsigned char **dstpp, unsigned char *limptr)
638 ucv = xtol(p, strlen(p));
639 len += UC_sput_UTF16BE (ucv, dstpp, limptr);
644 len += UC_sput_UTF16BE (ucv, dstpp, limptr);
653 agl_sput_UTF16BE (const char *glyphstr,
654 unsigned char **dstpp, unsigned char *limptr,
659 const char *p, *endptr;
661 ASSERT(glyphstr && dstpp);
664 endptr = strchr(p, '.');
666 endptr = p + strlen(p);
673 agl_name *agln0, *agln1 = NULL;
675 delim = strchr(p, '_');
678 * Glyph names starting with an underscore, or containing two
679 * consecutive underscores, are not allowed?
681 WARN("Invalid glyph name component in \"%s\".", glyphstr);
685 return len; /* Cannot continue */
686 } else if (!delim || delim > endptr) {
689 sub_len = (long) (delim - p);
691 name = NEW(sub_len+1, char);
692 memcpy(name, p, sub_len);
693 name[sub_len] = '\0';
695 if (agl_name_is_unicode(name)) {
696 sub_len = put_unicode_glyph(name, dstpp, limptr);
703 agln1 = agl_lookup_list(name);
704 if (!agln1 || (agln1->n_components == 1 &&
705 IS_PUA(agln1->unicodes[0]))) {
706 agln0 = agl_normalized_name(name);
708 if (verbose > 1 && agln0->suffix) {
709 WARN("agl: fix %s --> %s.%s",
710 name, agln0->name, agln0->suffix);
712 agln1 = agl_lookup_list(agln0->name);
713 agl_release_name(agln0);
717 for (i = 0; i < agln1->n_components; i++) {
718 len += UC_sput_UTF16BE (agln1->unicodes[i], dstpp, limptr);
722 WARN("No Unicode mapping for glyph name \"%s\" found.", name);
737 agl_get_unicodes (const char *glyphstr,
738 long *unicodes, int max_unicodes)
741 const char *p, *endptr;
744 endptr = strchr(p, '.');
746 endptr = p + strlen(p);
753 agl_name *agln0, *agln1 = NULL;
755 delim = strchr(p, '_');
758 * Glyph names starting with an underscore, or containing two
759 * consecutive underscores, are not allowed?
761 WARN("Invalid glyph name component in \"%s\".", glyphstr);
762 return -1; /* Cannot continue */
763 } else if (!delim || delim > endptr) {
766 sub_len = (long) (delim - p);
768 name = NEW(sub_len+1, char);
769 memcpy(name, p, sub_len);
770 name[sub_len] = '\0';
772 if (agl_name_is_unicode(name)) {
774 if (p[1] != 'n') { /* uXXXXXXXX */
775 if (count >= max_unicodes) {
780 unicodes[count++] = xtol(p, strlen(p));
784 if (count >= max_unicodes) {
788 unicodes[count++] = xtol(p, 4);
793 agln1 = agl_lookup_list(name);
794 if (!agln1 || (agln1->n_components == 1 &&
795 IS_PUA(agln1->unicodes[0]))) {
796 agln0 = agl_normalized_name(name);
798 if (verbose > 1 && agln0->suffix) {
799 WARN("agl: fix %s --> %s.%s",
800 name, agln0->name, agln0->suffix);
802 agln1 = agl_lookup_list(agln0->name);
803 agl_release_name(agln0);
807 if (count + agln1->n_components > max_unicodes) {
811 for (i = 0; i < agln1->n_components; i++) {
812 unicodes[count++] = agln1->unicodes[i];
816 WARN("No Unicode mapping for glyph name \"%s\" found.", name);