src/dvipdfmx-pu/src/otl_conf.c

   1 /*
   2
   3     This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
   4
   5     Copyright (C) 2002-2012 by Jin-Hwan Cho and Shunsaku Hirata,
   6     the dvipdfmx project team.
   7
   8     This program is free software; you can redistribute it and/or modify
   9     it under the terms of the GNU General Public License as published by
  10     the Free Software Foundation; either version 2 of the License, or
  11     (at your option) any later version.
  12
  13     This program is distributed in the hope that it will be useful,
  14     but WITHOUT ANY WARRANTY; without even the implied warranty of
  15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16     GNU General Public License for more details.
  17
  18     You should have received a copy of the GNU General Public License
  19     along with this program; if not, write to the Free Software
  20     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  21 */
  22
  23 #ifdef  _HAVE_CONFIG_H
  24 #include "config.h"
  25 #endif /* _HAVE_CONFIG_H */
  26
  27 #include "system.h"
  28 #include "error.h"
  29 #include "mem.h"
  30 #include "dpxfile.h"
  31 #include "dpxutil.h"
  32
  33 #include "pdfobj.h"
  34 #include "pdfparse.h"
  35
  36 #include "agl.h"
  37
  38 #include "otl_conf.h"
  39
  40 #define VERBOSE_LEVEL_MIN 0
  41 static int verbose = 0;
  42 void
  43 otl_conf_set_verbose (void)
  44 {
  45   verbose++;
  46 }
  47
  48 static pdf_obj *
  49 parse_uc_coverage (pdf_obj *gclass, const char **pp, const char *endptr)
  50 {
  51   pdf_obj *coverage;
  52   pdf_obj *value;
  53   long     ucv = 0;
  54   char    *glyphname, *glyphclass;
  55
  56   if (*pp + 1 >= endptr)
  57     return NULL;
  58
  59   if (**pp == '[')
  60     (*pp)++;
  61
  62   coverage = pdf_new_array();
  63
  64   while (*pp < endptr) {
  65     skip_white(pp, endptr);
  66     switch (**pp) {
  67     case ']': case ';':
  68       (*pp)++;
  69       return coverage;
  70       break;
  71     case ',':
  72       (*pp)++;
  73       break;
  74     case '@':
  75       {
  76         pdf_obj *cvalues;
  77         int      i, size;
  78
  79         (*pp)++;
  80         glyphclass = parse_c_ident(pp, endptr);
  81         cvalues = pdf_lookup_dict(gclass, glyphclass);
  82         if (!cvalues)
  83           ERROR("%s not defined...", glyphclass);
  84         size    = pdf_array_length(cvalues);
  85         for (i = 0; i < size; i++) {
  86           pdf_add_array(coverage,
  87                         pdf_link_obj(pdf_get_array(cvalues, i)));
  88         }
  89       }
  90       break;
  91     default:
  92       glyphname  = parse_c_ident(pp, endptr);
  93       if (!glyphname)
  94         ERROR("Invalid Unicode character specified.");
  95
  96       skip_white(pp, endptr);
  97       if (*pp + 1 < endptr && **pp == '-') {
  98         value = pdf_new_array();
  99
 100         if (agl_get_unicodes(glyphname, &ucv, 1) != 1)
 101           ERROR("Invalid Unicode char: %s", glyphname);
 102         pdf_add_array(value, pdf_new_number(ucv));
 103         RELEASE(glyphname);
 104
 105         (*pp)++; skip_white(pp, endptr);
 106         glyphname = parse_c_ident(pp, endptr);
 107         if (!glyphname)
 108           ERROR("Invalid Unicode char: %s", glyphname);
 109         if (agl_get_unicodes(glyphname, &ucv, 1) != 1)
 110           ERROR("Invalid Unicode char: %s", glyphname);
 111         pdf_add_array(value, pdf_new_number(ucv));
 112         RELEASE(glyphname);
 113
 114       } else {
 115         if (agl_get_unicodes(glyphname, &ucv, 1) != 1)
 116           ERROR("Invalid Unicode char: %s", glyphname);
 117         value = pdf_new_number(ucv);
 118         RELEASE(glyphname);
 119       }
 120       pdf_add_array(coverage, value);
 121       break;
 122     }
 123     skip_white(pp, endptr);
 124   }
 125
 126   return coverage;
 127 }
 128
/* Forward declaration; parse_block() is defined further down in this file. */
static pdf_obj *parse_block (pdf_obj *gclass, const char **pp, const char *endptr);
 130
 131 static void
 132 add_rule (pdf_obj *rule, pdf_obj *gclass,
 133           char *first, char *second, char *suffix)
 134 {
 135   pdf_obj *glyph1, *glyph2;
 136 #define MAX_UNICODES 16
 137   long     unicodes[MAX_UNICODES];
 138   int      i, n_unicodes;
 139
 140   if (first[0] == '@') {
 141     glyph1 = pdf_lookup_dict(gclass, &first[1]);
 142     if (!glyph1) {
 143       WARN("No glyph class \"%s\" found.", &first[1]);
 144       return;
 145     }
 146     pdf_link_obj(glyph1);
 147
 148     if (verbose > VERBOSE_LEVEL_MIN) {
 149       MESG("otl_conf>> Output glyph sequence: %s\n", first);
 150     }
 151
 152   } else {
 153     n_unicodes = agl_get_unicodes(first, unicodes, MAX_UNICODES);
 154     if (n_unicodes < 1) {
 155       WARN("Failed to convert glyph \"%s\" to Unicode sequence.",
 156            first);
 157       return;
 158     }
 159     glyph1 = pdf_new_array();
 160
 161     if (verbose > VERBOSE_LEVEL_MIN) {
 162       MESG("otl_conf>> Output glyph sequence: %s ->", first);
 163     }
 164
 165     for (i = 0; i < n_unicodes; i++) {
 166       pdf_add_array(glyph1, pdf_new_number(unicodes[i]));
 167
 168       if (verbose > VERBOSE_LEVEL_MIN) {
 169         if (unicodes[i] < 0x10000) {
 170           MESG(" U+%04X", unicodes[i]);
 171         } else {
 172           MESG(" U+%06X", unicodes[i]);
 173         }
 174       }
 175     }
 176
 177     if (verbose > VERBOSE_LEVEL_MIN) {
 178       MESG("\n");
 179     }
 180   }
 181
 182   if (second[0] == '@') {
 183     glyph2 = pdf_lookup_dict(gclass, &second[1]);
 184     if (!glyph2) {
 185       WARN("No glyph class \"%s\" found.", &second[1]);
 186       return;
 187     }
 188     pdf_link_obj(glyph2);
 189
 190     if (verbose > VERBOSE_LEVEL_MIN) {
 191       MESG("otl_conf>> Input glyph sequence: %s (%s)\n", second, suffix);
 192     }
 193
 194   } else {
 195     n_unicodes = agl_get_unicodes(second, unicodes, 16);
 196     if (n_unicodes < 1) {
 197       WARN("Failed to convert glyph \"%s\" to Unicode sequence.",
 198            second);
 199       return;
 200     }
 201
 202     if (verbose > VERBOSE_LEVEL_MIN) {
 203       if (suffix)
 204         MESG("otl_conf>> Input glyph sequence: %s.%s ->", second, suffix);
 205       else
 206         MESG("otl_conf>> Input glyph sequence: %s ->", second);
 207     }
 208
 209     glyph2 = pdf_new_array();
 210     for (i = 0; i < n_unicodes; i++) {
 211       pdf_add_array(glyph2, pdf_new_number(unicodes[i]));
 212
 213       if (verbose > VERBOSE_LEVEL_MIN) {
 214         if (unicodes[i] < 0x10000) {
 215           MESG(" U+%04X", unicodes[i]);
 216         } else {
 217           MESG(" U+%06X", unicodes[i]);
 218         }
 219       }
 220     }
 221     if (verbose > VERBOSE_LEVEL_MIN) {
 222       MESG(" (%s)\n", suffix);
 223     }
 224   }
 225
 226   /* OK */
 227   if (suffix) {
 228     pdf_add_array(rule, pdf_new_string(suffix, strlen(suffix)));
 229   } else {
 230     pdf_add_array(rule, pdf_new_null());
 231   }
 232   pdf_add_array(rule, glyph1);
 233   pdf_add_array(rule, glyph2);
 234 }
 235
 236 static pdf_obj *
 237 parse_substrule (pdf_obj *gclass, const char **pp, const char *endptr)
 238 {
 239   pdf_obj *substrule;
 240   char    *token;
 241
 242   skip_white(pp, endptr);
 243   if (*pp < endptr && **pp == '{')
 244     (*pp)++;
 245
 246   skip_white(pp, endptr);
 247   if (*pp >= endptr)
 248     return NULL;
 249
 250   substrule = pdf_new_array();
 251   while (*pp < endptr && **pp != '}') {
 252     skip_white(pp, endptr);
 253     if (*pp >= endptr)
 254       break;
 255
 256     if (**pp == '#') {
 257       while (*pp < endptr) {
 258         if (**pp == '\r' || **pp == '\n') {
 259           (*pp)++;
 260           break;
 261         }
 262         (*pp)++;
 263       }
 264       continue;
 265     } else if (**pp == ';') {
 266       (*pp)++;
 267       continue;
 268     }
 269
 270     skip_white(pp, endptr);
 271     token = parse_c_ident(pp, endptr);
 272     if (!token)
 273       break;
 274
 275     if (!strcmp(token, "assign") || !strcmp(token, "substitute")) {
 276       char *tmp, *first, *second, *suffix;
 277
 278       skip_white(pp, endptr);
 279
 280       first = parse_c_ident(pp, endptr);
 281       if (!first)
 282         ERROR("Syntax error (1)");
 283
 284       skip_white(pp, endptr);
 285       tmp = parse_c_ident(pp, endptr);
 286       if (strcmp(tmp, "by") && strcmp(tmp, "to"))
 287         ERROR("Syntax error (2): %s", *pp);
 288
 289       skip_white(pp, endptr);
 290       second = parse_c_ident(pp, endptr); /* allows @ */
 291       if (!second)
 292         ERROR("Syntax error (3)");
 293
 294       /* (assign|substitute) tag dst src */
 295       pdf_add_array(substrule, pdf_new_name(token));
 296       if (*pp + 1 < endptr && **pp == '.') {
 297         (*pp)++;
 298         suffix = parse_c_ident(pp, endptr);
 299       } else {
 300         suffix = NULL;
 301       }
 302       add_rule(substrule, gclass, first, second, suffix);
 303
 304       RELEASE(first);
 305       RELEASE(tmp);
 306       RELEASE(second);
 307       if (suffix)
 308         RELEASE(suffix);
 309     } else {
 310       ERROR("Unkown command %s.", token);
 311     }
 312     RELEASE(token);
 313     skip_white(pp, endptr);
 314   }
 315
 316   if (*pp < endptr && **pp == '}')
 317     (*pp)++;
 318   return substrule;
 319 }
 320
 321 static pdf_obj *
 322 parse_block (pdf_obj *gclass, const char **pp, const char *endptr)
 323 {
 324   pdf_obj *rule;
 325   char    *token, *tmp;
 326
 327   skip_white(pp, endptr);
 328   if (*pp < endptr && **pp == '{')
 329     (*pp)++;
 330
 331   skip_white(pp, endptr);
 332   if (*pp >= endptr)
 333     return NULL;
 334
 335   rule   = pdf_new_dict();
 336   while (*pp < endptr && **pp != '}') {
 337     skip_white(pp, endptr);
 338     if (*pp >= endptr)
 339       break;
 340     if (**pp == '#') {
 341       while (*pp < endptr) {
 342         if (**pp == '\r' || **pp == '\n') {
 343           (*pp)++;
 344           break;
 345         }
 346         (*pp)++;
 347       }
 348       continue;
 349     } else if (**pp == ';') {
 350       (*pp)++;
 351       continue;
 352     }
 353
 354     skip_white(pp, endptr);
 355     token = parse_c_ident(pp, endptr);
 356     if (!token)
 357       break;
 358
 359     if (!strcmp(token, "script") ||
 360         !strcmp(token, "language")) {
 361       int  i, len;
 362
 363       skip_white(pp, endptr);
 364       len = 0;
 365       while (*pp + len < endptr && *(*pp + len) != ';') {
 366         len++;
 367       }
 368       if (len > 0) {
 369         tmp = NEW(len+1, char);
 370         memset(tmp, 0, len+1);
 371         for (i = 0; i < len; i++) {
 372           if (!isspace(**pp))
 373             tmp[i] = **pp;
 374           (*pp)++;
 375         }
 376         pdf_add_dict(rule,
 377                      pdf_new_name(token),
 378                      pdf_new_string(tmp, strlen(tmp)));
 379
 380         if (verbose > VERBOSE_LEVEL_MIN) {
 381           MESG("otl_conf>> Current %s set to \"%s\"\n", token, tmp);
 382         }
 383
 384         RELEASE(tmp);
 385       }
 386     } else if (!strcmp(token, "option")) {
 387       pdf_obj *opt_dict, *opt_rule;
 388
 389       opt_dict = pdf_lookup_dict(rule, "option");
 390       if (!opt_dict) {
 391         opt_dict = pdf_new_dict();
 392         pdf_add_dict(rule,
 393                      pdf_new_name("option"), opt_dict);
 394       }
 395
 396       skip_white(pp, endptr);
 397       tmp = parse_c_ident(pp, endptr);
 398
 399       if (verbose > VERBOSE_LEVEL_MIN) {
 400         MESG("otl_conf>> Reading option \"%s\"\n", tmp);
 401       }
 402
 403       skip_white(pp, endptr);
 404       opt_rule = parse_block(gclass, pp, endptr);
 405       pdf_add_dict(opt_dict, pdf_new_name(tmp), opt_rule);
 406
 407       RELEASE(tmp);
 408     } else if (!strcmp(token, "prefered") ||
 409                !strcmp(token, "required") ||
 410                !strcmp(token, "optional")) {
 411       pdf_obj *subst, *rule_block;
 412
 413       if (verbose > VERBOSE_LEVEL_MIN) {
 414         MESG("otl_conf>> Reading block (%s)\n", token);
 415       }
 416
 417       skip_white(pp, endptr);
 418       if (*pp >= endptr || **pp != '{')
 419         ERROR("Syntax error (1)");
 420
 421       rule_block = parse_substrule(gclass, pp, endptr);
 422       subst = pdf_lookup_dict(rule, "rule");
 423       if (!subst) {
 424         subst = pdf_new_array();
 425         pdf_add_dict(rule, pdf_new_name("rule"), subst);
 426       }
 427       pdf_add_array(subst, pdf_new_number(token[0]));
 428       pdf_add_array(subst, rule_block);
 429     } else if (token[0] == '@') {
 430       pdf_obj *coverage;
 431
 432       skip_white(pp, endptr);
 433       (*pp)++; /* = */
 434       skip_white(pp, endptr);
 435
 436       if (verbose > VERBOSE_LEVEL_MIN) {
 437         MESG("otl_conf>> Glyph class \"%s\"\n", token);
 438       }
 439
 440       coverage = parse_uc_coverage(gclass, pp, endptr);
 441       if (!coverage)
 442         ERROR("No valid Unicode characters...");
 443
 444       pdf_add_dict(gclass,
 445                    pdf_new_name(&token[1]), coverage);
 446     }
 447     RELEASE(token);
 448     skip_white(pp, endptr);
 449   }
 450
 451   if (*pp < endptr && **pp == '}')
 452     (*pp)++;
 453   return rule;
 454 }
 455
 456
 457 static pdf_obj *
 458 otl_read_conf (const char *conf_name)
 459 {
 460   pdf_obj *rule;
 461   pdf_obj *gclass;
 462   FILE    *fp;
 463   char    *filename, *wbuf, *p, *endptr;
 464   const char *pp;
 465   long     size, len;
 466
 467   filename = NEW(strlen(conf_name)+strlen(".otl")+1, char);
 468   strcpy(filename, conf_name);
 469   strcat(filename, ".otl");
 470
 471   fp = DPXFOPEN(filename, DPX_RES_TYPE_TEXT);
 472   if (!fp) {
 473     RELEASE(filename);
 474     return NULL;
 475   }
 476
 477   size = file_size(fp);
 478   rewind(fp);
 479
 480   if (verbose > VERBOSE_LEVEL_MIN) {
 481     MESG("\n");
 482     MESG("otl_conf>> Layout config. \"%s\" found: file=\"%s\" (%ld bytes)\n",
 483          conf_name, filename, size);
 484   }
 485   RELEASE(filename);
 486   if (size < 1)
 487     return NULL;
 488
 489   wbuf = NEW(size, char);
 490   p = wbuf; endptr = p + size;
 491   while (size > 0 && p < endptr) {
 492     len = fread(p, sizeof(char), size, fp);
 493     p    += len;
 494     size -= len;
 495   }
 496
 497   pp     = wbuf;
 498   gclass = pdf_new_dict();
 499   rule   = parse_block(gclass, &pp, endptr);
 500   pdf_release_obj(gclass);
 501
 502   RELEASE(wbuf);
 503
 504   return rule;
 505 }
 506
/*
 * Dictionary of loaded layout configurations, keyed by configuration name.
 * Created by otl_init_conf() (or lazily by otl_find_conf()) and released
 * by otl_close_conf().
 */
static pdf_obj *otl_confs = NULL;
 508
 509 pdf_obj *
 510 otl_find_conf (const char *conf_name)
 511 {
 512   pdf_obj *rule;
 513   pdf_obj *script, *language;
 514   pdf_obj *options;
 515
 516   return  NULL;
 517
 518   if (otl_confs)
 519     rule = pdf_lookup_dict(otl_confs, conf_name);
 520   else {
 521     otl_confs = pdf_new_dict();
 522     rule = NULL;
 523   }
 524
 525   if (!rule) {
 526     rule = otl_read_conf(conf_name);
 527     if (rule) {
 528       pdf_add_dict(otl_confs,
 529                    pdf_new_name(conf_name), rule);
 530       script   = pdf_lookup_dict(rule, "script");
 531       language = pdf_lookup_dict(rule, "language");
 532       options  = pdf_lookup_dict(rule, "option");
 533       if (!script) {
 534         script = pdf_new_string("*", 1);
 535         pdf_add_dict(rule,
 536                      pdf_new_name("script"),
 537                      script);
 538         WARN("Script unspecified in \"%s\"...", conf_name);
 539       }
 540       if (!language) {
 541         language = pdf_new_string("dflt", 4);
 542         pdf_add_dict(rule,
 543                      pdf_new_name("language"),
 544                      language);
 545         WARN("Language unspecified in \"%s\"...", conf_name);
 546       }
 547
 548       if (options) {
 549         pdf_obj *optkeys, *opt, *key;
 550         long     i, num_opts;
 551
 552         optkeys  = pdf_dict_keys(options);
 553         num_opts = pdf_array_length(optkeys);
 554         for (i = 0; i < num_opts; i++) {
 555           key = pdf_get_array(optkeys, i);
 556           opt = pdf_lookup_dict(options, pdf_name_value(key));
 557           if (!pdf_lookup_dict(opt, "script"))
 558             pdf_add_dict(opt,
 559                          pdf_new_name("script"),
 560                          pdf_link_obj(script));
 561           if (!pdf_lookup_dict(opt, "language"))
 562             pdf_add_dict(opt,
 563                          pdf_new_name("language"),
 564                          pdf_link_obj(language));
 565         }
 566         pdf_release_obj(optkeys);
 567       }
 568
 569     }
 570   }
 571
 572   return rule;
 573 }
 574
 575
 576 char *
 577 otl_conf_get_script (pdf_obj *conf)
 578 {
 579   pdf_obj *script;
 580
 581   ASSERT(conf);
 582
 583   script = pdf_lookup_dict(conf, "script");
 584
 585   return pdf_string_value(script);
 586 }
 587
 588 char *
 589 otl_conf_get_language (pdf_obj *conf)
 590 {
 591   pdf_obj *language;
 592
 593   ASSERT(conf);
 594
 595   language = pdf_lookup_dict(conf, "language");
 596
 597   return pdf_string_value(language);
 598 }
 599
 600 pdf_obj *
 601 otl_conf_get_rule (pdf_obj *conf)
 602 {
 603   ASSERT(conf);
 604   return pdf_lookup_dict(conf, "rule");
 605 }
 606
 607 pdf_obj *
 608 otl_conf_find_opt (pdf_obj *conf, const char *opt_tag)
 609 {
 610   pdf_obj *opt_conf = NULL;
 611   pdf_obj *options;
 612
 613   ASSERT(conf);
 614
 615   options = pdf_lookup_dict(conf, "option");
 616   if (options && opt_tag)
 617     opt_conf = pdf_lookup_dict(options, opt_tag);
 618   else
 619     opt_conf = NULL;
 620
 621   return opt_conf;
 622 }
 623
 624 void
 625 otl_init_conf (void)
 626 {
 627   if (otl_confs)
 628     pdf_release_obj(otl_confs);
 629   otl_confs = pdf_new_dict();
 630
 631   if (verbose > VERBOSE_LEVEL_MIN + 10) {
 632     pdf_release_obj(pdf_ref_obj(otl_confs));
 633   }
 634 }
 635
 636 void
 637 otl_close_conf (void)
 638 {
 639   pdf_release_obj(otl_confs);
 640   otl_confs = NULL;
 641 }