3 This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
5 Copyright (C) 2007-2012 by Jin-Hwan Cho and Shunsaku Hirata,
6 the dvipdfmx project team.
8 Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
36 #include "pdfximage.h"
/*
 * Compile-time feature switches for the html special handler.
 *
 * The code in this file tests these with #ifdef / defined(), not with #if,
 * so only the presence of the macro matters: changing the value to 0 does
 * NOT disable a feature; the #define itself has to be removed.
 *
 *   ENABLE_HTML_IMG_SUPPORT   - selects the real implementation of the "img"
 *                               tag handler (spc_html__img_empty) and atopt().
 *   ENABLE_HTML_SVG_TRANSFORM - enables the "svg:transform" attribute on
 *                               "img" and the cvt_a_to_tmatrix() parser.
 *   ENABLE_HTML_SVG_OPACITY   - enables the "svg:opacity" attribute on "img"
 *                               and the ExtGState helpers it needs.
 */
46 #define ENABLE_HTML_IMG_SUPPORT 1
47 #define ENABLE_HTML_SVG_TRANSFORM 1
48 #define ENABLE_HTML_SVG_OPACITY 1
51 * Please rewrite this or remove html special support
/*
 * Kind of html anchor that is currently open, as stored in the
 * pending_type member of the html state.  Elsewhere in this file -1 is
 * stored to mean "no anchor pending" and the checks are written as
 * pending_type >= 0, so these values must stay non-negative.
 */
54 #define ANCHOR_TYPE_HREF 0 /* set by html_open_link(): <a href="..."> */
55 #define ANCHOR_TYPE_NAME 1 /* set by html_open_dest(): <a name="..."> */
68 static struct spc_html_ _html_state = {
74 #ifdef ENABLE_HTML_SVG_TRANSFORM
/*
 * Forward declaration; the definition is near the end of this file.
 * Parses one SVG-style transform item (matrix, translate, scale, rotate,
 * ...) from the NUL-terminated text at ptr and stores the result in M.
 * The "img" handler treats a non-zero return value as an error.
 * NOTE(review): nextptr is presumably set to the position just after the
 * parsed item (the caller passes the address of its own cursor) -- confirm
 * against the definition.
 */
75 static int cvt_a_to_tmatrix (pdf_tmatrix *M, const char *ptr, const char **nextptr);
76 #endif /* ENABLE_HTML_SVG_TRANSFORM */
81 char *_p = (char *) (s); \
82 while (*(_p) != 0) { \
83 if (*(_p) >= 'A' && *(_p) <= 'Z') { \
84 *(_p) = (*(_p) - 'A') + 'a'; \
91 parse_key_val (const char **pp, const char *endptr, char **kp, char **vp)
97 for (p = *pp ; p < endptr && isspace(*p); p++);
99 while (!error && p < endptr &&
100 ((*p >= 'a' && *p <= 'z') ||
101 (*p >= 'A' && *p <= 'Z'))
107 ((*p >= 'a' && *p <= 'z') ||
108 (*p >= 'A' && *p <= 'Z') ||
109 (*p >= '0' && *p <= '9') ||
110 *p == '-' || *p == ':'
120 k = NEW(n + 1, char);
121 memcpy(k, q, n); k[n] = '\0';
122 if (p + 2 >= endptr || p[0] != '=' || (p[1] != '\"' && p[1] != '\'')) {
123 RELEASE(k); k = NULL;
128 p += 2; /* skip '="' */
129 for (q = p, n = 0; p < endptr && *p != qchr; p++, n++);
130 if (p == endptr || *p != qchr)
133 v = NEW(n + 1, char);
134 memcpy(v, q, n); v[n] = '\0';
136 pdf_add_dict(t->attr,
138 pdf_new_string(v, n));
147 for ( ; p < endptr && isspace(*p); p++);
151 *kp = k; *vp = v; *pp = p;
/*
 * Limits and tag kinds used by read_html_tag() and its caller.
 *
 * HTML_TAG_NAME_MAX is the longest tag name accepted, not counting the
 * terminating NUL (the caller's buffer is HTML_TAG_NAME_MAX + 1 bytes).
 *
 * The three tag kinds must be distinct.  EMPTY used to share the value 1
 * with OPEN, which made a self-closing tag such as <a ... /> look exactly
 * like an opening tag, so the "empty anchor" diagnostic in the tag
 * dispatcher could never be reached through the tag type.
 * The "base" and "img" handlers only compare against HTML_TAG_TYPE_CLOSE,
 * so they keep treating "open" and "empty" alike.
 */
#define HTML_TAG_NAME_MAX   127
#define HTML_TAG_TYPE_EMPTY 0  /* <tag ... />  */
#define HTML_TAG_TYPE_OPEN  1  /* <tag ...>    */
#define HTML_TAG_TYPE_CLOSE 2  /* </tag>       */
161 read_html_tag (char *name, pdf_obj *attr, int *type, const char **pp, const char *endptr)
164 int n = 0, error = 0;
166 for ( ; p < endptr && isspace(*p); p++);
167 if (p >= endptr || *p != '<')
170 *type = HTML_TAG_TYPE_OPEN;
171 for (++p; p < endptr && isspace(*p); p++);
172 if (p < endptr && *p == '/') {
173 *type = HTML_TAG_TYPE_CLOSE;
174 for (++p; p < endptr && isspace(*p); p++);
177 #define ISDELIM(c) ((c) == '>' || (c) == '/' || isspace(c))
178 for (n = 0; p < endptr && n < HTML_TAG_NAME_MAX && !ISDELIM(*p); n++, p++) {
182 if (n == 0 || p == endptr || !ISDELIM(*p)) {
187 for ( ; p < endptr && isspace(*p); p++);
188 while (p < endptr && !error && *p != '/' && *p != '>') {
189 char *kp = NULL, *vp = NULL;
190 error = parse_key_val(&p, endptr, &kp, &vp);
195 pdf_new_string(vp, strlen(vp) + 1)); /* include trailing NULL here!!! */
199 for ( ; p < endptr && isspace(*p); p++);
206 if (p < endptr && *p == '/') {
207 *type = HTML_TAG_TYPE_EMPTY;
208 for (++p; p < endptr && isspace(*p); p++);
210 if (p == endptr || *p != '>') {
223 spc_handler_html__init (struct spc_env *spe, struct spc_arg *ap, void *dp)
225 struct spc_html_ *sd = dp;
227 sd->link_dict = NULL;
229 sd->pending_type = -1;
235 spc_handler_html__clean (struct spc_env *spe, struct spc_arg *ap, void *dp)
237 struct spc_html_ *sd = dp;
240 RELEASE(sd->baseurl);
242 if (sd->pending_type >= 0 || sd->link_dict)
243 spc_warn(spe, "Unclosed html anchor found.");
246 pdf_release_obj(sd->link_dict);
248 sd->pending_type = -1;
250 sd->link_dict = NULL;
257 spc_handler_html__bophook (struct spc_env *spe, struct spc_arg *ap, void *dp)
259 struct spc_html_ *sd = dp;
261 if (sd->pending_type >= 0) {
262 spc_warn(spe, "...html anchor continues from previous page processed...");
269 spc_handler_html__eophook (struct spc_env *spe, struct spc_arg *ap, void *dp)
271 struct spc_html_ *sd = dp;
273 if (sd->pending_type >= 0) {
274 spc_warn(spe, "Unclosed html anchor at end-of-page!");
282 fqurl (const char *baseurl, const char *name)
289 len += strlen(baseurl) + 1; /* we may want to add '/' */
291 q = NEW(len + 1, char);
293 if (baseurl && baseurl[0]) {
296 p = q + strlen(q) - 1;
299 if (name[0] && name[0] != '/')
308 html_open_link (struct spc_env *spe, const char *name, struct spc_html_ *sd)
314 ASSERT( sd->link_dict == NULL ); /* Should be checked somewhere else */
316 sd->link_dict = pdf_new_dict();
317 pdf_add_dict(sd->link_dict,
318 pdf_new_name("Type"), pdf_new_name ("Annot"));
319 pdf_add_dict(sd->link_dict,
320 pdf_new_name("Subtype"), pdf_new_name ("Link"));
322 color = pdf_new_array ();
323 pdf_add_array(color, pdf_new_number(0.0));
324 pdf_add_array(color, pdf_new_number(0.0));
325 pdf_add_array(color, pdf_new_number(1.0));
326 pdf_add_dict(sd->link_dict, pdf_new_name("C"), color);
328 url = fqurl(sd->baseurl, name);
330 /* url++; causes memory leak in RELEASE(url) */
331 pdf_add_dict(sd->link_dict,
332 pdf_new_name("Dest"),
333 pdf_new_string(url+1, strlen(url+1)));
334 } else { /* Assume this is URL */
335 pdf_obj *action = pdf_new_dict();
337 pdf_new_name("Type"),
338 pdf_new_name("Action"));
341 pdf_new_name("URI"));
344 pdf_new_string(url, strlen(url)));
345 pdf_add_dict(sd->link_dict,
347 pdf_link_obj(action));
348 pdf_release_obj(action);
352 spc_begin_annot(spe, sd->link_dict);
354 sd->pending_type = ANCHOR_TYPE_HREF;
360 html_open_dest (struct spc_env *spe, const char *name, struct spc_html_ *sd)
363 pdf_obj *array, *page_ref;
366 cp.x = spe->x_user; cp.y = spe->y_user;
367 pdf_dev_transform(&cp, NULL);
369 page_ref = pdf_doc_this_page_ref();
370 ASSERT( page_ref ); /* Otherwise must be bug */
372 array = pdf_new_array();
373 pdf_add_array(array, page_ref);
374 pdf_add_array(array, pdf_new_name("XYZ"));
375 pdf_add_array(array, pdf_new_null());
376 pdf_add_array(array, pdf_new_number(cp.y + 24.0));
377 pdf_add_array(array, pdf_new_null());
379 error = pdf_doc_add_names("Dests",
384 spc_warn(spe, "Failed to add named destination: %s", name);
386 sd->pending_type = ANCHOR_TYPE_NAME;
/*
 * Non-zero while an html anchor is open: either an anchor type is pending
 * (pending_type >= 0) or a link annotation dictionary is still held.
 * s is a pointer to the html state; note that it is evaluated twice.
 */
391 #define ANCHOR_STARTED(s) ((s)->pending_type >= 0 || (s)->link_dict)
394 spc_html__anchor_open (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
396 pdf_obj *href, *name;
399 if (ANCHOR_STARTED(sd)) {
400 spc_warn(spe, "Nested html anchors found!");
404 href = pdf_lookup_dict(attr, "href");
405 name = pdf_lookup_dict(attr, "name");
407 spc_warn(spe, "Sorry, you can't have both \"href\" and \"name\" in anchor tag...");
410 error = html_open_link(spe, pdf_string_value(href), sd);
411 } else if (name) { /* name */
412 error = html_open_dest(spe, pdf_string_value(name), sd);
414 spc_warn(spe, "You should have \"href\" or \"name\" in anchor tag!");
422 spc_html__anchor_close (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
426 switch (sd->pending_type) {
427 case ANCHOR_TYPE_HREF:
430 pdf_release_obj(sd->link_dict);
431 sd->link_dict = NULL;
432 sd->pending_type = -1;
434 spc_warn(spe, "Closing html anchor (link) without starting!");
438 case ANCHOR_TYPE_NAME:
439 sd->pending_type = -1;
442 spc_warn(spe, "No corresponding opening tag for html anchor.");
451 spc_html__base_empty (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
456 href = pdf_lookup_dict(attr, "href");
458 spc_warn(spe, "\"href\" not found for \"base\" tag!");
462 vp = (char *) pdf_string_value(href);
464 spc_warn(spe, "\"baseurl\" changed: \"%s\" --> \"%s\"", sd->baseurl, vp);
465 RELEASE(sd->baseurl);
467 sd->baseurl = NEW(strlen(vp) + 1, char);
468 strcpy(sd->baseurl, vp);
474 #ifdef ENABLE_HTML_IMG_SUPPORT
475 /* This is not complete yet.
476 * The placement of images still needs to be worked out.
479 atopt (const char *a)
484 const char *_ukeys[] = {
490 "pt", "in", "cm", "mm", "bp",
497 q = parse_float_decimal(&p, p + strlen(p));
499 WARN("Invalid length value: %s (%c)", a, *p);
506 q = parse_c_ident(&p, p + strlen(p));
508 for (k = 0; _ukeys[k] && strcmp(_ukeys[k], q); k++);
510 case K_UNIT__PT: u *= 72.0 / 72.27; break;
511 case K_UNIT__IN: u *= 72.0; break;
512 case K_UNIT__CM: u *= 72.0 / 2.54 ; break;
513 case K_UNIT__MM: u *= 72.0 / 25.4 ; break;
514 case K_UNIT__BP: u *= 1.0 ; break;
515 case K_UNIT__PX: u *= 1.0 ; break; /* 72dpi */
517 WARN("Unknown unit of measure: %s", q);
527 #ifdef ENABLE_HTML_SVG_OPACITY
528 /* Replicated from spc_tpic */
530 create_xgstate (double a /* alpha */, int f_ais /* alpha is shape */)
534 dict = pdf_new_dict();
536 pdf_new_name("Type"),
537 pdf_new_name("ExtGState"));
551 check_resourcestatus (const char *category, const char *resname)
553 pdf_obj *dict1, *dict2;
555 dict1 = pdf_doc_current_page_resources();
559 dict2 = pdf_lookup_dict(dict1, category);
561 pdf_obj_typeof(dict2) == PDF_DICT) {
562 if (pdf_lookup_dict(dict2, resname))
567 #endif /* ENABLE_HTML_SVG_OPACITY */
570 spc_html__img_empty (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
575 #ifdef ENABLE_HTML_SVG_OPACITY
576 double alpha = 1.0; /* meaning fully opaque */
577 #endif /* ENABLE_HTML_SVG_OPACITY */
578 #ifdef ENABLE_HTML_SVG_TRANSFORM
581 pdf_setmatrix(&M, 1.0, 0.0, 0.0, 1.0, spe->x_user, spe->y_user);
582 #endif /* ENABLE_HTML_SVG_TRANSFORM */
584 spc_warn(spe, "html \"img\" tag found (not completed, plese don't use!).");
586 src = pdf_lookup_dict(attr, "src");
588 spc_warn(spe, "\"src\" attribute not found for \"img\" tag!");
592 transform_info_clear(&ti);
593 obj = pdf_lookup_dict(attr, "width");
595 ti.width = atopt(pdf_string_value(obj));
596 ti.flags |= INFO_HAS_WIDTH;
598 obj = pdf_lookup_dict(attr, "height");
600 ti.height = atopt(pdf_string_value(obj));
601 ti.flags |= INFO_HAS_HEIGHT;
604 #ifdef ENABLE_HTML_SVG_OPACITY
605 obj = pdf_lookup_dict(attr, "svg:opacity");
607 alpha = atof(pdf_string_value(obj));
608 if (alpha < 0.0 || alpha > 1.0) {
609 spc_warn(spe, "Invalid opacity value: %s", pdf_string_value(obj));
613 #endif /* ENABLE_HTML_SVG_OPACITY */
615 #ifdef ENABLE_HTML_SVG_TRANSFORM
616 obj = pdf_lookup_dict(attr, "svg:transform");
618 const char *p = pdf_string_value(obj);
620 for ( ; *p && isspace(*p); p++);
621 while (*p && !error) {
622 pdf_setmatrix(&N, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0);
623 error = cvt_a_to_tmatrix(&N, p, &p);
626 pdf_concatmatrix(&M, &N);
627 for ( ; *p && isspace(*p); p++);
629 for (++p; *p && isspace(*p); p++);
633 #endif /* ENABLE_HTML_SVG_TRANSFORM */
636 spc_warn(spe, "Error in html \"img\" tag attribute.");
640 id = pdf_ximage_findresource(pdf_string_value(src), 0, NULL);
642 spc_warn(spe, "Could not find/load image: %s", pdf_string_value(src));
645 #if defined(ENABLE_HTML_SVG_TRANSFORM) || defined(ENABLE_HTML_SVG_OPACITY)
654 #ifdef ENABLE_HTML_SVG_OPACITY
657 int a = round(100.0 * alpha);
659 res_name = NEW(strlen("_Tps_a100_") + 1, char);
660 sprintf(res_name, "_Tps_a%03d_", a); /* Not Tps prefix but... */
661 if (!check_resourcestatus("ExtGState", res_name)) {
662 dict = create_xgstate(round_at(0.01 * a, 0.01), 0);
663 pdf_doc_add_page_resource("ExtGState",
664 res_name, pdf_ref_obj(dict));
665 pdf_release_obj(dict);
667 pdf_doc_add_page_content(" /", 2); /* op: */
668 pdf_doc_add_page_content(res_name, strlen(res_name)); /* op: */
669 pdf_doc_add_page_content(" gs", 3); /* op: gs */
673 #endif /* ENABLE_HTML_SVG_OPACITY */
675 pdf_ximage_scale_image(id, &M1, &r, &ti);
676 pdf_concatmatrix(&M, &M1);
679 pdf_dev_rectclip(r.llx, r.lly, r.urx - r.llx, r.ury - r.lly);
681 res_name = pdf_ximage_get_resname(id);
682 pdf_doc_add_page_content(" /", 2); /* op: */
683 pdf_doc_add_page_content(res_name, strlen(res_name)); /* op: */
684 pdf_doc_add_page_content(" Do", 3); /* op: Do */
688 pdf_doc_add_page_resource("XObject",
690 pdf_ximage_get_reference(id));
693 pdf_dev_put_image(id, &ti, spe->x_user, spe->y_user);
694 #endif /* ENABLE_HTML_SVG_XXX */
701 spc_html__img_empty (struct spc_env *spe, pdf_obj *attr, struct spc_html_ *sd)
703 spc_warn(spe, "IMG tag not yet supported yet...");
706 #endif /* ENABLE_HTML_IMG_SUPPORT */
710 spc_handler_html_default (struct spc_env *spe, struct spc_arg *ap)
712 struct spc_html_ *sd = &_html_state;
713 char name[HTML_TAG_NAME_MAX + 1];
715 int error = 0, type = HTML_TAG_TYPE_OPEN;
717 if (ap->curptr >= ap->endptr)
720 attr = pdf_new_dict();
721 error = read_html_tag(name, attr, &type, &ap->curptr, ap->endptr);
723 pdf_release_obj(attr);
726 if (!strcmp(name, "a")) {
728 case HTML_TAG_TYPE_OPEN:
729 error = spc_html__anchor_open (spe, attr, sd);
731 case HTML_TAG_TYPE_CLOSE:
732 error = spc_html__anchor_close(spe, attr, sd);
735 spc_warn(spe, "Empty html anchor tag???");
739 } else if (!strcmp(name, "base")) {
740 if (type == HTML_TAG_TYPE_CLOSE) {
741 spc_warn(spe, "Close tag for \"base\"???");
743 } else { /* treat "open" same as "empty" */
744 error = spc_html__base_empty(spe, attr, sd);
746 } else if (!strcmp(name, "img")) {
747 if (type == HTML_TAG_TYPE_CLOSE) {
748 spc_warn(spe, "Close tag for \"img\"???");
750 } else { /* treat "open" same as "empty" */
751 error = spc_html__img_empty(spe, attr, sd);
754 pdf_release_obj(attr);
756 for ( ; ap->curptr < ap->endptr && isspace(ap->curptr[0]); ap->curptr++);
762 #ifdef ENABLE_HTML_SVG_TRANSFORM
763 /* translate wsp* '(' wsp* number (comma-wsp number)? wsp* ')' */
765 cvt_a_to_tmatrix (pdf_tmatrix *M, const char *ptr, const char **nextptr)
771 static const char *_tkeys[] = {
772 #define K_TRNS__MATRIX 0
773 "matrix", /* a b c d e f */
774 #define K_TRNS__TRANSLATE 1
775 "translate", /* tx [ty] : dflt. ty = 0 */
776 #define K_TRNS__SCALE 2
777 "scale", /* sx [sy] : dflt. sy = sx */
778 #define K_TRNS__ROTATE 3
779 "rotate", /* ang [cx cy] : dflt. cx, cy = 0 */
780 #define K_TRNS__SKEWX 4
781 #define K_TRNS__SKEWY 5
788 for ( ; *p && isspace(*p); p++);
790 q = parse_c_ident(&p, p + strlen(p));
793 /* parsed transformation key */
794 for (k = 0; _tkeys[k] && strcmp(q, _tkeys[k]); k++);
798 for ( ; *p && isspace(*p); p++);
799 if (*p != '(' || *(p + 1) == 0)
801 for (++p; *p && isspace(*p); p++);
802 for (n = 0; n < 6 && *p && *p != ')'; n++) {
803 q = parse_float_decimal(&p, p + strlen(p));
810 for ( ; *p && isspace(*p); p++);
812 for (++p; *p && isspace(*p); p++);
824 M->a = v[0]; M->c = v[1];
825 M->b = v[2]; M->d = v[3];
826 M->e = v[4]; M->f = v[5];
828 case K_TRNS__TRANSLATE:
829 if (n != 1 && n != 2)
833 M->e = v[0]; M->f = (n == 2) ? v[1] : 0.0;
836 if (n != 1 && n != 2)
838 M->a = v[0]; M->d = (n == 2) ? v[1] : v[0];
843 if (n != 1 && n != 3)
845 M->a = cos(v[0] * M_PI / 180.0);
846 M->c = sin(v[0] * M_PI / 180.0);
847 M->b = -M->c; M->d = M->a;
848 M->e = (n == 3) ? v[1] : 0.0;
849 M->f = (n == 3) ? v[2] : 0.0;
856 M->b = tan(v[0] * M_PI / 180.0);
862 M->c = tan(v[0] * M_PI / 180.0);
871 #endif /* ENABLE_HTML_SVG_TRANSFORM */
874 spc_html_at_begin_document (void)
876 struct spc_html_ *sd = &_html_state;
877 return spc_handler_html__init(NULL, NULL, sd);
881 spc_html_at_begin_page (void)
883 struct spc_html_ *sd = &_html_state;
884 return spc_handler_html__bophook(NULL, NULL, sd);
888 spc_html_at_end_page (void)
890 struct spc_html_ *sd = &_html_state;
891 return spc_handler_html__eophook(NULL, NULL, sd);
895 spc_html_at_end_document (void)
897 struct spc_html_ *sd = &_html_state;
898 return spc_handler_html__clean(NULL, NULL, sd);
903 spc_html_check_special (const char *buffer, long size)
905 const char *p, *endptr;
910 for ( ; p < endptr && isspace(*p); p++);
911 size = (long) (endptr - p);
912 if (size >= strlen("html:") &&
913 !memcmp(p, "html:", strlen("html:"))) {
922 spc_html_setup_handler (struct spc_handler *sph,
923 struct spc_env *spe, struct spc_arg *ap)
925 ASSERT(sph && spe && ap);
927 for ( ; ap->curptr < ap->endptr && isspace(ap->curptr[0]); ap->curptr++);
928 if (ap->curptr + strlen("html:") > ap->endptr ||
929 memcmp(ap->curptr, "html:", strlen("html:"))) {
936 sph->exec = &spc_handler_html_default;
938 ap->curptr += strlen("html:");
939 for ( ; ap->curptr < ap->endptr && isspace(ap->curptr[0]); ap->curptr++);