From 321403f0246a633f815799578d000fe24e5c3d5e Mon Sep 17 00:00:00 2001 From: Krzysztof Kowalczyk Date: Wed, 20 Nov 2024 21:08:47 +0100 Subject: [PATCH] update mupdf to 1.25-rc2 --- mupdf/CHANGES | 1 + mupdf/Makefile | 13 +- mupdf/docs/src/changes.rst | 29 +- mupdf/docs/src/language-bindings.rst | 6 +- mupdf/include/mupdf/fitz/outline.h | 8 + mupdf/include/mupdf/fitz/structured-text.h | 4 +- mupdf/platform/gl/gl-main.c | 2 +- mupdf/scripts/wrap/cpp.py | 1 + mupdf/source/fitz/directory.c | 8 +- mupdf/source/fitz/draw-rasterize.c | 1 - mupdf/source/fitz/error.c | 2 +- mupdf/source/fitz/printf.c | 26 +- mupdf/source/fitz/stext-device.c | 118 +++++-- mupdf/source/fitz/stext-output.c | 15 +- mupdf/source/fitz/subset-cff.c | 339 ++++++++++++++++----- mupdf/source/fitz/subset-ttf.c | 166 ++++++---- mupdf/source/fitz/time.c | 2 - mupdf/source/fitz/xml.c | 8 + mupdf/source/pdf/pdf-cmap.c | 19 -- mupdf/source/pdf/pdf-outline.c | 2 +- mupdf/source/pdf/pdf-repair.c | 2 +- mupdf/source/pdf/pdf-subset.c | 111 +++---- mupdf/source/pdf/pdf-xref.c | 8 +- mupdf/source/tools/mudraw.c | 32 +- mupdf/source/tools/muraster.c | 2 +- mupdf/source/tools/murun.c | 27 +- mupdf/source/tools/pdfposter.c | 4 +- 27 files changed, 679 insertions(+), 277 deletions(-) diff --git a/mupdf/CHANGES b/mupdf/CHANGES index 6f828f2a0872..5e809390b64a 100644 --- a/mupdf/CHANGES +++ b/mupdf/CHANGES @@ -45,6 +45,7 @@ List of changes in MuPDF 1.25 - fz_text_item now has an explicit pen "advance" member. - mutool recolor can now be used as a function. - pdf_annot_rect works with the "design" rectangle. + - structured text "color" field renamed to "argb" and now includes alpha channel Important bug fixes: - More robust font subsetting. diff --git a/mupdf/Makefile b/mupdf/Makefile index 62afcc8cb7e5..f1b1ea3b8b87 100755 --- a/mupdf/Makefile +++ b/mupdf/Makefile @@ -503,19 +503,26 @@ install-docs: install: install-libs install-apps install-docs -install-docs-html: +docs: python3 scripts/build-docs.py + +install-docs-html: docs install -d $(DESTDIR)$(docdir) install -d $(DESTDIR)$(docdir)/_images install -d $(DESTDIR)$(docdir)/_static + install -d $(DESTDIR)$(docdir)/_static/styles + install -d $(DESTDIR)$(docdir)/_static/scripts install -m 644 build/docs/html/*.html $(DESTDIR)$(docdir) install -m 644 build/docs/html/*.inv $(DESTDIR)$(docdir) install -m 644 build/docs/html/*.js $(DESTDIR)$(docdir) install -m 644 build/docs/html/_images/* $(DESTDIR)$(docdir)/_images - install -m 644 build/docs/html/_static/*.css $(DESTDIR)$(docdir)/_static install -m 644 build/docs/html/_static/*.ico $(DESTDIR)$(docdir)/_static install -m 644 build/docs/html/_static/*.js $(DESTDIR)$(docdir)/_static install -m 644 build/docs/html/_static/*.png $(DESTDIR)$(docdir)/_static + install -m 644 build/docs/html/_static/*.css $(DESTDIR)$(docdir)/_static + install -m 644 build/docs/html/_static/scripts/*.js $(DESTDIR)$(docdir)/_static/scripts + install -m 644 build/docs/html/_static/scripts/*.map $(DESTDIR)$(docdir)/_static/scripts + install -m 644 build/docs/html/_static/styles/*.css $(DESTDIR)$(docdir)/_static/styles tarball: bash scripts/archive.sh @@ -668,7 +675,7 @@ install-% c++-% python-% csharp-%: endif -.PHONY: all clean nuke install third libs apps generate tags +.PHONY: all clean nuke install third libs apps generate tags docs .PHONY: shared shared-debug shared-clean .PHONY: c++-% python-% csharp-% .PHONY: c++-clean python-clean csharp-clean diff --git a/mupdf/docs/src/changes.rst b/mupdf/docs/src/changes.rst index 117702880573..6d9fa68428d6 100644 --- a/mupdf/docs/src/changes.rst +++ b/mupdf/docs/src/changes.rst @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2023 Artifex Software, Inc. +.. Copyright (C) 2001-2024 Artifex Software, Inc. .. All Rights Reserved. @@ -34,6 +34,33 @@ To minimise the impact of such changes, we undertake to list the :title:`API` ch The changes listed below only affects *existing* :title:`APIs`. +Changes from 1.24 to 1.25 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- New `wants_file` member for `fz_document_handler`. +- `fz_text_item` now has an explicit pen `advance` member. +- `pdf_annot_rect` works with the "design" rectangle. +- mutool recolor can now be used as a function. + + +Changes from 1.23 to 1.24 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- You must call `pdf_report_error` in the final `fz_catch`. Any unreported errors will be automatically reported when a new error is raised, or when closing the fitz context. +- The deprecated pdf_check_signature was removed. + + +Changes from 1.22 to 1.23 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- `pdf_field_name` renamed to `pdf_load_field_name`. +- mutool run changed many methods to match Java and new WASM library. + + +Changes from 1.21 to 1.22 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +None. + Changes from 1.20 to 1.21 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/mupdf/docs/src/language-bindings.rst b/mupdf/docs/src/language-bindings.rst index cde0a091bf77..b6bc9b4424a0 100644 --- a/mupdf/docs/src/language-bindings.rst +++ b/mupdf/docs/src/language-bindings.rst @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2023 Artifex Software, Inc. +.. Copyright (C) 2001-2024 Artifex Software, Inc. .. All Rights Reserved. .. include:: header.rst @@ -350,9 +350,9 @@ The Python and C# MuPDF APIs `mupdf::FzBuffer` instance and `truncated` is an integer. * Allows implementation of mutool in Python - see - `mupdf:scripts/mutool.py `_ + `mupdf:scripts/mutool.py `_ and - `mupdf:scripts/mutool_draw.py `_. + `mupdf:scripts/mutool_draw.py `_. * Provides text representation of simple 'POD' structs: diff --git a/mupdf/include/mupdf/fitz/outline.h b/mupdf/include/mupdf/fitz/outline.h index 737d30010c34..8e19d8cc5e8d 100644 --- a/mupdf/include/mupdf/fitz/outline.h +++ b/mupdf/include/mupdf/fitz/outline.h @@ -65,6 +65,10 @@ int fz_outline_iterator_down(fz_context *ctx, fz_outline_iterator *iter); After an insert, we do not change where we are pointing. The return code is the same as for next, it indicates the current iterator position. + + Note that for PDF documents at least, the is_open field is ignored. All childless + nodes are considered closed by PDF, hence (given every newly inserted node is + childless by definition) all new nodes are inserted with is_open == false. */ int fz_outline_iterator_insert(fz_context *ctx, fz_outline_iterator *iter, fz_outline_item *item); @@ -107,6 +111,10 @@ void fz_drop_outline_iterator(fz_context *ctx, fz_outline_iterator *iter); down: The outline items immediate children in the hierarchy. May be NULL if no children exist. + + is_open: If zero, the outline element is closed in the UI. If + 1, it should be open, showing any child elements. All other + values reserved. */ typedef struct fz_outline { diff --git a/mupdf/include/mupdf/fitz/structured-text.h b/mupdf/include/mupdf/fitz/structured-text.h index bb7ef96213b1..150d7086b87f 100644 --- a/mupdf/include/mupdf/fitz/structured-text.h +++ b/mupdf/include/mupdf/fitz/structured-text.h @@ -311,7 +311,7 @@ struct fz_stext_block struct { fz_stext_line *first_line, *last_line; } t; struct { fz_matrix transform; fz_image *image; } i; struct { fz_stext_struct *down; int index; } s; - struct { uint8_t stroked; uint8_t rgba[4]; } v; + struct { uint8_t stroked; uint32_t argb; } v; struct { fz_stext_grid_positions *xs; fz_stext_grid_positions *ys; } b; } u; fz_stext_block *prev, *next; @@ -338,7 +338,7 @@ struct fz_stext_char int c; /* unicode character value */ uint16_t bidi; /* even for LTR, odd for RTL - probably only needs 8 bits? */ uint16_t flags; - int color; /* sRGB hex color */ + uint32_t argb; /* sRGB hex color (alpha in top 8 bits, then r, then g, then b in low bits) */ fz_point origin; fz_quad quad; float size; diff --git a/mupdf/platform/gl/gl-main.c b/mupdf/platform/gl/gl-main.c index 654f86f16a25..2194ef2ca0ec 100644 --- a/mupdf/platform/gl/gl-main.c +++ b/mupdf/platform/gl/gl-main.c @@ -3275,7 +3275,7 @@ int main(int argc, char **argv) return 0; } -#ifdef _MSC_VER +#ifdef _WIN32 int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd) { int argc; diff --git a/mupdf/scripts/wrap/cpp.py b/mupdf/scripts/wrap/cpp.py index 647528949ff4..afb8baba3c62 100644 --- a/mupdf/scripts/wrap/cpp.py +++ b/mupdf/scripts/wrap/cpp.py @@ -5324,6 +5324,7 @@ def show_clang_diagnostic( diagnostic, depth=0): #include "mupdf/fitz/geometry.h" + #include #include #include #include diff --git a/mupdf/source/fitz/directory.c b/mupdf/source/fitz/directory.c index 1a82b56fbe36..92dc7920d58d 100644 --- a/mupdf/source/fitz/directory.c +++ b/mupdf/source/fitz/directory.c @@ -26,7 +26,7 @@ #include #include -#ifdef _MSC_VER +#ifdef _WIN32 #include #include #define stat _stat @@ -139,7 +139,7 @@ fz_archive * fz_open_directory(fz_context *ctx, const char *path) { fz_directory *dir; -#ifdef _MSC_VER +#ifdef _WIN32 WCHAR *wpath = NULL; size_t z = 3; HANDLE h = NULL; @@ -168,7 +168,7 @@ fz_open_directory(fz_context *ctx, const char *path) fz_try(ctx) { -#ifdef _MSC_VER +#ifdef _WIN32 char const *p = path; WCHAR *w; while (*p) @@ -254,7 +254,7 @@ fz_open_directory(fz_context *ctx, const char *path) } fz_always(ctx) { -#ifdef _MSC_VER +#ifdef _WIN32 fz_free(ctx, wpath); if (h) (void)FindClose(h); diff --git a/mupdf/source/fitz/draw-rasterize.c b/mupdf/source/fitz/draw-rasterize.c index b808f64224bb..597b6de9f607 100644 --- a/mupdf/source/fitz/draw-rasterize.c +++ b/mupdf/source/fitz/draw-rasterize.c @@ -148,7 +148,6 @@ fz_set_rasterizer_graphics_aa_level(fz_context *ctx, fz_aa_context *aa, int leve aa->bits = 0; } aa->scale = 0xFF00 / (aa->hscale * aa->vscale); - fz_set_rasterizer_text_aa_level(ctx, aa, level); #endif } diff --git a/mupdf/source/fitz/error.c b/mupdf/source/fitz/error.c index f5ee0b227db2..f01e49dbd93d 100644 --- a/mupdf/source/fitz/error.c +++ b/mupdf/source/fitz/error.c @@ -29,7 +29,7 @@ #include #include -#ifdef _MSC_VER +#ifdef _WIN32 #ifndef NDEBUG #define USE_OUTPUT_DEBUG_STRING #include diff --git a/mupdf/source/fitz/printf.c b/mupdf/source/fitz/printf.c index 1ce84f1e0f43..eadebf9478fa 100644 --- a/mupdf/source/fitz/printf.c +++ b/mupdf/source/fitz/printf.c @@ -253,7 +253,7 @@ static void fmtquote(struct fmtbuf *out, const char *s, int sq, int eq, int verb for (i = 0; i < n; ++i) fmtputc(out, s[i]); } - else + else if (c <= 0xffff) { fmtputc(out, '\\'); fmtputc(out, 'u'); @@ -262,6 +262,24 @@ static void fmtquote(struct fmtbuf *out, const char *s, int sq, int eq, int verb fmtputc(out, "0123456789ABCDEF"[(c>>4)&15]); fmtputc(out, "0123456789ABCDEF"[(c)&15]); } + else + { + /* Use a surrogate pair */ + int hi = 0xd800 + ((c - 0x10000) >> 10); + int lo = 0xdc00 + ((c - 0x10000) & 0x3ff); + fmtputc(out, '\\'); + fmtputc(out, 'u'); + fmtputc(out, "0123456789ABCDEF"[(hi>>12)&15]); + fmtputc(out, "0123456789ABCDEF"[(hi>>8)&15]); + fmtputc(out, "0123456789ABCDEF"[(hi>>4)&15]); + fmtputc(out, "0123456789ABCDEF"[(hi)&15]); + fmtputc(out, '\\'); + fmtputc(out, 'u'); + fmtputc(out, "0123456789ABCDEF"[(lo>>12)&15]); + fmtputc(out, "0123456789ABCDEF"[(lo>>8)&15]); + fmtputc(out, "0123456789ABCDEF"[(lo>>4)&15]); + fmtputc(out, "0123456789ABCDEF"[(lo)&15]); + } } else { if (c == sq || c == eq) fmtputc(out, '\\'); @@ -356,6 +374,12 @@ static void fmtquote_xml(struct fmtbuf *out, const char *s) if (c < 32 || c >= 127) { fmtputc(out, '&'); fmtputc(out, '#'); + fmtputc(out, 'x'); + if (c > 65535) + { + fmtputc(out, "0123456789ABCDEF"[(c>>20)&15]); + fmtputc(out, "0123456789ABCDEF"[(c>>16)&15]); + } if (c > 255) { fmtputc(out, "0123456789ABCDEF"[(c>>12)&15]); diff --git a/mupdf/source/fitz/stext-device.c b/mupdf/source/fitz/stext-device.c index 689e6a088a35..652185a79d47 100644 --- a/mupdf/source/fitz/stext-device.c +++ b/mupdf/source/fitz/stext-device.c @@ -345,7 +345,7 @@ add_char_to_line(fz_context *ctx, fz_stext_page *page, fz_stext_line *line, fz_m } ch->c = c; - ch->color = color; + ch->argb = color; ch->bidi = bidi; ch->origin = *p; ch->size = size; @@ -1060,14 +1060,15 @@ fz_stext_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, fz_m do_extract(ctx, dev, span, ctm, 0, span->len); } -static int hexrgb_from_color(fz_context *ctx, fz_colorspace *colorspace, const float *color) +static int hexrgba_from_color(fz_context *ctx, fz_colorspace *colorspace, const float *color, float alpha) { float rgb[3]; fz_convert_color(ctx, colorspace, color, fz_device_rgb(ctx), rgb, NULL, fz_default_color_params); return - (fz_clampi(rgb[0] * 255, 0, 255) << 16) | - (fz_clampi(rgb[1] * 255, 0, 255) << 8) | - (fz_clampi(rgb[2] * 255, 0, 255)); + (fz_clampi(alpha * 255 + 0.5f, 0, 255) << 24) | + (fz_clampi(rgb[0] * 255 + 0.5f, 0, 255) << 16) | + (fz_clampi(rgb[1] * 255 + 0.5f, 0, 255) << 8) | + (fz_clampi(rgb[2] * 255 + 0.5f, 0, 255)); } static void @@ -1078,7 +1079,7 @@ fz_stext_fill_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matr fz_text_span *span; if (text == tdev->lasttext) return; - tdev->color = hexrgb_from_color(ctx, colorspace, color); + tdev->color = hexrgba_from_color(ctx, colorspace, color, alpha); tdev->new_obj = 1; for (span = text->head; span; span = span->next) fz_stext_extract(ctx, tdev, span, ctm); @@ -1094,7 +1095,7 @@ fz_stext_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_text_span *span; if (text == tdev->lasttext) return; - tdev->color = hexrgb_from_color(ctx, colorspace, color); + tdev->color = hexrgba_from_color(ctx, colorspace, color, alpha); tdev->new_obj = 1; for (span = text->head; span; span = span->next) fz_stext_extract(ctx, tdev, span, ctm); @@ -1633,6 +1634,90 @@ line_crosses_rect(fz_point a, fz_point b, fz_rect r) return fz_is_point_inside_rect(a, r); } +static float +calculate_ascent(fz_point p, fz_point origin, fz_point dir) +{ + return fabsf((origin.x-p.x)*dir.y - (origin.y-p.y)*dir.x); +} + +/* Create us a rect from the given quad, but extend it downwards + * to allow for underlines that pass under the glyphs. */ +static fz_rect expanded_rect_from_quad(fz_quad quad, fz_point dir, fz_point origin, float size) +{ + /* Consider the two rects from A and g respectively. + * + * ul +------+ ur or + * | /\ | ul +------+ ur + * | /__\ | | /''\ | + * |/ \| |( || + * ll +------+ lr | ''''|| + * | ''' | <-expected underline level + * ll +------+ lr + * + * So an underline won't cross A's rect, but will cross g's. + * We want to make a rect that includes a suitable amount of + * space underneath. The information we have available to us + * is summed up here: + * + * ul +---------+ ur + * | | + * | origin | + * |+----------> dir + * | | + * ll +---------+ lr + * + * Consider the distance from ul to the line that passes through + * the origin with direction dir. Similarly, consider the distance + * from ur to the same line. This can be thought of as the 'ascent' + * of this character. + * + * We'd like the distance from ul to ll to be greater than this, so + * as to ensure we cover the possible location where an underline + * might reasonably go. + * + * If we have a line (l) through point A with direction vector u, + * the distance between point P and line(l) is: + * + * d(P,l) = || AP x u || / || u || + * + * where x is the cross product. + * + * For us, because || dir || = 1: + * + * d(ul, origin) = || (origin-ul) x dir || + * + * The cross product is only defined in 3 (or 7!) dimensions, so + * extend both vectors into 3d by defining a 0 z component. + * + * (origin-ul) x dir = [ (origin.y - ul.y) . 0 - 0 . dir.y ] + * [ 0 . dir.x - (origin.x - ul.y) . 0 ] + * [ (origin.x - ul.x) . dir.y - (origin.y - ul.y) . dir.x ] + * + * So d(ul, origin) = abs(D) where D = (origin.x-ul.x).dir.y - (origin.y-ul.y).dir.x + */ + float ascent = (calculate_ascent(quad.ul, origin, dir) + calculate_ascent(quad.ur, origin, dir)) / 2; + fz_point left = { quad.ll.x - quad.ul.x, quad.ll.y - quad.ul.y }; + fz_point right = { quad.lr.x - quad.ur.x, quad.lr.y - quad.ur.y }; + float height = (hypotf(left.x, left.y) + hypotf(right.x, right.y))/2; + int neg = 0; + + /* We'd like height to be at least ascent + 1/4 size */ + if (height < 0) + neg = 1, height = -height; + if (height < ascent + size * 0.25f) + height = ascent + size * 0.25f; + + height -= ascent; + if (neg) + height = -height; + quad.ll.x += - height * dir.y; + quad.ll.y += height * dir.x; + quad.lr.x += - height * dir.y; + quad.lr.y += height * dir.x; + + return fz_rect_from_quad(quad); +} + static void check_for_strikeout(fz_context *ctx, fz_stext_device *tdev, fz_stext_page *page, const fz_path *path, fz_matrix ctm) { @@ -1679,7 +1764,7 @@ check_for_strikeout(fz_context *ctx, fz_stext_device *tdev, fz_stext_page *page, fz_stext_char *ch; for (ch = line->first_char; ch; ch = ch->next) { - fz_rect ch_box = fz_rect_from_quad(ch->quad); + fz_rect ch_box = expanded_rect_from_quad(ch->quad, line->dir, ch->origin, ch->size); if (line_crosses_rect(from, to, ch_box)) { @@ -1711,30 +1796,15 @@ check_for_strikeout(fz_context *ctx, fz_stext_device *tdev, fz_stext_page *page, } } -static uint8_t -to255(float x) -{ - if (x <= 0) - return 0; - if (x >= 1) - return 255; - return (uint8_t)(x*255 + 0.5); -} - static void add_vector(fz_context *ctx, fz_stext_page *page, fz_rect bbox, int stroked, fz_colorspace *cs, const float *color, float alpha, fz_color_params cp) { - float rgb[3]; fz_stext_block *b = add_block_to_page(ctx, page); b->type = FZ_STEXT_BLOCK_VECTOR; b->bbox = bbox; b->u.v.stroked = stroked; - fz_convert_color(ctx, cs, color, fz_device_rgb(ctx), rgb, NULL, cp); - b->u.v.rgba[0] = to255(rgb[0]); - b->u.v.rgba[1] = to255(rgb[1]); - b->u.v.rgba[2] = to255(rgb[2]); - b->u.v.rgba[3] = to255(alpha); + b->u.v.argb = hexrgba_from_color(ctx, cs, color, alpha); } static void diff --git a/mupdf/source/fitz/stext-output.c b/mupdf/source/fitz/stext-output.c index ee1e9c3cf3b2..e55f9ae6e4cc 100644 --- a/mupdf/source/fitz/stext-output.c +++ b/mupdf/source/fitz/stext-output.c @@ -298,7 +298,7 @@ fz_print_stext_block_as_html(fz_context *ctx, fz_output *out, fz_stext_block *bl fz_font *font = NULL; float size = 0; int sup = 0; - int color = 0; + uint32_t color = 0; for (line = block->u.t.first_line; line; line = line->next) { @@ -318,13 +318,13 @@ fz_print_stext_block_as_html(fz_context *ctx, fz_output *out, fz_stext_block *bl for (ch = line->first_char; ch; ch = ch->next) { int ch_sup = detect_super_script(line, ch); - if (ch->font != font || ch->size != size || ch_sup != sup || ch->color != color) + if (ch->font != font || ch->size != size || ch_sup != sup || ch->argb != color) { if (font) fz_print_style_end_html(ctx, out, font, size, sup, color); font = ch->font; size = ch->size; - color = ch->color; + color = ch->argb; sup = ch_sup; fz_print_style_begin_html(ctx, out, font, size, sup, color); } @@ -631,14 +631,15 @@ as_xml(fz_context *ctx, fz_stext_block *block, fz_output *out) name = font_full_name(ctx, font); fz_write_printf(ctx, out, "\n", name, size); } - fz_write_printf(ctx, out, "quad.ul.x, ch->quad.ul.y, ch->quad.ur.x, ch->quad.ur.y, ch->quad.ll.x, ch->quad.ll.y, ch->quad.lr.x, ch->quad.lr.y, ch->origin.x, ch->origin.y, ch->bidi, - ch->color, + ch->argb & 0xFFFFFF, + ch->argb>>24, ch->flags); switch (ch->c) { @@ -682,9 +683,9 @@ as_xml(fz_context *ctx, fz_stext_block *block, fz_output *out) break; case FZ_STEXT_BLOCK_VECTOR: - fz_write_printf(ctx, out, "\n", + fz_write_printf(ctx, out, "\n", block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1, - !!block->u.v.stroked, block->u.v.rgba[0], block->u.v.rgba[1], block->u.v.rgba[2], block->u.v.rgba[3]); + !!block->u.v.stroked, block->u.v.argb); break; case FZ_STEXT_BLOCK_GRID: diff --git a/mupdf/source/fitz/subset-cff.c b/mupdf/source/fitz/subset-cff.c index 22f76d9f76a4..4870204ddc07 100644 --- a/mupdf/source/fitz/subset-cff.c +++ b/mupdf/source/fitz/subset-cff.c @@ -97,7 +97,7 @@ typedef struct size_t len; int symbolic; - int cidfont; + int is_cidfont; uint8_t major; uint8_t minor; @@ -112,8 +112,8 @@ typedef struct index_t charstrings_index; index_t local_index; index_t fdarray_index; - int gsubr_bias; - int subr_bias; + uint16_t gsubr_bias; + uint16_t subr_bias; uint32_t top_dict_index_offset; uint32_t string_index_offset; uint32_t global_index_offset; @@ -128,6 +128,7 @@ typedef struct uint32_t fdselect_offset; uint32_t fdselect_len; uint32_t fdarray_index_offset; + uint32_t charstring_type; uint16_t unpacked_charset_len; uint16_t unpacked_charset_max; @@ -136,9 +137,15 @@ typedef struct struct { fz_buffer *rewritten_dict; + fz_buffer *rewritten_private; uint32_t offset; uint32_t len; uint32_t fixup; + uint32_t local_index_offset; + index_t local_index; + usage_list_t local_usage; + uint16_t subr_bias; + fz_buffer *local_subset; } *fdarray; struct @@ -163,6 +170,7 @@ typedef struct usage_list_t extra_gids_to_keep; uint16_t *gid_to_cid; + uint8_t *gid_to_font; } cff_t; /* cid -> gid */ @@ -250,6 +258,19 @@ offsize_for_offset(uint32_t offset) return 4; } +uint16_t +subr_bias(fz_context *ctx, cff_t *cff, uint16_t count) +{ + if (cff->charstring_type == 1) + return 0; + else if (count < 1240) + return 107; + else if (count < 33900) + return 1131; + else + return 32768; +} + /* Index functions */ /* "Load" an index and check it for plausibility (no overflows etc) */ @@ -709,7 +730,7 @@ dict_write_args(fz_context *ctx, fz_output *out, dict_iterator *di) } static void -do_subset(fz_context *ctx, cff_t *cff, fz_buffer **buffer, usage_list_t *keep_list, index_t *index) +do_subset(fz_context *ctx, cff_t *cff, fz_buffer **buffer, usage_list_t *keep_list, index_t *index, int keep_notdef) { uint8_t *d, *strings; uint32_t i, offset, end; @@ -734,7 +755,7 @@ do_subset(fz_context *ctx, cff_t *cff, fz_buffer **buffer, usage_list_t *keep_li /* Keep this */ gid++; } - else if (i == 0) + else if (keep_notdef && i == 0) { /* Keep this. */ } @@ -764,10 +785,12 @@ do_subset(fz_context *ctx, cff_t *cff, fz_buffer **buffer, usage_list_t *keep_li /* Write out the index header */ put16(d, num_charstrings); /* count */ - put8(d+2, offset_size); /* offset size */ + d +=2; + put8(d, offset_size); /* offset size */ + d += 1; + /* Now copy the charstrings themselves */ - d += 3; strings = d + offset_size * (num_charstrings+1) - 1; gid = 0; fill = 1; @@ -780,7 +803,7 @@ do_subset(fz_context *ctx, cff_t *cff, fz_buffer **buffer, usage_list_t *keep_li /* Keep this */ gid++; } - else if (i == 0) + else if (keep_notdef && i == 0) { /* Keep this */ } @@ -804,19 +827,28 @@ do_subset(fz_context *ctx, cff_t *cff, fz_buffer **buffer, usage_list_t *keep_li static void subset_charstrings(fz_context *ctx, cff_t *cff) { - do_subset(ctx, cff, &cff->charstrings_subset, &cff->gids_to_keep, &cff->charstrings_index); + do_subset(ctx, cff, &cff->charstrings_subset, &cff->gids_to_keep, &cff->charstrings_index, 1); } static void subset_locals(fz_context *ctx, cff_t *cff) { - do_subset(ctx, cff, &cff->local_subset, &cff->local_usage, &cff->local_index); + do_subset(ctx, cff, &cff->local_subset, &cff->local_usage, &cff->local_index, 0); } static void subset_globals(fz_context *ctx, cff_t *cff) { - do_subset(ctx, cff, &cff->global_subset, &cff->global_usage, &cff->global_index); + do_subset(ctx, cff, &cff->global_subset, &cff->global_usage, &cff->global_index, 0); +} + +static void +subset_fdarray_locals(fz_context *ctx, cff_t *cff) +{ + uint16_t i, n = cff->fdarray_index.count; + + for (i = 0; i < n; i++) + do_subset(ctx, cff, &cff->fdarray[i].local_subset, &cff->fdarray[i].local_usage, &cff->fdarray[i].local_index, 0); } /* Charstring "executing" functions */ @@ -885,11 +917,11 @@ drop_usage_list(fz_context *ctx, usage_list_t *list) } static void -mark_subr_used(fz_context *ctx, cff_t *cff, int subr, int global) +mark_subr_used(fz_context *ctx, cff_t *cff, int subr, int global, int local_subr_bias, usage_list_t *local_usage) { - usage_list_t *list = global ? &cff->global_usage : &cff->local_usage; + usage_list_t *list = global ? &cff->global_usage : local_usage; - subr += global ? cff->gsubr_bias : cff->subr_bias; + subr += global ? cff->gsubr_bias : local_subr_bias; usage_list_add(ctx, list, subr); } @@ -931,10 +963,10 @@ use_sub_char(fz_context *ctx, cff_t *cff, int code) do { if (sp + n > (int)(sizeof(stack)/sizeof(*stack))) fz_throw(ctx, FZ_ERROR_FORMAT, "Stack overflow"); sp += n; } while (0) static void -execute_charstring(fz_context *ctx, cff_t *cff, const uint8_t *pc, const uint8_t *end) +execute_charstring(fz_context *ctx, cff_t *cff, const uint8_t *pc, const uint8_t *end, uint16_t subr_bias, usage_list_t *local_usage) { double trans[32] = { 0 }; - double stack[48]; + double stack[513]; int sp = 0; int stem_hints = 0; uint8_t c; @@ -1003,7 +1035,7 @@ execute_charstring(fz_context *ctx, cff_t *cff, const uint8_t *pc, const uint8_t case 10: /* callsubr */ ATLEAST(1); - mark_subr_used(ctx, cff, stack[sp-1], 0); + mark_subr_used(ctx, cff, stack[sp-1], 0, subr_bias, local_usage); sp--; break; case 11: /* return */ @@ -1199,7 +1231,7 @@ execute_charstring(fz_context *ctx, cff_t *cff, const uint8_t *pc, const uint8_t break; case 29: /* callgsubr */ ATLEAST(1); - mark_subr_used(ctx, cff, stack[sp-1], 1); + mark_subr_used(ctx, cff, stack[sp-1], 1, subr_bias, local_usage); sp--; break; case 28: /* shortint */ @@ -1248,8 +1280,35 @@ execute_charstring(fz_context *ctx, cff_t *cff, const uint8_t *pc, const uint8_t fz_throw(ctx, FZ_ERROR_FORMAT, "Insufficient operators on the stack: op=%d", c); } + +usage_list_t * +get_font_locals(fz_context *ctx, cff_t *cff, int gid, int is_pdf_cidfont, uint16_t *subr_bias) +{ + usage_t *gids = cff->gids_to_keep.list; + int num_gids = cff->gids_to_keep.len; + + if (is_pdf_cidfont && cff->is_cidfont) + { + uint8_t font = 0; + if (gid < num_gids && gids[gid].num < cff->charstrings_index.count) + font = cff->gid_to_font[gids[gid].num]; + else if (gid == 0) + font = cff->gid_to_font[gid]; + if (font >= cff->fdarray_index.count) + font = 0; + + if (subr_bias) + *subr_bias = cff->fdarray[font].subr_bias; + return &cff->fdarray[font].local_usage; + } + + if (subr_bias) + *subr_bias = cff->subr_bias; + return &cff->local_usage; +} + static void -scan_charstrings(fz_context *ctx, cff_t *cff) +scan_charstrings(fz_context *ctx, cff_t *cff, int is_pdf_cidfont) { uint32_t offset, end; int num_charstrings = (int)cff->charstrings_index.count; @@ -1257,6 +1316,8 @@ scan_charstrings(fz_context *ctx, cff_t *cff) usage_t *gids = cff->gids_to_keep.list; int num_gids = cff->gids_to_keep.len; int changed; + uint16_t subr_bias; + usage_list_t *local_usage = NULL; /* Scan through the charstrings.*/ offset = index_get(ctx, &cff->charstrings_index, 0); @@ -1278,7 +1339,8 @@ scan_charstrings(fz_context *ctx, cff_t *cff) /* Drop this */ continue; } - execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end]); + local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias); + execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage); } /* Now we search the 'extra' ones, the 'subrs' (local) and 'gsubrs' (globals) @@ -1297,7 +1359,9 @@ scan_charstrings(fz_context *ctx, cff_t *cff) usage_list_add(ctx, &cff->gids_to_keep, gid); offset = index_get(ctx, &cff->charstrings_index, gid); end = index_get(ctx, &cff->charstrings_index, gid+1); - execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end]); + + local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias); + execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage); changed = 1; } @@ -1310,7 +1374,9 @@ scan_charstrings(fz_context *ctx, cff_t *cff) gid = cff->local_usage.list[i].num; offset = index_get(ctx, &cff->local_index, gid); end = index_get(ctx, &cff->local_index, gid+1); - execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end]); + + local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias); + execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage); changed = 1; } @@ -1323,7 +1389,9 @@ scan_charstrings(fz_context *ctx, cff_t *cff) gid = cff->global_usage.list[i].num; offset = index_get(ctx, &cff->global_index, gid); end = index_get(ctx, &cff->global_index, gid+1); - execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end]); + + local_usage = get_font_locals(ctx, cff, gid, is_pdf_cidfont, &subr_bias); + execute_charstring(ctx, cff, &cff->base[offset], &cff->base[end], subr_bias, local_usage); changed = 1; } } @@ -1488,13 +1556,13 @@ get_charset_len(fz_context *ctx, cff_t *cff) } static void -get_fdselect_len(fz_context *ctx, cff_t *cff) +read_fdselect(fz_context *ctx, cff_t *cff) { uint32_t fdselect_offset = cff->fdselect_offset; const uint8_t *d = cff->base + fdselect_offset; const uint8_t *d0 = d; uint8_t fmt; - uint32_t n; + uint16_t n, m, i, first, last, k; if (fdselect_offset == 0) { @@ -1508,19 +1576,39 @@ get_fdselect_len(fz_context *ctx, cff_t *cff) fmt = *d++; n = cff->charstrings_index.count; + cff->gid_to_font = fz_calloc(ctx, n, sizeof(*cff->gid_to_font)); + if (fmt == 0) { - d += n; + for (i = 0; i < n; i++) + { + if (d >= cff->base + cff->len) + fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect"); + cff->gid_to_font[i] = d[0]; + d++; + } } else if (fmt == 3) { if (d + 2 >= cff->base + cff->len) fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect"); - n = get16(d); - d += 2 + 3*n; - if (d + 2 >= cff->base + cff->len) - fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect"); + m = get16(d); d += 2; + if (m > cff->charstrings_index.count) + fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect"); + + for (i = 0; i < m; i++) + { + if (d + 5 >= cff->base + cff->len) + fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect"); + first = get16(d); + last = get16(d + 3); + if (first >= cff->charstrings_index.count || last > cff->charstrings_index.count || first >= last) + fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt fdselect"); + for (k = first; k < last; k++) + cff->gid_to_font[k] = d[2]; + d += 3; + } } cff->fdselect_len = (uint32_t)(d - d0); @@ -1556,13 +1644,13 @@ load_charset_for_cidfont(fz_context *ctx, cff_t *cff) for (i = 1; i < n;) { uint16_t first; - uint32_t nleft; + int32_t nleft; if (d + 3 >= cff->base + cff->len) fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset"); first = get16(d); nleft = d[2] + 1; d += 3; - while (nleft--) + while (nleft-- && i < n) { cff->gid_to_cid[i++] = first++; } @@ -1573,13 +1661,13 @@ load_charset_for_cidfont(fz_context *ctx, cff_t *cff) for (i = 1; i < n; i++) { uint16_t first; - uint32_t nleft; + int32_t nleft; if (d + 4 >= cff->base + cff->len) fz_throw(ctx, FZ_ERROR_FORMAT, "corrupt charset"); first = get16(d); nleft = get16(d+2); d += 4; - while (nleft--) + while (nleft-- && i < n) { cff->gid_to_cid[i++] = first++; } @@ -1666,9 +1754,17 @@ rewrite_fdarray(fz_context *ctx, cff_t *cff, uint32_t offset0) { assert(cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup] == 29); assert(cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup+5] == 29); - put32(&cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup+1], cff->fdarray[i].len); + put32(&cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup+1], cff->fdarray[i].rewritten_private->len); put32(&cff->fdarray[i].rewritten_dict->data[cff->fdarray[i].fixup+6], offset); - offset += cff->fdarray[i].len; + offset += cff->fdarray[i].rewritten_private->len; + if (cff->fdarray[i].local_subset) + { + offset += cff->fdarray[i].local_subset->len; + } + else + { + offset += 2; + } } return offset; @@ -1760,12 +1856,18 @@ read_top_dict(fz_context *ctx, cff_t *cff, int idx) { switch (k) { + case DICT_OP_ROS: + cff->is_cidfont = 1; + break; case DICT_OP_charset: cff->charset_offset = dict_arg_int(ctx, &di, 0); break; case DICT_OP_Encoding: cff->encoding_offset = dict_arg_int(ctx, &di, 0); break; + case DICT_OP_CharstringType: + cff->charstring_type = 1; + break; case DICT_OP_CharStrings: cff->charstrings_index_offset = dict_arg_int(ctx, &di, 0); break; @@ -1900,7 +2002,7 @@ make_new_private_dict(fz_context *ctx, cff_t *cff) { /* Everything is in the DICT except for the local subr offset. Insert * that now. This is tricky, because what is the offset? It depends on - * the size of he dict we are creating now, and the size of the dict + * the size of the dict we are creating now, and the size of the dict * we are creating now depends on the size of the offset! */ /* Length so far */ len = fz_tell_output(ctx, out); @@ -1956,6 +2058,8 @@ read_fdarray_and_privates(fz_context *ctx, cff_t *cff) dict_operator k; uint16_t i; uint16_t n = cff->fdarray_index.count; + int subrs; + int64_t len; cff->fdarray = fz_calloc(ctx, n, sizeof(*cff->fdarray)); @@ -1996,6 +2100,91 @@ read_fdarray_and_privates(fz_context *ctx, cff_t *cff) fz_drop_output(ctx, out); fz_catch(ctx) fz_rethrow(ctx); + + + offset = cff->fdarray[i].offset; + end = cff->fdarray[i].offset + cff->fdarray[i].len; + + fz_try(ctx) + { + cff->fdarray[i].rewritten_private = fz_new_buffer(ctx, 1024); + + out = fz_new_output_with_buffer(ctx, cff->fdarray[i].rewritten_private); + cff->fdarray[i].local_index_offset = 0; + + subrs = 0; + + for (k = dict_init(ctx, &di, cff->base, cff->len, offset, end); dict_more(&di); k = dict_next(ctx, &di)) + { + switch (k) + { + case DICT_OP_Subrs: + subrs = 1; + cff->fdarray[i].local_index_offset = dict_arg_int(ctx, &di, 0) + offset; + break; + default: + dict_write_args(ctx, out, &di); + break; + } + } + + if (subrs != 0) + { + /* Everything is in the DICT except for the local subr offset. Insert + * that now. This is tricky, because what is the offset? It depends on + * the size of he dict we are creating now, and the size of the dict + * we are creating now depends on the size of the offset! */ + /* Length so far */ + len = fz_tell_output(ctx, out); + /* We have to encode an offset, plus the Subrs token (19). Offset + * can take up to 5 bytes. */ + if (len+2 < 107) + { + /* We can code it with a single byte encoding */ + len += 2; + fz_write_byte(ctx, out, len + 139); + } + else if (len+3 < 1131) + { + /* We can code it with a 2 byte encoding */ + /* (b0-247) * 256 + b1 + 108 == len+3 */ + len = len+3 - 108; + fz_write_byte(ctx, out, (len>>8) + 247); + fz_write_byte(ctx, out, len); + } + else if (len+4 < 32767) + { + /* We can code it with a 3 byte encoding */ + len += 4; + fz_write_byte(ctx, out, 28); + fz_write_byte(ctx, out, len>>8); + fz_write_byte(ctx, out, len); + } + else + { + /* We can code it with a 5 byte encoding */ + len += 5; + fz_write_byte(ctx, out, 29); + fz_write_byte(ctx, out, len>>24); + fz_write_byte(ctx, out, len>>16); + fz_write_byte(ctx, out, len>>8); + fz_write_byte(ctx, out, len); + } + fz_write_byte(ctx, out, DICT_OP_Subrs); + } + + fz_close_output(ctx, out); + } + fz_always(ctx) + fz_drop_output(ctx, out); + fz_catch(ctx) + fz_rethrow(ctx); + + if (cff->fdarray[i].local_index_offset != 0) + { + index_load(ctx, &cff->fdarray[i].local_index, cff->base, cff->len, cff->fdarray[i].local_index_offset); + cff->fdarray[i].subr_bias = subr_bias(ctx, cff, cff->fdarray[i].local_index.count); + } } } @@ -2034,7 +2223,11 @@ output_fdarray(fz_context *ctx, fz_output *out, cff_t *cff) /* Now we can write out the private dicts, unchanged from the original file. */ for (i = 0; i < n; i++) { - fz_write_data(ctx, out, cff->base + cff->fdarray[i].offset, cff->fdarray[i].len); + fz_write_data(ctx, out, cff->fdarray[i].rewritten_private->data, cff->fdarray[i].rewritten_private->len); + if (cff->fdarray[i].local_subset) + fz_write_data(ctx, out, cff->fdarray[i].local_subset->data, cff->fdarray[i].local_subset->len); + else + fz_write_uint16_be(ctx, out, 0); } } @@ -2053,8 +2246,9 @@ cid_to_gid(fz_context *ctx, cff_t *cff, uint16_t cid) return 0; } + fz_buffer * -fz_subset_cff_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int cidfont) +fz_subset_cff_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int is_pdf_cidfont) { cff_t cff = { 0 }; fz_buffer *newbuf = NULL; @@ -2062,6 +2256,7 @@ fz_subset_cff_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids size_t len; fz_output *out = NULL; int i; + uint16_t n, k; fz_var(newbuf); fz_var(out); @@ -2078,7 +2273,6 @@ fz_subset_cff_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids cff.len = len; cff.symbolic = symbolic; - cff.cidfont = cidfont; if (len < 4) fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated CFF"); @@ -2105,61 +2299,62 @@ fz_subset_cff_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids /* Next the Global subr index */ index_load(ctx, &cff.global_index, base, (uint32_t)len, cff.global_index_offset); - if (cff.global_index.count < 1240) - cff.gsubr_bias = 107; - else if (cff.global_index.count < 33900) - cff.gsubr_bias = 1131; - else - cff.gsubr_bias = 32768; + + /* Default value, possibly updated by top dict entries */ + cff.charstring_type = 2; /* CFF files can contain several fonts, but we only want the first one. */ read_top_dict(ctx, &cff, 0); + cff.gsubr_bias = subr_bias(ctx, &cff, cff.global_index.count); + if (cff.charstrings_index_offset == 0) fz_throw(ctx, FZ_ERROR_FORMAT, "Missing charstrings table"); index_load(ctx, &cff.charstrings_index, base, (uint32_t)len, cff.charstrings_index_offset); index_load(ctx, &cff.local_index, base, (uint32_t)len, cff.local_index_offset); - if (cff.local_index.count < 1240) - cff.subr_bias = 107; - else if (cff.local_index.count < 33900) - cff.subr_bias = 1131; - else - cff.subr_bias = 32768; + cff.subr_bias = subr_bias(ctx, &cff, cff.local_index.count); index_load(ctx, &cff.fdarray_index, base, (uint32_t)len, cff.fdarray_index_offset); + get_encoding_len(ctx, &cff); + get_charset_len(ctx, &cff); + + if (is_pdf_cidfont && cff.is_cidfont) + { + read_fdselect(ctx, &cff); + read_fdarray_and_privates(ctx, &cff); + } + /* Move our list of gids into our own storage. */ - if (cidfont) + if (is_pdf_cidfont && cff.is_cidfont) { - /* For CIDFonts we are given CIDs here, not gids. Accordingly - * we need to look them up in the charset */ + /* For CIDFontType0 FontDescriptor with a CFF that uses CIDFont operators, + * we are given CIDs here, not GIDs. Accordingly + * we need to look them up in the CharSet. + */ load_charset_for_cidfont(ctx, &cff); for (i = 0; i < num_gids; i++) usage_list_add(ctx, &cff.gids_to_keep, cid_to_gid(ctx, &cff, gids[i])); } else { + /* For CIDFontType0 FontDescriptor with a CFF that DOES NOT use CIDFont operators, + * and for Type1 FontDescriptors, we are given GIDs directly. + */ for (i = 0; i < num_gids; i++) usage_list_add(ctx, &cff.gids_to_keep, gids[i]); } - get_encoding_len(ctx, &cff); - get_charset_len(ctx, &cff); - /* Scan charstrings. */ - scan_charstrings(ctx, &cff); + scan_charstrings(ctx, &cff, is_pdf_cidfont); /* Now subset the data. */ subset_charstrings(ctx, &cff); + if (is_pdf_cidfont && cff.is_cidfont) + subset_fdarray_locals(ctx, &cff); subset_locals(ctx, &cff); subset_globals(ctx, &cff); - if (cidfont) - { - get_fdselect_len(ctx, &cff); - read_fdarray_and_privates(ctx, &cff); - } - /* FIXME: cull the strings? */ /* Now, rewrite the font. @@ -2270,17 +2465,21 @@ fz_subset_cff_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids fz_drop_buffer(ctx, cff.local_subset); fz_drop_buffer(ctx, cff.global_subset); fz_free(ctx, cff.gid_to_cid); + fz_free(ctx, cff.gid_to_font); drop_usage_list(ctx, &cff.local_usage); drop_usage_list(ctx, &cff.global_usage); drop_usage_list(ctx, &cff.gids_to_keep); drop_usage_list(ctx, &cff.extra_gids_to_keep); if (cff.fdarray) { - int n = cff.fdarray_index.count; - int i; - - for (i = 0; i < n; i++) - fz_drop_buffer(ctx, cff.fdarray[i].rewritten_dict); + n = cff.fdarray_index.count; + for (k = 0; k < n; k++) + { + fz_drop_buffer(ctx, cff.fdarray[k].rewritten_dict); + fz_drop_buffer(ctx, cff.fdarray[k].rewritten_private); + fz_drop_buffer(ctx, cff.fdarray[k].local_subset); + drop_usage_list(ctx, &cff.fdarray[k].local_usage); + } fz_free(ctx, cff.fdarray); } fz_free(ctx, cff.unpacked_charset); diff --git a/mupdf/source/fitz/subset-ttf.c b/mupdf/source/fitz/subset-ttf.c index 24600be288a5..6ba2d91631f1 100644 --- a/mupdf/source/fitz/subset-ttf.c +++ b/mupdf/source/fitz/subset-ttf.c @@ -72,8 +72,8 @@ typedef struct int is_otf; int symbolic; encoding_t *encoding; - uint32_t orig_num_glyphs; - uint32_t new_num_glyphs; + uint16_t orig_num_glyphs; + uint16_t new_num_glyphs; uint16_t index_to_loc_format; uint8_t *index_to_loc_formatp; uint16_t orig_num_long_hor_metrics; @@ -140,7 +140,7 @@ find_table(fz_context *ctx, fz_stream *stm, uint32_t tag, uint32_t *len) uint32_t t = fz_read_uint32(ctx, stm); uint32_t cs = fz_read_uint32(ctx, stm); uint32_t off = fz_read_uint32(ctx, stm); - cs = cs; /* UNUSED */ + (void) cs; /* UNUSED */ *len = fz_read_uint32(ctx, stm); if (t == tag) return off; @@ -263,7 +263,7 @@ write_tables(fz_context *ctx, ttf_t *ttf, fz_output *out) /* number of tables */ fz_write_uint16_be(ctx, out, ttf->len); - while (1<<(i+1) < ttf->len) + while (1<<(i+1) <= ttf->len) i++; /* searchRange */ @@ -303,7 +303,7 @@ fix_checksum(fz_context *ctx, fz_buffer *buf) fz_stream *stm = fz_open_buffer(ctx, buf); uint32_t csumpos = find_table(ctx, stm, TAG("head"), &namesize) + 8; - len = len; // UNUSED + (void) len; // UNUSED fz_drop_stream(ctx, stm); @@ -574,7 +574,7 @@ load_enc_tab4(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset) uint16_t seg_count; uint32_t i; - if (data_size < 26) + if (data_size < offset + 26) fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small"); seg_count = get16(d+offset+6); /* 2 * seg_count */ @@ -734,11 +734,10 @@ load_encoding(fz_context *ctx, ttf_t *ttf, fz_stream *stm) { if (ttf->symbolic) { - /* PDF Spec says that for symbolic fonts we look for (1,0). */ - /* (3, 0) may also be present, but we'll just use (1, 0) for - * now. If we find files with a (3,0), but not a (1,0), then - * we'll deal with that then. */ + /* For symbolic fonts, we look for (1,0) as per PDF Spec, then (3,0). */ enc = load_enc(ctx, t, 1, 0); + if (!enc) + enc = load_enc(ctx, t, 3, 0); } else { @@ -746,6 +745,8 @@ load_encoding(fz_context *ctx, ttf_t *ttf, fz_stream *stm) enc = load_enc(ctx, t, 3, 1); if (!enc) enc = load_enc(ctx, t, 1, 0); + if (!enc) + enc = load_enc(ctx, t, 0, 1); } if (!enc) fz_throw(ctx, FZ_ERROR_FORMAT, "No suitable cmap table found"); @@ -792,7 +793,7 @@ reduce_encoding(fz_context *ctx, ttf_t *ttf, int *gids, int num_gids) } /* Not found */ - enc->gid[0] = 0; + enc->gid[i] = 0; found: {} } @@ -834,9 +835,6 @@ make_cmap(fz_context *ctx, ttf_t *ttf) } segs++; /* For the terminator */ - - - len = 12 + 14 + 2 + segs * 2 * 4 + entries * 2; buf = fz_new_buffer(ctx, len); d = buf->data; @@ -1205,8 +1203,8 @@ read_glyf(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids) uint32_t len = get_loca(ctx, ttf, ttf->orig_num_glyphs); fz_buffer *t = read_table(ctx, stm, TAG("glyf"), 1); encoding_t *enc = ttf->encoding; - uint32_t i, j; - uint32_t new_start, old_start, old_end; + uint32_t last_loca, i, j, k; + uint32_t new_start, old_start, old_end, last_loca_ofs; if (t->len < len) { @@ -1256,35 +1254,69 @@ read_glyf(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids) } /* Now subset the glyf table. */ - new_start = 0; - old_start = get_loca(ctx, ttf, 0); - if (old_start >= t->len) - fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); - for (i = 0; i < ttf->orig_num_glyphs; i++) + if (enc) { - old_end = get_loca(ctx, ttf, i+1); + old_start = get_loca(ctx, ttf, 0); + if (old_start > t->len) + fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); + old_end = get_loca(ctx, ttf, 1); if (old_end > t->len || old_end < old_start) fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); - if ((old_end != old_start) && (i == 0 || ttf->gid_renum[i] != 0)) + len = old_end - old_start; + new_start = 0; + put_loca(ctx, ttf, 0, new_start); + last_loca = 0; + last_loca_ofs = len; + for (i = 0; i < ttf->orig_num_glyphs; i++) { + old_end = get_loca(ctx, ttf, i + 1); + if (old_end > t->len || old_end < old_start) + fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); len = old_end - old_start; - memmove(t->data + new_start, t->data + old_start, len); - if (enc) + if (len > 0 && (i == 0 || ttf->gid_renum[i] != 0)) { + memmove(t->data + new_start, t->data + old_start, len); if ((int16_t)get16(t->data + new_start) < 0) renumber_composite(ctx, ttf, t->data + new_start, len); - put_loca(ctx, ttf, ttf->gid_renum[i], new_start); + for (k = last_loca + 1; k <= ttf->gid_renum[i]; k++) + put_loca(ctx, ttf, k, last_loca_ofs); + new_start += len; + last_loca = ttf->gid_renum[i]; + last_loca_ofs = new_start; + } + old_start = old_end; + } + for (k = last_loca + 1; k <= ttf->new_num_glyphs; k++) + put_loca(ctx, ttf, k, last_loca_ofs); + } + else + { + new_start = 0; + old_start = get_loca(ctx, ttf, 0); + if (old_start > t->len) + fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); + for (i = 0; i < ttf->orig_num_glyphs; i++) + { + old_end = get_loca(ctx, ttf, i + 1); + if (old_end > t->len || old_end < old_start) + fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); + len = old_end - old_start; + if (len > 0 && ttf->gid_renum[i] != 0) + { + memmove(t->data + new_start, t->data + old_start, len); + put_loca(ctx, ttf, i, new_start); + new_start += len; } else + { put_loca(ctx, ttf, i, new_start); - new_start += len; + } + old_start = old_end; } - else if (!enc) - put_loca(ctx, ttf, i, new_start); - old_start = old_end; + put_loca(ctx, ttf, ttf->orig_num_glyphs, new_start); } - put_loca(ctx, ttf, ttf->new_num_glyphs, new_start); - *ttf->loca_len = (size_t) (ttf->new_num_glyphs+1) * (2<index_to_loc_format); + + *ttf->loca_len = (size_t) (ttf->new_num_glyphs + 1) * (2<index_to_loc_format); t->len = new_start; } @@ -1298,27 +1330,24 @@ static void subset_hmtx(fz_context *ctx, ttf_t *ttf, fz_stream *stm) { fz_buffer *t = read_table(ctx, stm, TAG("hmtx"), 1); - uint16_t i, max16; + uint16_t long_metrics, short_metrics, i, k; uint8_t *s = t->data; uint8_t *d = t->data; int cidfont = (ttf->encoding == NULL); - size_t max = t->len; - if (ttf->orig_num_long_hor_metrics * 4 > max) - { - fz_drop_buffer(ctx, t); - fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed hmtx table"); - } - max -= ttf->orig_num_long_hor_metrics * 4; - max /= 2; - if (max > ttf->orig_num_glyphs) - max = ttf->orig_num_glyphs; - /* We know orig_num_glyphs is 16bit, so this cast safe. */ - max16 = (uint16_t)max; - - for (i = 0; i < ttf->orig_num_long_hor_metrics; i++) + long_metrics = ttf->orig_num_long_hor_metrics; + if (long_metrics > ttf->orig_num_glyphs) + long_metrics = ttf->orig_num_glyphs; + if (long_metrics > t->len / 4) + long_metrics = t->len / 4; + + short_metrics = (t->len - long_metrics * 4) / 2; + if (short_metrics > ttf->orig_num_glyphs - long_metrics) + short_metrics = ttf->orig_num_glyphs - long_metrics; + + for (i = 0; i < long_metrics; i++) { - if (i == 0 || ttf->is_otf || ttf->gid_renum[i]) + if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i])) { put32(d, get32(s)); d += 4; @@ -1330,9 +1359,9 @@ subset_hmtx(fz_context *ctx, ttf_t *ttf, fz_stream *stm) } s += 4; } - for ( ; i < max16; i++) + for (k = 0 ; k < short_metrics; k++, i++) { - if (i == 0 || ttf->is_otf || ttf->gid_renum[i]) + if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i])) { put16(d, get16(s)); d += 2; @@ -1382,12 +1411,12 @@ shrink_loca_if_possible(fz_context *ctx, ttf_t *ttf) static size_t subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int num_gids) { - int i, n; + int i, n, new_glyphs; int j; fz_int2_heap heap = { 0 }; - uint8_t *d0, *e, *idx; + uint8_t *d0, *e, *idx , *p; - if (len < 2 + 2 * ttf->orig_num_glyphs) + if (len < (size_t) 2 + 2 * ttf->orig_num_glyphs) fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table"); n = get16(d); @@ -1397,27 +1426,35 @@ subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int d0 = d; d += 2; len -= 2; idx = d; + e = d; + p = d; - /* Store all the indexes. */ - j = 0; + /* Store all kept indexes. */ if (len < (size_t)n*2) fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); + new_glyphs = 0; + j = 0; len -= (size_t)n*2; for (i = 0; i < n; i++) { uint16_t o = get16(d); fz_int2 i2; - d += 2; + p += 2; /* We're only keeping gids we want. */ - if (j >= num_gids || gids[j] != i) + if (i != 0 && (j >= num_gids || gids[j] != i)) { - put16(d-2, 0); + memmove(d, d + 2, (n - i - 1) * 2); continue; } + if (i != 0) + j++; + + d += 2; + e += 2; /* We want this gid. */ - j++; + new_glyphs++; /* 257 or smaller: same as in the basic order. */ if (o <= 257) @@ -1429,6 +1466,11 @@ subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int fz_int2_heap_insert(ctx, &heap, i2); } + d = p; + + /* Update number of indexes */ + put16(d0, new_glyphs); + fz_int2_heap_sort(ctx, &heap); /* So, the heap is sorted on i2.a (the string indexes we want to keep), @@ -1436,7 +1478,6 @@ subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int /* Run through the list moving the strings down that we care about. */ j = 0; - e = d; n = heap.len; for (i = 0; i < n; i++) { @@ -1453,13 +1494,14 @@ subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int { /* Drop this one. */ d += slen; + continue; } memmove(e, d, slen); d += slen; e += slen; - put16(idx + 2*i, 258 + j); + put16(idx + 2*j, 258 + j); j++; } @@ -1510,7 +1552,7 @@ subset_post(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids fz_rethrow(ctx); } - t->len = len; + t->len = 32 + len; add_table(ctx, ttf, TAG("post"), t); } diff --git a/mupdf/source/fitz/time.c b/mupdf/source/fitz/time.c index 1be5358dbfb8..4f8d35a6ddbc 100644 --- a/mupdf/source/fitz/time.c +++ b/mupdf/source/fitz/time.c @@ -32,7 +32,6 @@ #include #include /* for mkdir */ -#ifdef _MSC_VER #ifndef _WINRT #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 @@ -61,7 +60,6 @@ int gettimeofday(struct timeval *tv, struct timezone *tz) } #endif /* !_WINRT */ -#endif /* _MSC_VER */ static char * utf8_from_wchar(const wchar_t *s) diff --git a/mupdf/source/fitz/xml.c b/mupdf/source/fitz/xml.c index ba018423dd16..035f8107125d 100644 --- a/mupdf/source/fitz/xml.c +++ b/mupdf/source/fitz/xml.c @@ -26,7 +26,9 @@ #include #include +#if FZ_ENABLE_HTML_ENGINE #include +#endif #define FZ_XML_MAX_DEPTH 4096 @@ -1129,6 +1131,7 @@ fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white) return xml; } +#if FZ_ENABLE_HTML_ENGINE /* Parse the contents of buffer into a tree of XML nodes, using the HTML5 syntax. @@ -1203,10 +1206,12 @@ static void xml_from_gumbo(fz_context *ctx, struct parser *parser, GumboNode *no break; } } +#endif fz_xml * fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf) { +#if FZ_ENABLE_HTML_ENGINE struct parser parser; fz_xml *xml = NULL; fz_xml root, *node; @@ -1294,6 +1299,9 @@ fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf) } return xml; +#else + fz_throw(ctx, FZ_ERROR_GENERIC, "HTML Engine not enabled in this build"); +#endif } fz_xml *fz_xml_find_dfs(fz_xml *item, const char *tag, const char *att, const char *match) diff --git a/mupdf/source/pdf/pdf-cmap.c b/mupdf/source/pdf/pdf-cmap.c index d38f8cd1093e..1122795bb459 100644 --- a/mupdf/source/pdf/pdf-cmap.c +++ b/mupdf/source/pdf/pdf-cmap.c @@ -507,9 +507,6 @@ add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, { int current; cmap_splay *tree; - int i; - int inrange = 0; - unsigned int k, count; if (low > high) { @@ -523,22 +520,6 @@ add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, pdf_add_codespace(ctx, cmap, 0, 65535, 2); } - count = high - low + 1; - for (k = 0; k < count; k++) { - unsigned int c = low + k; - - inrange = 0; - for (i = 0; i < cmap->codespace_len; i++) { - if (cmap->codespace[i].low <= c && c <= cmap->codespace[i].high) - inrange = 1; - } - if (!inrange) - { - fz_warn(ctx, "ignoring CMap range (%u-%u) that is outside of the codespace", low, high); - return; - } - } - tree = cmap->tree; if (cmap->tlen) diff --git a/mupdf/source/pdf/pdf-outline.c b/mupdf/source/pdf/pdf-outline.c index 987b2a525658..f12c14c515c8 100644 --- a/mupdf/source/pdf/pdf-outline.c +++ b/mupdf/source/pdf/pdf-outline.c @@ -244,7 +244,7 @@ do_outline_update(fz_context *ctx, pdf_obj *obj, fz_outline_item *item, int is_n pdf_obj *cobj = pdf_dict_get(ctx, parent, PDF_NAME(Count)); count = pdf_to_int(ctx, cobj); if (open_delta || cobj == NULL) - pdf_dict_put_int(ctx, parent, PDF_NAME(Count), count >= 0 ? count + open_delta : count - open_delta); + pdf_dict_put_int(ctx, parent, PDF_NAME(Count), count > 0 ? count + open_delta : count - open_delta); if (count < 0) break; parent = pdf_dict_get(ctx, parent, PDF_NAME(Parent)); diff --git a/mupdf/source/pdf/pdf-repair.c b/mupdf/source/pdf/pdf-repair.c index 8b96fc7826b6..c74df1c9101d 100644 --- a/mupdf/source/pdf/pdf-repair.c +++ b/mupdf/source/pdf/pdf-repair.c @@ -269,7 +269,7 @@ pdf_repair_obj_stm(fz_context *ctx, pdf_document *doc, int stm_num) fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i); continue; } - else if (n >= pdf_xref_len(ctx, doc)) + else if (n >= PDF_MAX_OBJECT_NUMBER) { fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i); continue; diff --git a/mupdf/source/pdf/pdf-subset.c b/mupdf/source/pdf/pdf-subset.c index 0e6137a8b485..59e40e315263 100644 --- a/mupdf/source/pdf/pdf-subset.c +++ b/mupdf/source/pdf/pdf-subset.c @@ -44,7 +44,7 @@ typedef struct { int num; int gen; - int is_cff; + int is_ttf; int is_cidfont; pdf_obj *fontfile; unsigned char digest[16]; @@ -169,77 +169,80 @@ static void font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size) { pdf_font_analysis_processor *p = (pdf_font_analysis_processor*)proc; - pdf_obj *obj = pdf_dict_gets(ctx, pdf_dict_get(ctx, p->rstack->res, PDF_NAME(Font)), name); + pdf_obj *dict = pdf_dict_gets(ctx, pdf_dict_get(ctx, p->rstack->res, PDF_NAME(Font)), name); pdf_obj *subtype, *fontdesc; - pdf_obj *key = NULL; pdf_obj *fontfile = NULL; + pdf_obj *key; int num, gen, i; - int is_cff = 0; - int cidfont = 0; + int is_cidfont = 0; + int is_ttf = 0; unsigned char digest[16]; - if (obj == NULL) + p->gs->current_font = -1; /* unknown font! */ + + if (dict == NULL) return; /* We can have multiple fonts that rely on the same underlying fontfile * object. Therefore, resolve down to that. */ - subtype = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)); + subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); - if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType))) + if (subtype == PDF_NAME(Type1) || subtype == PDF_NAME(MMType1)) + { + // fontfile subtype should be Type1C for us to be able to subset it + key = PDF_NAME(FontFile); + fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); + fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile)); + is_cidfont = 0; + is_ttf = 0; + } + else if (subtype == PDF_NAME(TrueType)) { - fontdesc = pdf_dict_get(ctx, obj, PDF_NAME(FontDescriptor)); key = PDF_NAME(FontFile2); - fontfile = pdf_dict_get(ctx, fontdesc, key); - cidfont = 0; + fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); + fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2)); + is_cidfont = 0; + is_ttf = 1; } else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0))) { - obj = pdf_array_get(ctx, pdf_dict_get(ctx, obj, PDF_NAME(DescendantFonts)), 0); - fontdesc = pdf_dict_get(ctx, obj, PDF_NAME(FontDescriptor)); - key = PDF_NAME(FontFile2); - fontfile = pdf_dict_get(ctx, fontdesc, key); - cidfont = 1; // fontsub7a - if (!fontfile) + dict = pdf_array_get(ctx, pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)), 0); + subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); + fontdesc = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); + if (subtype == PDF_NAME(CIDFontType0)) { + // fontfile subtype is either CIDFontType0C or OpenType key = PDF_NAME(FontFile3); - fontfile = pdf_dict_get(ctx, fontdesc, key); + fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile3)); subtype = pdf_dict_get(ctx, fontfile, PDF_NAME(Subtype)); - if (pdf_name_eq(ctx, subtype, PDF_NAME(OpenType))) + if (subtype == PDF_NAME(CIDFontType0C)) { - cidfont = 0; // fontsub2a + is_cidfont = 1; + is_ttf = 0; } - else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0C))) + else if (subtype == PDF_NAME(OpenType)) { - is_cff = 1; + is_cidfont = 1; + is_ttf = 1; } else { - is_cff = 1; - cidfont = 0; // fontsub1a + fontfile = NULL; } } - } - else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1))) - { - fontdesc = pdf_dict_get(ctx, obj, PDF_NAME(FontDescriptor)); - key = PDF_NAME(FontFile3); - fontfile = pdf_dict_get(ctx, fontdesc, key); - is_cff = 1; - cidfont = 0; - } - else - { -#ifdef DEBUG_SUBSETTING - fz_write_printf(ctx, fz_stddbg(ctx), "Unknown font of subtype "); - pdf_debug_obj(ctx, subtype); -#endif + else if (subtype == PDF_NAME(CIDFontType2)) + { + key = PDF_NAME(FontFile2); + fontfile = pdf_dict_get(ctx, fontdesc, PDF_NAME(FontFile2)); + is_cidfont = 1; + is_ttf = 1; + } } if (!fontfile) { #ifdef DEBUG_SUBSETTING - fz_write_printf(ctx, fz_stddbg(ctx), "No key found for font of subtype "); - pdf_debug_obj(ctx, subtype); + fz_write_printf(ctx, fz_stddbg(ctx), "No embedded file found for font of subtype %s\n", pdf_to_name(ctx, subtype)); #endif return; } @@ -280,7 +283,7 @@ font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_fon for (j = 0; j < p->usage->font[i].len; j++) { - if (pdf_objcmp(ctx, p->usage->font[i].font[j], obj) == 0) + if (pdf_objcmp(ctx, p->usage->font[i].font[j], dict) == 0) return; } @@ -290,7 +293,7 @@ font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_fon p->usage->font[i].font = fz_realloc(ctx, p->usage->font[i].font, sizeof(*p->usage->font[i].font) * newmax); p->usage->font[i].max = newmax; } - p->usage->font[i].font[j] = pdf_keep_obj(ctx, obj); + p->usage->font[i].font[j] = pdf_keep_obj(ctx, dict); p->usage->font[i].len++; return; @@ -306,8 +309,8 @@ font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_fon p->usage->max = n; } - p->usage->font[i].is_cff = is_cff; - p->usage->font[i].is_cidfont = cidfont; + p->usage->font[i].is_ttf = is_ttf; + p->usage->font[i].is_cidfont = is_cidfont; p->usage->font[i].fontfile = pdf_keep_obj(ctx, fontfile); p->usage->font[i].num = num; p->usage->font[i].gen = gen; @@ -326,7 +329,7 @@ font_analysis_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_fon p->usage->font[i].font = fz_malloc(ctx, sizeof(*p->usage->font[i].font) * 4); p->usage->font[i].len = 1; p->usage->font[i].max = 4; - p->usage->font[i].font[0] = pdf_keep_obj(ctx, obj); + p->usage->font[i].font[0] = pdf_keep_obj(ctx, dict); } static void @@ -342,12 +345,14 @@ show_string(fz_context *ctx, pdf_font_analysis_processor *p, unsigned char *buf, gstate *gs = p->gs; pdf_font_desc *fontdesc = gs->font; size_t pos = 0; - font_usage_t *font = &p->usage->font[gs->current_font]; + font_usage_t *font; - /* e.g. for non-embedded base14 fonts. */ - if (fontdesc == NULL) + // Not an embedded font! + if (gs->current_font < 0 || fontdesc == NULL) return; + font = &p->usage->font[gs->current_font]; + while (pos < len) { unsigned int cpt; @@ -471,6 +476,8 @@ pdf_new_font_analysis_processor(fz_context *ctx, fonts_usage_t *usage) fz_rethrow(ctx); } + proc->gs->current_font = -1; // no font set yet + proc->usage = usage; return &proc->super; @@ -788,10 +795,10 @@ pdf_subset_fonts(fz_context *ctx, pdf_document *doc, int len, const int *pages) * This will leave this font alone. */ fz_try(ctx) { - if (font->is_cff) - subset_cff(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont); - else + if (font->is_ttf) subset_ttf(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont); + else if (font->is_cidfont) + subset_cff(ctx, doc, font, font->fontfile, symbolic, font->is_cidfont); } fz_catch(ctx) { diff --git a/mupdf/source/pdf/pdf-xref.c b/mupdf/source/pdf/pdf-xref.c index 616756c5f52c..732c200d6874 100644 --- a/mupdf/source/pdf/pdf-xref.c +++ b/mupdf/source/pdf/pdf-xref.c @@ -1886,6 +1886,7 @@ pdf_init_document(fz_context *ctx, pdf_document *doc) if (repaired) { + pdf_repair_obj_stms(ctx, doc); pdf_repair_trailer(ctx, doc); } } @@ -1903,7 +1904,6 @@ void pdf_repair_trailer(fz_context *ctx, pdf_document *doc) int i; int xref_len = pdf_xref_len(ctx, doc); - pdf_repair_obj_stms(ctx, doc); hasroot = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)) != NULL); hasinfo = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)) != NULL); @@ -2224,6 +2224,12 @@ pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, pdf_lexbuf *buf, i else { entry->obj = obj; + /* If we've just read a 'null' object, don't leave this as a NULL 'o' object, + * as that will a) confuse the code that called us into thinking that nothing + * was loaded, and b) cause the entire objstm to be reloaded every time that + * object is acccessed. Instead, just mark it as an 'f'. */ + if (obj == NULL) + entry->type = 'f'; fz_drop_buffer(ctx, entry->stm_buf); entry->stm_buf = NULL; } diff --git a/mupdf/source/tools/mudraw.c b/mupdf/source/tools/mudraw.c index a900671163ad..9bed35da969c 100644 --- a/mupdf/source/tools/mudraw.c +++ b/mupdf/source/tools/mudraw.c @@ -42,7 +42,7 @@ #include #include #include -#ifdef _MSC_VER +#ifdef _WIN32 struct timeval; struct timezone; int gettimeofday(struct timeval *tv, struct timezone *tz); @@ -1358,6 +1358,20 @@ static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) start = (showtime ? gettime() : 0); + if (output_file_per_page) + { + char text_buffer[512]; + + bgprint_flush(); + if (out) + { + fz_close_output(ctx, out); + fz_drop_output(ctx, out); + } + fz_format_output_path(ctx, text_buffer, sizeof text_buffer, output, pagenum); + out = fz_new_output_with_path(ctx, text_buffer, 0); + } + page = fz_load_page(ctx, doc, pagenum - 1); if (spots != SPOTS_NONE) @@ -1458,20 +1472,6 @@ static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) features = iscolor ? " color" : " grayscale"; } - if (output_file_per_page) - { - char text_buffer[512]; - - bgprint_flush(); - if (out) - { - fz_close_output(ctx, out); - fz_drop_output(ctx, out); - } - fz_format_output_path(ctx, text_buffer, sizeof text_buffer, output, pagenum); - out = fz_new_output_with_path(ctx, text_buffer, 0); - } - if (bgprint.active) { bgprint_flush(); @@ -2534,7 +2534,7 @@ int mudraw_main(int argc, char **argv) fz_register_document_handlers(ctx); #ifdef HAVE_SMARTOFFICE { - void *cfg = so_doc_handler_enable(ctx, "en-gb"); + void *cfg = so_doc_handler_enable(ctx, "en-gb", NULL, 1); so_doc_handler_configure(ctx, cfg, SO_DOC_HANDLER_MODE, SO_DOC_HANDLER_MODE_HTML); } #endif diff --git a/mupdf/source/tools/muraster.c b/mupdf/source/tools/muraster.c index 1ca303db9bf7..3ca07f4ee446 100644 --- a/mupdf/source/tools/muraster.c +++ b/mupdf/source/tools/muraster.c @@ -165,7 +165,7 @@ #include #include -#ifdef _MSC_VER +#ifdef _WIN32 struct timeval; struct timezone; int gettimeofday(struct timeval *tv, struct timezone *tz); diff --git a/mupdf/source/tools/murun.c b/mupdf/source/tools/murun.c index 732b21ca7086..2ae5e6f0a140 100644 --- a/mupdf/source/tools/murun.c +++ b/mupdf/source/tools/murun.c @@ -314,7 +314,7 @@ const char *postfix_js = "mupdf.PDFDocument.prototype.getEmbeddedFiles = function () {\n" " function _getEmbeddedFilesRec(result, N) {\n" " var i, n\n" - " if (N) {\n" + " if (N.isDictionary()) {\n" " var NN = N.get('Names')\n" " if (NN)\n" " for (i = 0, n = NN.length; i < n; i += 2)\n" @@ -328,6 +328,29 @@ const char *postfix_js = " }\n" " return _getEmbeddedFilesRec({}, this.getTrailer().get('Root', 'Names', 'EmbeddedFiles'))\n" "}\n" + "mupdf.PDFDocument.prototype.insertEmbeddedFile = function (filename, filespec) {\n" + " var efs = this.getEmbeddedFiles()\n" + " efs[filename] = filespec\n" + " this._rewriteEmbeddedFiles(efs)\n" + "}\n" + "mupdf.PDFDocument.prototype.deleteEmbeddedFile = function (filename) {\n" + " var efs = this.getEmbeddedFiles()\n" + " delete efs[filename]\n" + " this._rewriteEmbeddedFiles(efs)\n" + "}\n" + "mupdf.PDFDocument.prototype._rewriteEmbeddedFiles = function (efs) {\n" + " var efs_keys = Object.keys(efs)\n" + " var root = this.getTrailer().get('Root')\n" + " var root_names = root.get('Names')\n" + " if (!root_names.isDictionary())\n" + " root_names = root.put('Names', this.newDictionary(1))\n" + " var root_names_efs = root_names.put('EmbeddedFiles', this.newDictionary(1))\n" + " var root_names_efs_names = root_names_efs.put('Names', this.newArray(efs_keys.length * 2))\n" + " for (var i = 0; i < efs_keys.length; ++i) {\n" + " root_names_efs_names.push(this.newString(efs_keys[i]))\n" + " root_names_efs_names.push(efs[efs_keys[i]])\n" + " }\n" + "}\n" ; struct event_cb_data @@ -5571,7 +5594,7 @@ stext_walk(js_State *J, fz_stext_block *block) ffi_pushfont(J, ch->font); js_pushnumber(J, ch->size); ffi_pushquad(J, ch->quad); - js_pushnumber(J, ch->color); + js_pushnumber(J, ch->argb); js_call(J, 6); js_pop(J, 1); } diff --git a/mupdf/source/tools/pdfposter.c b/mupdf/source/tools/pdfposter.c index 915cc187e485..08c3cae00a1e 100644 --- a/mupdf/source/tools/pdfposter.c +++ b/mupdf/source/tools/pdfposter.c @@ -129,14 +129,14 @@ static void decimatepages(fz_context *ctx, pdf_document *doc) { xf = y_factor; yf = x_factor; - yd = x_dir; + yd = -x_dir; xd = 1; } else { xf = x_factor; yf = y_factor; - xd = -x_dir; + xd = x_dir; yd = -1; }