diff options
author | Vincent Sanders <vince@kyllikki.org> | 2017-12-30 00:38:07 +0000 |
---|---|---|
committer | Vincent Sanders <vince@kyllikki.org> | 2017-12-30 00:38:07 +0000 |
commit | 31b1f792826f51e9271475d124c3a1df4aa5116b (patch) | |
tree | 3e0c1083064ba66398dee4fa0aa1c464be3b5325 | |
parent | 5422dd50a49fe1a282271f22cd324f815e592e07 (diff) | |
download | libnspdf-31b1f792826f51e9271475d124c3a1df4aa5116b.tar.gz libnspdf-31b1f792826f51e9271475d124c3a1df4aa5116b.tar.bz2 |
make an actual library
-rw-r--r-- | COPYING | 19 | ||||
-rw-r--r-- | Makefile | 48 | ||||
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | include/nspdf/document.h | 45 | ||||
-rw-r--r-- | include/nspdf/errors.h | 29 | ||||
-rw-r--r-- | libnspdf.pc.in | 10 | ||||
-rw-r--r-- | src/Makefile | 15 | ||||
-rw-r--r-- | src/cos_object.c | 31 | ||||
-rw-r--r-- | src/cos_object.h | 37 | ||||
-rw-r--r-- | src/cos_parse.c (renamed from src/cos_decode.c) | 34 | ||||
-rw-r--r-- | src/cos_parse.h | 10 | ||||
-rw-r--r-- | src/document.c (renamed from src/xref.c) | 188 | ||||
-rw-r--r-- | src/nspdferror.h | 10 | ||||
-rw-r--r-- | src/pdf_doc.c | 12 | ||||
-rw-r--r-- | src/pdf_doc.h | 12 | ||||
-rw-r--r-- | test/Makefile | 3 | ||||
-rw-r--r-- | test/parsepdf.c | 94 | ||||
-rwxr-xr-x | test/runtest.sh | 4 |
18 files changed, 418 insertions, 187 deletions
@@ -0,0 +1,19 @@ +Copyright 2018 Vincent Sanders <vince@netsurf-browser.org> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..470505f --- /dev/null +++ b/Makefile @@ -0,0 +1,48 @@ +#!/bin/make +# +# Makefile for libnspdf +# +# Copyright 2018 Vincent Sanders <vince@netsurf-browser.org> + +# Component settings +COMPONENT := nspdf +COMPONENT_VERSION := 0.0.1 +# Default to a static library +COMPONENT_TYPE ?= lib-static + +# Setup the tooling +PREFIX ?= /opt/netsurf +NSSHARED ?= $(PREFIX)/share/netsurf-buildsystem +include $(NSSHARED)/makefiles/Makefile.tools + +# Reevaluate when used, as BUILDDIR won't be defined yet +TESTRUNNER = test/runtest.sh $(BUILDDIR) $(EXEEXT) + +# Toolchain flags +WARNFLAGS := -Wall -W -Wundef -Wpointer-arith -Wcast-align \ + -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \ + -Wmissing-declarations -Wnested-externs + +CFLAGS := -D_GNU_SOURCE -D_DEFAULT_SOURCE \ + -I$(CURDIR)/include/ -I$(CURDIR)/src \ + $(WARNFLAGS) $(CFLAGS) +ifneq ($(GCCVER),2) + CFLAGS := $(CFLAGS) -std=c99 +else + # __inline__ is a GCCism + CFLAGS := $(CFLAGS) -Dinline="__inline__" +endif +CFLAGS := $(CFLAGS) -D_POSIX_C_SOURCE=200809L + +REQUIRED_LIBS := nspdf + +TESTCFLAGS := -g -O2 +TESTLDFLAGS := -l$(COMPONENT) $(TESTLDFLAGS) + +include $(NSBUILD)/Makefile.top + +# Extra installation rules +I := /$(INCLUDEDIR) +INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf.h +INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR)/pkgconfig:lib$(COMPONENT).pc.in +INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR):$(OUTPUT) @@ -0,0 +1,4 @@ +NetSurf Portable Document Format handling library +================================================= + +library to handle manipulating PDF files diff --git a/include/nspdf/document.h b/include/nspdf/document.h new file mode 100644 index 0000000..4e4931d --- /dev/null +++ b/include/nspdf/document.h @@ -0,0 +1,45 @@ +/* + * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of libnspdf. 
+ * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + +/** + * \file + * NetSurf PDF library document handling + */ + +#ifndef NSPDF_DOCUMENT_H_ +#define NSPDF_DOCUMENT_H_ + +#include <nspdf/errors.h> + +struct nspdf_doc; + +/** + * create a new PDF document + */ +nspdferror nspdf_document_create(struct nspdf_doc **doc_out); + +/** + * destroys a previously created document + * + * any allocated resources are freed but any buffers passed for parse are not + * altered and may now be freed by the caller. + */ +nspdferror nspdf_document_destroy(struct nspdf_doc *doc); + +/** + * parse a PDF from a memory buffer + * + * reads all metadata and validates header, trailer, xref table and page tree + * ready to render pages. The passed buffer ownership is transferred and must + * not be altered until the document is destroyed. + */ +nspdferror nspdf_document_parse(struct nspdf_doc *doc, const uint8_t *buffer, uint64_t buffer_length); + + +#endif /* NSPDF_DOCUMENT_H_ */ diff --git a/include/nspdf/errors.h b/include/nspdf/errors.h new file mode 100644 index 0000000..f2142ff --- /dev/null +++ b/include/nspdf/errors.h @@ -0,0 +1,29 @@ +/* + * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of libnspdf. 
+ * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + +/** + * \file + * NetSurf PDF library return codes + */ + +#ifndef NSPDF_ERRORS_H_ +#define NSPDF_ERRORS_H_ + +typedef enum { + NSPDFERROR_OK, /**< no error */ + NSPDFERROR_NOMEM, /**< memory allocation error */ + NSPDFERROR_SYNTAX, /**< syntax error in parse */ + NSPDFERROR_SIZE, /**< not enough input data */ + NSPDFERROR_RANGE, /**< value outside type range */ + NSPDFERROR_TYPE, /**< wrong type error */ + NSPDFERROR_NOTFOUND, /**< key not found */ + NSPDFERROR_FORMAT, /**< objects do not conform to expected format */ +} nspdferror; + +#endif diff --git a/libnspdf.pc.in b/libnspdf.pc.in new file mode 100644 index 0000000..0898deb --- /dev/null +++ b/libnspdf.pc.in @@ -0,0 +1,10 @@ +prefix=PREFIX +exec_prefix=${prefix} +libdir=${exec_prefix}/LIBDIR +includedir=${prefix}/INCLUDEDIR + +Name: libnspdf +Description: NetSurf PDF library +Version: VERSION +Libs: -L${libdir} LIBRARIES +Cflags: -I${includedir} diff --git a/src/Makefile b/src/Makefile index af806f3..ed0b4ba 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,14 +1,3 @@ -# +DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c -CFLAGS+=-g -Wall -Wextra - -OBJS=xref.o byte_class.o cos_decode.o cos_object.o pdf_doc.o - -.PHONY:all clean - -all:xref - -xref:$(OBJS) - -clean: - ${RM} xref $(OBJS) +include $(NSBUILD)/Makefile.subdir diff --git a/src/cos_object.c b/src/cos_object.c index 5bfd423..2fa3a93 100644 --- a/src/cos_object.c +++ b/src/cos_object.c @@ -14,7 +14,8 @@ #include <stdio.h> #include <string.h> -#include "nspdferror.h" +#include <nspdf/errors.h> + #include "cos_object.h" #include "pdf_doc.h" @@ -110,7 +111,7 @@ cos_extract_dictionary_value(struct cos_object *dict, * get a value for a key from a dictionary */ nspdferror -cos_get_dictionary_value(struct pdf_doc *doc, +cos_get_dictionary_value(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object 
**value_out) @@ -140,7 +141,7 @@ cos_get_dictionary_value(struct pdf_doc *doc, } nspdferror -cos_get_dictionary_int(struct pdf_doc *doc, +cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out) @@ -156,7 +157,7 @@ cos_get_dictionary_int(struct pdf_doc *doc, } nspdferror -cos_get_dictionary_name(struct pdf_doc *doc, +cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out) @@ -172,7 +173,7 @@ cos_get_dictionary_name(struct pdf_doc *doc, } nspdferror -cos_get_dictionary_dictionary(struct pdf_doc *doc, +cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -188,7 +189,7 @@ cos_get_dictionary_dictionary(struct pdf_doc *doc, } nspdferror -cos_heritable_dictionary_dictionary(struct pdf_doc *doc, +cos_heritable_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -206,7 +207,7 @@ cos_heritable_dictionary_dictionary(struct pdf_doc *doc, } nspdferror -cos_get_dictionary_array(struct pdf_doc *doc, +cos_get_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -222,7 +223,7 @@ cos_get_dictionary_array(struct pdf_doc *doc, } nspdferror -cos_heritable_dictionary_array(struct pdf_doc *doc, +cos_heritable_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out) @@ -241,7 +242,7 @@ cos_heritable_dictionary_array(struct pdf_doc *doc, } nspdferror -cos_get_int(struct pdf_doc *doc, +cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out) { @@ -259,7 +260,7 @@ cos_get_int(struct pdf_doc *doc, } nspdferror -cos_get_name(struct pdf_doc *doc, +cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **value_out) { @@ -279,7 +280,7 @@ cos_get_name(struct pdf_doc *doc, nspdferror 
-cos_get_dictionary(struct pdf_doc *doc, +cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out) { @@ -297,7 +298,7 @@ cos_get_dictionary(struct pdf_doc *doc, } nspdferror -cos_get_array(struct pdf_doc *doc, +cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out) { @@ -318,7 +319,7 @@ cos_get_array(struct pdf_doc *doc, * get a value for a key from a dictionary */ nspdferror -cos_get_array_value(struct pdf_doc *doc, +cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out) @@ -350,7 +351,7 @@ cos_get_array_value(struct pdf_doc *doc, } nspdferror -cos_get_array_dictionary(struct pdf_doc *doc, +cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out) @@ -366,7 +367,7 @@ cos_get_array_dictionary(struct pdf_doc *doc, } nspdferror -cos_get_array_size(struct pdf_doc *doc, +cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out) { diff --git a/src/cos_object.h b/src/cos_object.h index 48241c6..a40c691 100644 --- a/src/cos_object.h +++ b/src/cos_object.h @@ -1,4 +1,4 @@ -struct pdf_doc; +struct nspdf_doc; enum cos_type { COS_TYPE_NULL, @@ -83,13 +83,6 @@ struct cos_object { } u; }; -/** - * Decode input stream into an object - * - * lex and parse a byte stream to generate a COS object. - */ -nspdferror cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out); - nspdferror cos_free_object(struct cos_object *cos_obj); /** @@ -117,35 +110,35 @@ nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key * NSPDFERROR_TYPE if the object passed in \p dict is not a dictionary. * NSPDFERROR_NOTFOUND if the key is not present in the dictionary. 
*/ -nspdferror cos_get_dictionary_value(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_get_dictionary_value(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_get_dictionary_int(struct pdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out); +nspdferror cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out); -nspdferror cos_get_dictionary_name(struct pdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out); +nspdferror cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out); -nspdferror cos_get_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_heritable_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_heritable_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_get_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_get_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_heritable_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); +nspdferror cos_heritable_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out); +nspdferror cos_get_int(struct nspdf_doc 
*doc, struct cos_object *cobj, int64_t *value_out); -nspdferror cos_get_name(struct pdf_doc *doc, struct cos_object *cobj, const char **value_out); +nspdferror cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **value_out); -nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); +nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); -nspdferror cos_get_array(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); +nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); -nspdferror cos_get_array_size(struct pdf_doc *doc, struct cos_object *cobj, unsigned int *size_out); +nspdferror cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out); -nspdferror cos_get_array_value(struct pdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out); +nspdferror cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out); -nspdferror cos_get_array_dictionary(struct pdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out); +nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out); diff --git a/src/cos_decode.c b/src/cos_parse.c index 8873060..ca3d802 100644 --- a/src/cos_decode.c +++ b/src/cos_parse.c @@ -5,8 +5,10 @@ #include <stdio.h> #include <string.h> +#include <nspdf/errors.h> + +#include "cos_parse.h" #include "byte_class.h" -#include "nspdferror.h" #include "cos_object.h" #include "pdf_doc.h" @@ -46,7 +48,7 @@ static uint8_t xtoi(uint8_t x) } static nspdferror -cos_decode_number(struct pdf_doc *doc, +cos_decode_number(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -101,7 +103,7 @@ cos_decode_number(struct 
pdf_doc *doc, * */ static nspdferror -cos_decode_string(struct pdf_doc *doc, +cos_decode_string(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -230,7 +232,7 @@ cos_decode_string(struct pdf_doc *doc, * decode hex encoded string */ static nspdferror -cos_decode_hex_string(struct pdf_doc *doc, +cos_decode_hex_string(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -293,7 +295,7 @@ cos_decode_hex_string(struct pdf_doc *doc, * decode a dictionary object */ static nspdferror -cos_decode_dictionary(struct pdf_doc *doc, +cos_decode_dictionary(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -324,7 +326,7 @@ cos_decode_dictionary(struct pdf_doc *doc, while ((DOC_BYTE(doc, offset) != '>') && (DOC_BYTE(doc, offset + 1) != '>')) { - res = cos_decode_object(doc, &offset, &key); + res = cos_parse_object(doc, &offset, &key); if (res != NSPDFERROR_OK) { /* todo free up any dictionary entries already created */ printf("key object decode failed\n"); @@ -337,7 +339,7 @@ cos_decode_dictionary(struct pdf_doc *doc, } //printf("key: %s\n", key->u.n); - res = cos_decode_object(doc, &offset, &value); + res = cos_parse_object(doc, &offset, &value); if (res != NSPDFERROR_OK) { printf("Unable to decode value object in dictionary\n"); /* todo free up any dictionary entries already created */ @@ -371,7 +373,7 @@ cos_decode_dictionary(struct pdf_doc *doc, * decode a list */ static nspdferror -cos_decode_list(struct pdf_doc *doc, +cos_decode_list(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -406,7 +408,7 @@ cos_decode_list(struct pdf_doc *doc, while (DOC_BYTE(doc, offset) != ']') { - res = cos_decode_object(doc, &offset, &value); + res = cos_parse_object(doc, &offset, &value); if (res != NSPDFERROR_OK) { cos_free_object(cosobj); printf("Unable to decode value object in list\n"); @@ -442,7 +444,7 @@ cos_decode_list(struct pdf_doc *doc, * \todo deal with # 
symbols on pdf versions 1.2 and later */ static nspdferror -cos_decode_name(struct pdf_doc *doc, +cos_decode_name(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -498,7 +500,7 @@ cos_decode_name(struct pdf_doc *doc, * decode a cos boolean object */ static int -cos_decode_boolean(struct pdf_doc *doc, +cos_decode_boolean(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -574,7 +576,7 @@ cos_decode_boolean(struct pdf_doc *doc, * decode the null object. */ static nspdferror -cos_decode_null(struct pdf_doc *doc, +cos_decode_null(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -640,7 +642,7 @@ cos_decode_null(struct pdf_doc *doc, * integer */ static nspdferror -cos_attempt_decode_reference(struct pdf_doc *doc, +cos_attempt_decode_reference(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { @@ -712,7 +714,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc, } //printf("decoding\n"); - res = cos_decode_object(doc, &offset, &indirect); + res = cos_parse_object(doc, &offset, &indirect); if (res != NSPDFERROR_OK) { cos_free_object(generation); return res; @@ -752,7 +754,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc, /* - * Decode input stream into an object + * Parse input stream into an object * * lex and parse a byte stream to generate COS objects * @@ -804,7 +806,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc, * ; */ nspdferror -cos_decode_object(struct pdf_doc *doc, +cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out) { diff --git a/src/cos_parse.h b/src/cos_parse.h new file mode 100644 index 0000000..adfb835 --- /dev/null +++ b/src/cos_parse.h @@ -0,0 +1,10 @@ +struct nspdf_doc; +struct cos_object; + +/** + * Decode input stream into an object + * + * lex and parse a byte stream to generate a COS object. 
+ */ +nspdferror cos_parse_object(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out); + diff --git a/src/xref.c b/src/document.c index 452aa19..9be0ab5 100644 --- a/src/xref.c +++ b/src/document.c @@ -1,3 +1,12 @@ +/* + * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of libnspdf. + * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + #include <stdio.h> #include <stdint.h> #include <inttypes.h> @@ -5,59 +14,27 @@ #include <stdbool.h> #include <string.h> -#include "nspdferror.h" +#include <nspdf/document.h> + +#include "cos_parse.h" #include "byte_class.h" #include "cos_object.h" #include "pdf_doc.h" #define SLEN(x) (sizeof((x)) - 1) - -int -read_whole_pdf(struct pdf_doc *doc, const char *fname) -{ - FILE *f; - off_t len; - uint8_t *buf; - size_t rd; - - f = fopen(fname, "r"); - if (f == NULL) { - perror("pdf open"); - return 1; - } - - fseek(f, 0, SEEK_END); - len = ftello(f); - - buf = malloc(len); - fseek(f, 0, SEEK_SET); - - rd = fread(buf, len, 1, f); - if (rd != 1) { - perror("pdf read"); - free(buf); - return 1; - } - - fclose(f); - - doc->start = doc->buffer = buf; - doc->length = doc->buffer_length = len; - - return 0; -} - - #define STARTXREF_TOK "startxref" -/* Number of bytes to search back from file end to find xref start token, convention says 1024 bytes */ -#define STARTXREF_SEARCH_SIZE 1024 - +/* Number of bytes to search back from file end to find xref start token, + * convention says 1024 bytes + */ +#define STARTXREF_SEARCH_SIZE 1024 static nspdferror -doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out) +doc_read_uint(struct nspdf_doc *doc, + uint64_t *offset_out, + uint64_t *result_out) { uint8_t c; /* current byte from source data */ unsigned int len; /* number of decimal places in number */ @@ -90,10 +67,11 @@ doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out) return -1; /* number 
too long */ } + /** * finds the startxref marker at the end of input */ -nspdferror find_startxref(struct pdf_doc *doc, uint64_t *offset_out) +static nspdferror find_startxref(struct nspdf_doc *doc, uint64_t *offset_out) { uint64_t offset; /* offset of characters being considered for startxref */ uint64_t earliest; /* earliest offset to search for startxref */ @@ -123,10 +101,14 @@ nspdferror find_startxref(struct pdf_doc *doc, uint64_t *offset_out) return NSPDFERROR_SYNTAX; } + /** * decodes a startxref field */ -nspdferror decode_startxref(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *start_xref_out) +static nspdferror +decode_startxref(struct nspdf_doc *doc, + uint64_t *offset_out, + uint64_t *start_xref_out) { uint64_t offset; /* offset of characters being considered for startxref */ uint64_t start_xref; @@ -181,7 +163,7 @@ nspdferror decode_startxref(struct pdf_doc *doc, uint64_t *offset_out, uint64_t /** * finds the next trailer */ -nspdferror find_trailer(struct pdf_doc *doc, uint64_t *offset_out) +static nspdferror find_trailer(struct nspdf_doc *doc, uint64_t *offset_out) { uint64_t offset; /* offset of characters being considered for trailer */ @@ -200,33 +182,9 @@ nspdferror find_trailer(struct pdf_doc *doc, uint64_t *offset_out) return NSPDFERROR_SYNTAX; } -/** - * find the PDF comment marker to identify the start of the document - */ -int check_header(struct pdf_doc *doc) -{ - uint64_t offset; /* offset of characters being considered for startxref */ - - for (offset = 0; offset < 1024; offset++) { - if ((DOC_BYTE(doc, offset) == '%') && - (DOC_BYTE(doc, offset + 1) == 'P') && - (DOC_BYTE(doc, offset + 2) == 'D') && - (DOC_BYTE(doc, offset + 3) == 'F') && - (DOC_BYTE(doc, offset + 4) == '-') && - (DOC_BYTE(doc, offset + 5) == '1') && - (DOC_BYTE(doc, offset + 6) == '.')) { - doc->start = doc->buffer + offset; - doc->length -= offset; - /* read number for minor */ - return 0; - } - } - return -1; -} - -nspdferror -decode_trailer(struct pdf_doc *doc, 
+static nspdferror +decode_trailer(struct nspdf_doc *doc, uint64_t *offset_out, struct cos_object **trailer_out) { @@ -249,7 +207,7 @@ decode_trailer(struct pdf_doc *doc, offset += 7; doc_skip_ws(doc, &offset); - res = cos_decode_object(doc, &offset, &trailer); + res = cos_parse_object(doc, &offset, &trailer); if (res != 0) { return res; } @@ -265,8 +223,9 @@ decode_trailer(struct pdf_doc *doc, return NSPDFERROR_OK; } -nspdferror -decode_xref(struct pdf_doc *doc, uint64_t *offset_out) + +static nspdferror +decode_xref(struct nspdf_doc *doc, uint64_t *offset_out) { uint64_t offset; nspdferror res; @@ -359,7 +318,8 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out) /** * recursively parse trailers and xref tables */ -nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset) +static nspdferror +decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset) { nspdferror res; uint64_t offset; /* the current data offset */ @@ -455,6 +415,7 @@ decode_xref_trailer_failed: return res; } + /** * decode non-linear pdf trailer data * @@ -477,7 +438,7 @@ decode_xref_trailer_failed: * find the subsequent trailer. 
* */ -nspdferror decode_trailers(struct pdf_doc *doc) +static nspdferror decode_trailers(struct nspdf_doc *doc) { nspdferror res; uint64_t offset; /* the current data offset */ @@ -499,11 +460,12 @@ nspdferror decode_trailers(struct pdf_doc *doc) return decode_xref_trailer(doc, startxref); } + /** * recursively decodes a page tree */ -nspdferror -decode_page_tree(struct pdf_doc *doc, +static nspdferror +decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index) { @@ -594,12 +556,14 @@ decode_page_tree(struct pdf_doc *doc, return res; } + /* printf("page index:%d page:%p resources:%p mediabox:%p contents:%p\n", *page_index, page, page->resources, page->mediabox, page->contents); + */ (*page_index)++; res = NSPDFERROR_OK; @@ -609,7 +573,8 @@ decode_page_tree(struct pdf_doc *doc, return res; } -nspdferror decode_catalog(struct pdf_doc *doc) + +static nspdferror decode_catalog(struct nspdf_doc *doc) { nspdferror res; struct cos_object *catalog; @@ -645,38 +610,63 @@ nspdferror decode_catalog(struct pdf_doc *doc) return res; } -nspdferror new_pdf_doc(struct pdf_doc **doc_out) +/* exported interface documented in nspdf/document.h */ +nspdferror nspdf_document_create(struct nspdf_doc **doc_out) { - struct pdf_doc *doc; - doc = calloc(1, sizeof(struct pdf_doc)); + struct nspdf_doc *doc; + doc = calloc(1, sizeof(struct nspdf_doc)); if (doc == NULL) { return NSPDFERROR_NOMEM; } + *doc_out = doc; + return NSPDFERROR_OK; } -int main(int argc, char **argv) +/* exported interface documented in nspdf/document.h */ +nspdferror nspdf_document_destroy(struct nspdf_doc *doc) { - struct pdf_doc *doc; - int res; + free(doc); - if (argc < 2) { - fprintf(stderr, "Usage %s <filename>\n", argv[0]); - return 1; - } + return NSPDFERROR_OK; +} - res = new_pdf_doc(&doc); - if (res != NSPDFERROR_OK) { - printf("failed to read file\n"); - return res; - } - res = read_whole_pdf(doc, argv[1]); - if (res != 0) { - printf("failed to read file\n"); - return 
res; +/** + * find the PDF comment marker to identify the start of the document + */ +static nspdferror check_header(struct nspdf_doc *doc) +{ + uint64_t offset; /* offset of characters being considered for header */ + for (offset = 0; offset < 1024; offset++) { + if ((DOC_BYTE(doc, offset) == '%') && + (DOC_BYTE(doc, offset + 1) == 'P') && + (DOC_BYTE(doc, offset + 2) == 'D') && + (DOC_BYTE(doc, offset + 3) == 'F') && + (DOC_BYTE(doc, offset + 4) == '-') && + (DOC_BYTE(doc, offset + 5) == '1') && + (DOC_BYTE(doc, offset + 6) == '.')) { + doc->start += offset; + doc->length -= offset; + + /* \todo read number for minor */ + return NSPDFERROR_OK; + } } + return NSPDFERROR_NOTFOUND; +} + +/* exported interface documented in nspdf/document.h */ +nspdferror +nspdf_document_parse(struct nspdf_doc *doc, + const uint8_t *buffer, + uint64_t buffer_length) +{ + nspdferror res; + + doc->start = buffer; + doc->length = buffer_length; res = check_header(doc); if (res != 0) { @@ -696,5 +686,5 @@ int main(int argc, char **argv) return res; } - return 0; + return res; } diff --git a/src/nspdferror.h b/src/nspdferror.h deleted file mode 100644 index 3e26813..0000000 --- a/src/nspdferror.h +++ /dev/null @@ -1,10 +0,0 @@ -typedef enum { - NSPDFERROR_OK, - NSPDFERROR_NOMEM, - NSPDFERROR_SYNTAX, /**< syntax error in parse */ - NSPDFERROR_SIZE, /**< not enough input data */ - NSPDFERROR_RANGE, /**< value outside type range */ - NSPDFERROR_TYPE, /**< wrong type error */ - NSPDFERROR_NOTFOUND, /**< key not found */ - NSPDFERROR_FORMAT, /**< objects do not cornform to expected format */ -} nspdferror; diff --git a/src/pdf_doc.c b/src/pdf_doc.c index 4a5cad1..281025c 100644 --- a/src/pdf_doc.c +++ b/src/pdf_doc.c @@ -12,7 +12,9 @@ #include <stdbool.h> #include <stdio.h> -#include "nspdferror.h" +#include <nspdf/errors.h> + +#include "cos_parse.h" #include "byte_class.h" #include "cos_object.h" #include "pdf_doc.h" @@ -20,7 +22,7 @@ /** * move offset to next non whitespace byte */ 
-nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) +nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset) { uint8_t c; /* TODO sort out keeping offset in range */ @@ -43,7 +45,7 @@ nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) /** * move offset to next non eol byte */ -nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset) +nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset) { uint8_t c; /* TODO sort out keeping offset in range */ @@ -60,7 +62,7 @@ static struct cos_object cos_null_obj = { }; nspdferror -xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out) +xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out) { nspdferror res; struct cos_object *cobj; @@ -90,7 +92,7 @@ xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out) if (entry->object == NULL) { /* indirect object has never been decoded */ offset = entry->offset; - res = cos_decode_object(doc, &offset, &indirect); + res = cos_parse_object(doc, &offset, &indirect); if (res != NSPDFERROR_OK) { printf("failed to decode indirect object\n"); return res; diff --git a/src/pdf_doc.h b/src/pdf_doc.h index 986556f..e9bdc14 100644 --- a/src/pdf_doc.h +++ b/src/pdf_doc.h @@ -18,11 +18,9 @@ struct page_table_entry { }; /** pdf document */ -struct pdf_doc { - uint8_t *buffer; - uint64_t buffer_length; +struct nspdf_doc { - uint8_t *start; /* start of pdf document in input stream */ + const uint8_t *start; /* start of pdf document in input stream */ uint64_t length; int major; @@ -47,7 +45,7 @@ struct pdf_doc { /* byte data accessor, allows for more complex buffer handling in future */ #define DOC_BYTE(doc, offset) (doc->start[(offset)]) -nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset); -nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset); +nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset); +nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset); -nspdferror 
xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out); +nspdferror xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out); diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..e3a2929 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,3 @@ +DIR_TEST_ITEMS := parsepdf:parsepdf.c + +include $(NSBUILD)/Makefile.subdir diff --git a/test/parsepdf.c b/test/parsepdf.c new file mode 100644 index 0000000..3482af5 --- /dev/null +++ b/test/parsepdf.c @@ -0,0 +1,94 @@ +/* + * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of libnspdf. + * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + +#include <stdio.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> + +#include <nspdf/document.h> + +static nspdferror +read_whole_pdf(const char *fname, uint8_t **buffer, uint64_t *buffer_length) +{ + FILE *f; + off_t len; + uint8_t *buf; + size_t rd; + + f = fopen(fname, "r"); + if (f == NULL) { + perror("pdf open"); + return NSPDFERROR_NOTFOUND; + } + + fseek(f, 0, SEEK_END); + len = ftello(f); + + buf = malloc(len); + fseek(f, 0, SEEK_SET); + + rd = fread(buf, len, 1, f); + if (rd != 1) { + perror("pdf read"); + free(buf); + return 1; + } + + fclose(f); + + *buffer = buf; + *buffer_length = len; + + return NSPDFERROR_OK; +} + + +int main(int argc, char **argv) +{ + uint8_t *buffer; + uint64_t buffer_length; + struct nspdf_doc *doc; + nspdferror res; + + if (argc < 2) { + fprintf(stderr, "Usage %s <filename>\n", argv[0]); + return 1; + } + + res = read_whole_pdf(argv[1], &buffer, &buffer_length); + if (res != 0) { + printf("failed to read file\n"); + return res; + } + + res = nspdf_document_create(&doc); + if (res != NSPDFERROR_OK) { + printf("failed to create a document\n"); + return res; + } + + res = nspdf_document_parse(doc, buffer, buffer_length); + if (res != NSPDFERROR_OK) { + 
printf("document parse failed (%d)\n", res); + return res; + } + + res = nspdf_document_destroy(doc); + if (res != NSPDFERROR_OK) { + printf("failed to destroy document (%d)\n", res); + return res; + } + + free(buffer); + + return 0; +} diff --git a/test/runtest.sh b/test/runtest.sh new file mode 100755 index 0000000..1aa83c7 --- /dev/null +++ b/test/runtest.sh @@ -0,0 +1,4 @@ +#!/bin/sh +TEST_PATH=$1 + +${TEST_PATH}/test_parsepdf ~/Downloads/HiKey_User_Guide_Rev0.2.pdf |