summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2017-12-30 00:38:07 +0000
committerVincent Sanders <vince@kyllikki.org>2017-12-30 00:38:07 +0000
commit31b1f792826f51e9271475d124c3a1df4aa5116b (patch)
tree3e0c1083064ba66398dee4fa0aa1c464be3b5325
parent5422dd50a49fe1a282271f22cd324f815e592e07 (diff)
downloadlibnspdf-31b1f792826f51e9271475d124c3a1df4aa5116b.tar.gz
libnspdf-31b1f792826f51e9271475d124c3a1df4aa5116b.tar.bz2
make an actual library
-rw-r--r--COPYING19
-rw-r--r--Makefile48
-rw-r--r--README4
-rw-r--r--include/nspdf/document.h45
-rw-r--r--include/nspdf/errors.h29
-rw-r--r--libnspdf.pc.in10
-rw-r--r--src/Makefile15
-rw-r--r--src/cos_object.c31
-rw-r--r--src/cos_object.h37
-rw-r--r--src/cos_parse.c (renamed from src/cos_decode.c)34
-rw-r--r--src/cos_parse.h10
-rw-r--r--src/document.c (renamed from src/xref.c)188
-rw-r--r--src/nspdferror.h10
-rw-r--r--src/pdf_doc.c12
-rw-r--r--src/pdf_doc.h12
-rw-r--r--test/Makefile3
-rw-r--r--test/parsepdf.c94
-rwxr-xr-xtest/runtest.sh4
18 files changed, 418 insertions, 187 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..fe0a4e8
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,19 @@
+Copyright 2018 Vincent Sanders <vince@netsurf-browser.org>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..470505f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,48 @@
+#!/bin/make
+#
+# Makefile for libnspdf
+#
+# Copyright 2018 Vincent Sanders <vince@netsurf-browser.org>
+
+# Component settings
+COMPONENT := nspdf
+COMPONENT_VERSION := 0.0.1
+# Default to a static library
+COMPONENT_TYPE ?= lib-static
+
+# Setup the tooling
+PREFIX ?= /opt/netsurf
+NSSHARED ?= $(PREFIX)/share/netsurf-buildsystem
+include $(NSSHARED)/makefiles/Makefile.tools
+
+# Reevaluate when used, as BUILDDIR won't be defined yet
+TESTRUNNER = test/runtest.sh $(BUILDDIR) $(EXEEXT)
+
+# Toolchain flags
+WARNFLAGS := -Wall -W -Wundef -Wpointer-arith -Wcast-align \
+ -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes \
+ -Wmissing-declarations -Wnested-externs
+
+CFLAGS := -D_GNU_SOURCE -D_DEFAULT_SOURCE \
+ -I$(CURDIR)/include/ -I$(CURDIR)/src \
+ $(WARNFLAGS) $(CFLAGS)
+ifneq ($(GCCVER),2)
+ CFLAGS := $(CFLAGS) -std=c99
+else
+ # __inline__ is a GCCism
+ CFLAGS := $(CFLAGS) -Dinline="__inline__"
+endif
+CFLAGS := $(CFLAGS) -D_POSIX_C_SOURCE=200809L
+
+REQUIRED_LIBS := nspdf
+
+TESTCFLAGS := -g -O2
+TESTLDFLAGS := -l$(COMPONENT) $(TESTLDFLAGS)
+
+include $(NSBUILD)/Makefile.top
+
+# Extra installation rules
+#
+# The public headers live in include/nspdf/ and are included by users as
+# <nspdf/document.h> and <nspdf/errors.h>, so they are installed into an
+# nspdf subdirectory of the include directory.
+I := /$(INCLUDEDIR)/nspdf
+INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/document.h
+INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/errors.h
+INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR)/pkgconfig:lib$(COMPONENT).pc.in
+INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR):$(OUTPUT)
diff --git a/README b/README
new file mode 100644
index 0000000..96602d4
--- /dev/null
+++ b/README
@@ -0,0 +1,4 @@
+NetSurf Portable Document Format handling library
+=================================================
+
+library to handle manipulating PDF files
diff --git a/include/nspdf/document.h b/include/nspdf/document.h
new file mode 100644
index 0000000..4e4931d
--- /dev/null
+++ b/include/nspdf/document.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library document handling
+ */
+
+#ifndef NSPDF_DOCUMENT_H_
+#define NSPDF_DOCUMENT_H_
+
+#include <stdint.h> /* uint8_t and uint64_t used in the interface below */
+
+#include <nspdf/errors.h>
+
+struct nspdf_doc;
+
+/**
+ * create a new PDF document
+ *
+ * \param doc_out Updated with the newly created document on success.
+ * \return NSPDFERROR_OK on success or NSPDFERROR_NOMEM if the document could
+ *         not be allocated.
+ */
+nspdferror nspdf_document_create(struct nspdf_doc **doc_out);
+
+/**
+ * destroys a previously created document
+ *
+ * any allocated resources are freed but any buffers passed for parse are not
+ * altered and may now be freed by the caller.
+ *
+ * \param doc The document to destroy.
+ * \return NSPDFERROR_OK on success.
+ */
+nspdferror nspdf_document_destroy(struct nspdf_doc *doc);
+
+/**
+ * parse a PDF from a memory buffer
+ *
+ * reads all metadata and validates header, trailer, xref table and page tree
+ * ready to render pages. The passed buffer is referenced rather than copied:
+ * it remains owned by the caller but must not be altered or freed until the
+ * document is destroyed.
+ *
+ * \param doc The document to parse into.
+ * \param buffer The PDF data.
+ * \param buffer_length The length of the data in \p buffer in bytes.
+ * \return NSPDFERROR_OK on success else an appropriate error code.
+ */
+nspdferror nspdf_document_parse(struct nspdf_doc *doc,
+                                const uint8_t *buffer,
+                                uint64_t buffer_length);
+
+#endif /* NSPDF_DOCUMENT_H_ */
diff --git a/include/nspdf/errors.h b/include/nspdf/errors.h
new file mode 100644
index 0000000..f2142ff
--- /dev/null
+++ b/include/nspdf/errors.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+/**
+ * \file
+ * NetSurf PDF library return codes
+ */
+
+#ifndef NSPDF_ERRORS_H_
+#define NSPDF_ERRORS_H_
+
+typedef enum {
+ NSPDFERROR_OK, /**< no error, first enumerator so its value is zero */
+ NSPDFERROR_NOMEM, /**< memory allocation error */
+ NSPDFERROR_SYNTAX, /**< syntax error in parse */
+ NSPDFERROR_SIZE, /**< not enough input data */
+ NSPDFERROR_RANGE, /**< value outside type range */
+ NSPDFERROR_TYPE, /**< wrong type error */
+ NSPDFERROR_NOTFOUND, /**< key not found */
+ NSPDFERROR_FORMAT, /**< objects do not conform to expected format */
+} nspdferror;
+
+#endif
diff --git a/libnspdf.pc.in b/libnspdf.pc.in
new file mode 100644
index 0000000..0898deb
--- /dev/null
+++ b/libnspdf.pc.in
@@ -0,0 +1,10 @@
+prefix=PREFIX
+exec_prefix=${prefix}
+libdir=${exec_prefix}/LIBDIR
+includedir=${prefix}/INCLUDEDIR
+
+Name: libnspdf
+Description: NetSurf PDF library
+Version: VERSION
+Libs: -L${libdir} LIBRARIES
+Cflags: -I${includedir}
diff --git a/src/Makefile b/src/Makefile
index af806f3..ed0b4ba 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,14 +1,3 @@
-#
+DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c
-CFLAGS+=-g -Wall -Wextra
-
-OBJS=xref.o byte_class.o cos_decode.o cos_object.o pdf_doc.o
-
-.PHONY:all clean
-
-all:xref
-
-xref:$(OBJS)
-
-clean:
- ${RM} xref $(OBJS)
+include $(NSBUILD)/Makefile.subdir
diff --git a/src/cos_object.c b/src/cos_object.c
index 5bfd423..2fa3a93 100644
--- a/src/cos_object.c
+++ b/src/cos_object.c
@@ -14,7 +14,8 @@
#include <stdio.h>
#include <string.h>
-#include "nspdferror.h"
+#include <nspdf/errors.h>
+
#include "cos_object.h"
#include "pdf_doc.h"
@@ -110,7 +111,7 @@ cos_extract_dictionary_value(struct cos_object *dict,
* get a value for a key from a dictionary
*/
nspdferror
-cos_get_dictionary_value(struct pdf_doc *doc,
+cos_get_dictionary_value(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -140,7 +141,7 @@ cos_get_dictionary_value(struct pdf_doc *doc,
}
nspdferror
-cos_get_dictionary_int(struct pdf_doc *doc,
+cos_get_dictionary_int(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
int64_t *value_out)
@@ -156,7 +157,7 @@ cos_get_dictionary_int(struct pdf_doc *doc,
}
nspdferror
-cos_get_dictionary_name(struct pdf_doc *doc,
+cos_get_dictionary_name(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
const char **value_out)
@@ -172,7 +173,7 @@ cos_get_dictionary_name(struct pdf_doc *doc,
}
nspdferror
-cos_get_dictionary_dictionary(struct pdf_doc *doc,
+cos_get_dictionary_dictionary(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -188,7 +189,7 @@ cos_get_dictionary_dictionary(struct pdf_doc *doc,
}
nspdferror
-cos_heritable_dictionary_dictionary(struct pdf_doc *doc,
+cos_heritable_dictionary_dictionary(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -206,7 +207,7 @@ cos_heritable_dictionary_dictionary(struct pdf_doc *doc,
}
nspdferror
-cos_get_dictionary_array(struct pdf_doc *doc,
+cos_get_dictionary_array(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -222,7 +223,7 @@ cos_get_dictionary_array(struct pdf_doc *doc,
}
nspdferror
-cos_heritable_dictionary_array(struct pdf_doc *doc,
+cos_heritable_dictionary_array(struct nspdf_doc *doc,
struct cos_object *dict,
const char *key,
struct cos_object **value_out)
@@ -241,7 +242,7 @@ cos_heritable_dictionary_array(struct pdf_doc *doc,
}
nspdferror
-cos_get_int(struct pdf_doc *doc,
+cos_get_int(struct nspdf_doc *doc,
struct cos_object *cobj,
int64_t *value_out)
{
@@ -259,7 +260,7 @@ cos_get_int(struct pdf_doc *doc,
}
nspdferror
-cos_get_name(struct pdf_doc *doc,
+cos_get_name(struct nspdf_doc *doc,
struct cos_object *cobj,
const char **value_out)
{
@@ -279,7 +280,7 @@ cos_get_name(struct pdf_doc *doc,
nspdferror
-cos_get_dictionary(struct pdf_doc *doc,
+cos_get_dictionary(struct nspdf_doc *doc,
struct cos_object *cobj,
struct cos_object **value_out)
{
@@ -297,7 +298,7 @@ cos_get_dictionary(struct pdf_doc *doc,
}
nspdferror
-cos_get_array(struct pdf_doc *doc,
+cos_get_array(struct nspdf_doc *doc,
struct cos_object *cobj,
struct cos_object **value_out)
{
@@ -318,7 +319,7 @@ cos_get_array(struct pdf_doc *doc,
* get a value for a key from a dictionary
*/
nspdferror
-cos_get_array_value(struct pdf_doc *doc,
+cos_get_array_value(struct nspdf_doc *doc,
struct cos_object *array,
unsigned int index,
struct cos_object **value_out)
@@ -350,7 +351,7 @@ cos_get_array_value(struct pdf_doc *doc,
}
nspdferror
-cos_get_array_dictionary(struct pdf_doc *doc,
+cos_get_array_dictionary(struct nspdf_doc *doc,
struct cos_object *array,
unsigned int index,
struct cos_object **value_out)
@@ -366,7 +367,7 @@ cos_get_array_dictionary(struct pdf_doc *doc,
}
nspdferror
-cos_get_array_size(struct pdf_doc *doc,
+cos_get_array_size(struct nspdf_doc *doc,
struct cos_object *cobj,
unsigned int *size_out)
{
diff --git a/src/cos_object.h b/src/cos_object.h
index 48241c6..a40c691 100644
--- a/src/cos_object.h
+++ b/src/cos_object.h
@@ -1,4 +1,4 @@
-struct pdf_doc;
+struct nspdf_doc;
enum cos_type {
COS_TYPE_NULL,
@@ -83,13 +83,6 @@ struct cos_object {
} u;
};
-/**
- * Decode input stream into an object
- *
- * lex and parse a byte stream to generate a COS object.
- */
-nspdferror cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
-
nspdferror cos_free_object(struct cos_object *cos_obj);
/**
@@ -117,35 +110,35 @@ nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key
* NSPDFERROR_TYPE if the object passed in \p dict is not a dictionary.
* NSPDFERROR_NOTFOUND if the key is not present in the dictionary.
*/
-nspdferror cos_get_dictionary_value(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_get_dictionary_value(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_dictionary_int(struct pdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out);
+nspdferror cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict, const char *key, int64_t *value_out);
-nspdferror cos_get_dictionary_name(struct pdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out);
+nspdferror cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out);
-nspdferror cos_get_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_heritable_dictionary_dictionary(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_heritable_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_get_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_heritable_dictionary_array(struct pdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_heritable_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out);
-nspdferror cos_get_int(struct pdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
+nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out);
-nspdferror cos_get_name(struct pdf_doc *doc, struct cos_object *cobj, const char **value_out);
+nspdferror cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **value_out);
-nspdferror cos_get_dictionary(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
-nspdferror cos_get_array(struct pdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
+nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out);
-nspdferror cos_get_array_size(struct pdf_doc *doc, struct cos_object *cobj, unsigned int *size_out);
+nspdferror cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out);
-nspdferror cos_get_array_value(struct pdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
+nspdferror cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out);
-nspdferror cos_get_array_dictionary(struct pdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out);
+nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out);
diff --git a/src/cos_decode.c b/src/cos_parse.c
index 8873060..ca3d802 100644
--- a/src/cos_decode.c
+++ b/src/cos_parse.c
@@ -5,8 +5,10 @@
#include <stdio.h>
#include <string.h>
+#include <nspdf/errors.h>
+
+#include "cos_parse.h"
#include "byte_class.h"
-#include "nspdferror.h"
#include "cos_object.h"
#include "pdf_doc.h"
@@ -46,7 +48,7 @@ static uint8_t xtoi(uint8_t x)
}
static nspdferror
-cos_decode_number(struct pdf_doc *doc,
+cos_decode_number(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -101,7 +103,7 @@ cos_decode_number(struct pdf_doc *doc,
*
*/
static nspdferror
-cos_decode_string(struct pdf_doc *doc,
+cos_decode_string(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -230,7 +232,7 @@ cos_decode_string(struct pdf_doc *doc,
* decode hex encoded string
*/
static nspdferror
-cos_decode_hex_string(struct pdf_doc *doc,
+cos_decode_hex_string(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -293,7 +295,7 @@ cos_decode_hex_string(struct pdf_doc *doc,
* decode a dictionary object
*/
static nspdferror
-cos_decode_dictionary(struct pdf_doc *doc,
+cos_decode_dictionary(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -324,7 +326,7 @@ cos_decode_dictionary(struct pdf_doc *doc,
while ((DOC_BYTE(doc, offset) != '>') &&
(DOC_BYTE(doc, offset + 1) != '>')) {
- res = cos_decode_object(doc, &offset, &key);
+ res = cos_parse_object(doc, &offset, &key);
if (res != NSPDFERROR_OK) {
/* todo free up any dictionary entries already created */
printf("key object decode failed\n");
@@ -337,7 +339,7 @@ cos_decode_dictionary(struct pdf_doc *doc,
}
//printf("key: %s\n", key->u.n);
- res = cos_decode_object(doc, &offset, &value);
+ res = cos_parse_object(doc, &offset, &value);
if (res != NSPDFERROR_OK) {
printf("Unable to decode value object in dictionary\n");
/* todo free up any dictionary entries already created */
@@ -371,7 +373,7 @@ cos_decode_dictionary(struct pdf_doc *doc,
* decode a list
*/
static nspdferror
-cos_decode_list(struct pdf_doc *doc,
+cos_decode_list(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -406,7 +408,7 @@ cos_decode_list(struct pdf_doc *doc,
while (DOC_BYTE(doc, offset) != ']') {
- res = cos_decode_object(doc, &offset, &value);
+ res = cos_parse_object(doc, &offset, &value);
if (res != NSPDFERROR_OK) {
cos_free_object(cosobj);
printf("Unable to decode value object in list\n");
@@ -442,7 +444,7 @@ cos_decode_list(struct pdf_doc *doc,
* \todo deal with # symbols on pdf versions 1.2 and later
*/
static nspdferror
-cos_decode_name(struct pdf_doc *doc,
+cos_decode_name(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -498,7 +500,7 @@ cos_decode_name(struct pdf_doc *doc,
* decode a cos boolean object
*/
static int
-cos_decode_boolean(struct pdf_doc *doc,
+cos_decode_boolean(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -574,7 +576,7 @@ cos_decode_boolean(struct pdf_doc *doc,
* decode the null object.
*/
static nspdferror
-cos_decode_null(struct pdf_doc *doc,
+cos_decode_null(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -640,7 +642,7 @@ cos_decode_null(struct pdf_doc *doc,
* integer
*/
static nspdferror
-cos_attempt_decode_reference(struct pdf_doc *doc,
+cos_attempt_decode_reference(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
@@ -712,7 +714,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc,
}
//printf("decoding\n");
- res = cos_decode_object(doc, &offset, &indirect);
+ res = cos_parse_object(doc, &offset, &indirect);
if (res != NSPDFERROR_OK) {
cos_free_object(generation);
return res;
@@ -752,7 +754,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc,
/*
- * Decode input stream into an object
+ * Parse input stream into an object
*
* lex and parse a byte stream to generate COS objects
*
@@ -804,7 +806,7 @@ cos_attempt_decode_reference(struct pdf_doc *doc,
* ;
*/
nspdferror
-cos_decode_object(struct pdf_doc *doc,
+cos_parse_object(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **cosobj_out)
{
diff --git a/src/cos_parse.h b/src/cos_parse.h
new file mode 100644
index 0000000..adfb835
--- /dev/null
+++ b/src/cos_parse.h
@@ -0,0 +1,10 @@
+#ifndef NSPDF_COS_PARSE_H_
+#define NSPDF_COS_PARSE_H_
+
+#include <stdint.h> /* uint64_t */
+
+#include <nspdf/errors.h> /* nspdferror */
+
+struct nspdf_doc;
+struct cos_object;
+
+/**
+ * Decode input stream into an object
+ *
+ * lex and parse a byte stream to generate a COS object.
+ *
+ * \param doc The document whose data is parsed.
+ * \param offset_out The offset into the document data to parse from. Callers
+ *                   parse consecutive objects through the same variable so
+ *                   it is presumably advanced past the parsed object on
+ *                   success (confirm against the implementation).
+ * \param cosobj_out Receives the parsed object.
+ * \return NSPDFERROR_OK on success else an appropriate error code.
+ */
+nspdferror cos_parse_object(struct nspdf_doc *doc,
+                            uint64_t *offset_out,
+                            struct cos_object **cosobj_out);
+
+#endif /* NSPDF_COS_PARSE_H_ */
diff --git a/src/xref.c b/src/document.c
index 452aa19..9be0ab5 100644
--- a/src/xref.c
+++ b/src/document.c
@@ -1,3 +1,12 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
@@ -5,59 +14,27 @@
#include <stdbool.h>
#include <string.h>
-#include "nspdferror.h"
+#include <nspdf/document.h>
+
+#include "cos_parse.h"
#include "byte_class.h"
#include "cos_object.h"
#include "pdf_doc.h"
#define SLEN(x) (sizeof((x)) - 1)
-
-int
-read_whole_pdf(struct pdf_doc *doc, const char *fname)
-{
- FILE *f;
- off_t len;
- uint8_t *buf;
- size_t rd;
-
- f = fopen(fname, "r");
- if (f == NULL) {
- perror("pdf open");
- return 1;
- }
-
- fseek(f, 0, SEEK_END);
- len = ftello(f);
-
- buf = malloc(len);
- fseek(f, 0, SEEK_SET);
-
- rd = fread(buf, len, 1, f);
- if (rd != 1) {
- perror("pdf read");
- free(buf);
- return 1;
- }
-
- fclose(f);
-
- doc->start = doc->buffer = buf;
- doc->length = doc->buffer_length = len;
-
- return 0;
-}
-
-
#define STARTXREF_TOK "startxref"
-/* Number of bytes to search back from file end to find xref start token, convention says 1024 bytes */
-#define STARTXREF_SEARCH_SIZE 1024
-
+/* Number of bytes to search back from file end to find xref start token,
+ * convention says 1024 bytes
+ */
+#define STARTXREF_SEARCH_SIZE 1024
static nspdferror
-doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
+doc_read_uint(struct nspdf_doc *doc,
+ uint64_t *offset_out,
+ uint64_t *result_out)
{
uint8_t c; /* current byte from source data */
unsigned int len; /* number of decimal places in number */
@@ -90,10 +67,11 @@ doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
return -1; /* number too long */
}
+
/**
* finds the startxref marker at the end of input
*/
-nspdferror find_startxref(struct pdf_doc *doc, uint64_t *offset_out)
+static nspdferror find_startxref(struct nspdf_doc *doc, uint64_t *offset_out)
{
uint64_t offset; /* offset of characters being considered for startxref */
uint64_t earliest; /* earliest offset to serch for startxref */
@@ -123,10 +101,14 @@ nspdferror find_startxref(struct pdf_doc *doc, uint64_t *offset_out)
return NSPDFERROR_SYNTAX;
}
+
/**
* decodes a startxref field
*/
-nspdferror decode_startxref(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *start_xref_out)
+static nspdferror
+decode_startxref(struct nspdf_doc *doc,
+ uint64_t *offset_out,
+ uint64_t *start_xref_out)
{
uint64_t offset; /* offset of characters being considered for startxref */
uint64_t start_xref;
@@ -181,7 +163,7 @@ nspdferror decode_startxref(struct pdf_doc *doc, uint64_t *offset_out, uint64_t
/**
* finds the next trailer
*/
-nspdferror find_trailer(struct pdf_doc *doc, uint64_t *offset_out)
+static nspdferror find_trailer(struct nspdf_doc *doc, uint64_t *offset_out)
{
uint64_t offset; /* offset of characters being considered for trailer */
@@ -200,33 +182,9 @@ nspdferror find_trailer(struct pdf_doc *doc, uint64_t *offset_out)
return NSPDFERROR_SYNTAX;
}
-/**
- * find the PDF comment marker to identify the start of the document
- */
-int check_header(struct pdf_doc *doc)
-{
- uint64_t offset; /* offset of characters being considered for startxref */
-
- for (offset = 0; offset < 1024; offset++) {
- if ((DOC_BYTE(doc, offset) == '%') &&
- (DOC_BYTE(doc, offset + 1) == 'P') &&
- (DOC_BYTE(doc, offset + 2) == 'D') &&
- (DOC_BYTE(doc, offset + 3) == 'F') &&
- (DOC_BYTE(doc, offset + 4) == '-') &&
- (DOC_BYTE(doc, offset + 5) == '1') &&
- (DOC_BYTE(doc, offset + 6) == '.')) {
- doc->start = doc->buffer + offset;
- doc->length -= offset;
- /* read number for minor */
- return 0;
- }
- }
- return -1;
-}
-
-nspdferror
-decode_trailer(struct pdf_doc *doc,
+static nspdferror
+decode_trailer(struct nspdf_doc *doc,
uint64_t *offset_out,
struct cos_object **trailer_out)
{
@@ -249,7 +207,7 @@ decode_trailer(struct pdf_doc *doc,
offset += 7;
doc_skip_ws(doc, &offset);
- res = cos_decode_object(doc, &offset, &trailer);
+ res = cos_parse_object(doc, &offset, &trailer);
if (res != 0) {
return res;
}
@@ -265,8 +223,9 @@ decode_trailer(struct pdf_doc *doc,
return NSPDFERROR_OK;
}
-nspdferror
-decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
+
+static nspdferror
+decode_xref(struct nspdf_doc *doc, uint64_t *offset_out)
{
uint64_t offset;
nspdferror res;
@@ -359,7 +318,8 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
/**
* recursively parse trailers and xref tables
*/
-nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
+static nspdferror
+decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset)
{
nspdferror res;
uint64_t offset; /* the current data offset */
@@ -455,6 +415,7 @@ decode_xref_trailer_failed:
return res;
}
+
/**
* decode non-linear pdf trailer data
*
@@ -477,7 +438,7 @@ decode_xref_trailer_failed:
* find the subsequent trailer.
*
*/
-nspdferror decode_trailers(struct pdf_doc *doc)
+static nspdferror decode_trailers(struct nspdf_doc *doc)
{
nspdferror res;
uint64_t offset; /* the current data offset */
@@ -499,11 +460,12 @@ nspdferror decode_trailers(struct pdf_doc *doc)
return decode_xref_trailer(doc, startxref);
}
+
/**
* recursively decodes a page tree
*/
-nspdferror
-decode_page_tree(struct pdf_doc *doc,
+static nspdferror
+decode_page_tree(struct nspdf_doc *doc,
struct cos_object *page_tree_node,
unsigned int *page_index)
{
@@ -594,12 +556,14 @@ decode_page_tree(struct pdf_doc *doc,
return res;
}
+ /*
printf("page index:%d page:%p resources:%p mediabox:%p contents:%p\n",
*page_index,
page,
page->resources,
page->mediabox,
page->contents);
+ */
(*page_index)++;
res = NSPDFERROR_OK;
@@ -609,7 +573,8 @@ decode_page_tree(struct pdf_doc *doc,
return res;
}
-nspdferror decode_catalog(struct pdf_doc *doc)
+
+static nspdferror decode_catalog(struct nspdf_doc *doc)
{
nspdferror res;
struct cos_object *catalog;
@@ -645,38 +610,63 @@ nspdferror decode_catalog(struct pdf_doc *doc)
return res;
}
-nspdferror new_pdf_doc(struct pdf_doc **doc_out)
+/* exported interface documented in nspdf/document.h */
+nspdferror nspdf_document_create(struct nspdf_doc **doc_out)
{
- struct pdf_doc *doc;
- doc = calloc(1, sizeof(struct pdf_doc));
+ struct nspdf_doc *doc;
+ doc = calloc(1, sizeof(struct nspdf_doc));
if (doc == NULL) {
return NSPDFERROR_NOMEM;
}
+
*doc_out = doc;
+
return NSPDFERROR_OK;
}
-int main(int argc, char **argv)
+/* exported interface documented in nspdf/document.h */
+nspdferror nspdf_document_destroy(struct nspdf_doc *doc)
{
- struct pdf_doc *doc;
- int res;
+ free(doc);
- if (argc < 2) {
- fprintf(stderr, "Usage %s <filename>\n", argv[0]);
- return 1;
- }
+ return NSPDFERROR_OK;
+}
- res = new_pdf_doc(&doc);
- if (res != NSPDFERROR_OK) {
- printf("failed to read file\n");
- return res;
- }
- res = read_whole_pdf(doc, argv[1]);
- if (res != 0) {
- printf("failed to read file\n");
- return res;
+/**
+ * find the PDF comment marker to identify the start of the document
+ *
+ * The "%PDF-1." marker is searched for within the first 1024 bytes of the
+ * input. When found the document start is moved to the marker and the
+ * document length reduced by the same amount.
+ *
+ * \param doc The document with its start and length set to the input data.
+ * \return NSPDFERROR_OK if the marker was found else NSPDFERROR_NOTFOUND.
+ */
+static nspdferror check_header(struct nspdf_doc *doc)
+{
+    uint64_t offset; /* offset of characters being considered for header */
+
+    /* The marker is seven bytes long. Only consider offsets at which a
+     * complete marker still fits inside the input so short or truncated
+     * buffers are never read beyond their end.
+     */
+    for (offset = 0;
+         (offset < 1024) && ((offset + 7) <= doc->length);
+         offset++) {
+        if ((DOC_BYTE(doc, offset) == '%') &&
+            (DOC_BYTE(doc, offset + 1) == 'P') &&
+            (DOC_BYTE(doc, offset + 2) == 'D') &&
+            (DOC_BYTE(doc, offset + 3) == 'F') &&
+            (DOC_BYTE(doc, offset + 4) == '-') &&
+            (DOC_BYTE(doc, offset + 5) == '1') &&
+            (DOC_BYTE(doc, offset + 6) == '.')) {
+            doc->start += offset;
+            doc->length -= offset;
+
+            /* \todo read number for minor */
+            return NSPDFERROR_OK;
+        }
     }
+    return NSPDFERROR_NOTFOUND;
+}
+
+/* exported interface documented in nspdf/document.h */
+nspdferror
+nspdf_document_parse(struct nspdf_doc *doc,
+ const uint8_t *buffer,
+ uint64_t buffer_length)
+{
+ nspdferror res;
+
+ doc->start = buffer;
+ doc->length = buffer_length;
res = check_header(doc);
if (res != 0) {
@@ -696,5 +686,5 @@ int main(int argc, char **argv)
return res;
}
- return 0;
+ return res;
}
diff --git a/src/nspdferror.h b/src/nspdferror.h
deleted file mode 100644
index 3e26813..0000000
--- a/src/nspdferror.h
+++ /dev/null
@@ -1,10 +0,0 @@
-typedef enum {
- NSPDFERROR_OK,
- NSPDFERROR_NOMEM,
- NSPDFERROR_SYNTAX, /**< syntax error in parse */
- NSPDFERROR_SIZE, /**< not enough input data */
- NSPDFERROR_RANGE, /**< value outside type range */
- NSPDFERROR_TYPE, /**< wrong type error */
- NSPDFERROR_NOTFOUND, /**< key not found */
- NSPDFERROR_FORMAT, /**< objects do not cornform to expected format */
-} nspdferror;
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
index 4a5cad1..281025c 100644
--- a/src/pdf_doc.c
+++ b/src/pdf_doc.c
@@ -12,7 +12,9 @@
#include <stdbool.h>
#include <stdio.h>
-#include "nspdferror.h"
+#include <nspdf/errors.h>
+
+#include "cos_parse.h"
#include "byte_class.h"
#include "cos_object.h"
#include "pdf_doc.h"
@@ -20,7 +22,7 @@
/**
* move offset to next non whitespace byte
*/
-nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
+nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
@@ -43,7 +45,7 @@ nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
/**
* move offset to next non eol byte
*/
-nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
+nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset)
{
uint8_t c;
/* TODO sort out keeping offset in range */
@@ -60,7 +62,7 @@ static struct cos_object cos_null_obj = {
};
nspdferror
-xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out)
+xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out)
{
nspdferror res;
struct cos_object *cobj;
@@ -90,7 +92,7 @@ xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out)
if (entry->object == NULL) {
/* indirect object has never been decoded */
offset = entry->offset;
- res = cos_decode_object(doc, &offset, &indirect);
+ res = cos_parse_object(doc, &offset, &indirect);
if (res != NSPDFERROR_OK) {
printf("failed to decode indirect object\n");
return res;
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
index 986556f..e9bdc14 100644
--- a/src/pdf_doc.h
+++ b/src/pdf_doc.h
@@ -18,11 +18,9 @@ struct page_table_entry {
};
/** pdf document */
-struct pdf_doc {
- uint8_t *buffer;
- uint64_t buffer_length;
+struct nspdf_doc {
- uint8_t *start; /* start of pdf document in input stream */
+ const uint8_t *start; /* start of pdf document in input stream */
uint64_t length;
int major;
@@ -47,7 +45,7 @@ struct pdf_doc {
/* byte data acessory, allows for more complex buffer handling in future */
#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-nspdferror doc_skip_ws(struct pdf_doc *doc, uint64_t *offset);
-nspdferror doc_skip_eol(struct pdf_doc *doc, uint64_t *offset);
+nspdferror doc_skip_ws(struct nspdf_doc *doc, uint64_t *offset);
+nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset);
-nspdferror xref_get_referenced(struct pdf_doc *doc, struct cos_object **cobj_out);
+nspdferror xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out);
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000..e3a2929
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,3 @@
+DIR_TEST_ITEMS := parsepdf:parsepdf.c
+
+include $(NSBUILD)/Makefile.subdir
diff --git a/test/parsepdf.c b/test/parsepdf.c
new file mode 100644
index 0000000..3482af5
--- /dev/null
+++ b/test/parsepdf.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2018 Vincent Sanders <vince@netsurf-browser.org>
+ *
+ * This file is part of libnspdf.
+ *
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <nspdf/document.h>
+
+/**
+ * read the whole of a file into a newly allocated buffer
+ *
+ * \param fname The path of the file to read.
+ * \param buffer Updated with the allocated buffer holding the file data on
+ *               success. The caller owns the buffer and must free() it.
+ * \param buffer_length Updated with the length of the file data on success.
+ * \return NSPDFERROR_OK on success,
+ *         NSPDFERROR_NOTFOUND if the file could not be opened,
+ *         NSPDFERROR_NOMEM if the buffer could not be allocated,
+ *         NSPDFERROR_SIZE if the file is empty, its size could not be
+ *         determined or its data could not be completely read.
+ */
+static nspdferror
+read_whole_pdf(const char *fname, uint8_t **buffer, uint64_t *buffer_length)
+{
+    FILE *f;
+    off_t len;
+    uint8_t *buf;
+    nspdferror res;
+
+    /* binary mode, PDF data must not have line endings translated */
+    f = fopen(fname, "rb");
+    if (f == NULL) {
+        perror("pdf open");
+        return NSPDFERROR_NOTFOUND;
+    }
+
+    /* every failure from here until the data is read is a size problem
+     * unless explicitly set otherwise
+     */
+    res = NSPDFERROR_SIZE;
+
+    if (fseek(f, 0, SEEK_END) != 0) {
+        perror("pdf seek");
+        goto read_whole_pdf_done;
+    }
+
+    len = ftello(f);
+    if (len < 0) {
+        perror("pdf tell");
+        goto read_whole_pdf_done;
+    }
+    if (len == 0) {
+        fprintf(stderr, "pdf read: file is empty\n");
+        goto read_whole_pdf_done;
+    }
+
+    if (fseek(f, 0, SEEK_SET) != 0) {
+        perror("pdf seek");
+        goto read_whole_pdf_done;
+    }
+
+    /* off_t may be wider than size_t, do not let the allocation truncate */
+    if ((uint64_t)len > SIZE_MAX) {
+        fprintf(stderr, "pdf read: file too large\n");
+        res = NSPDFERROR_NOMEM;
+        goto read_whole_pdf_done;
+    }
+
+    buf = malloc((size_t)len);
+    if (buf == NULL) {
+        perror("pdf alloc");
+        res = NSPDFERROR_NOMEM;
+        goto read_whole_pdf_done;
+    }
+
+    if (fread(buf, (size_t)len, 1, f) != 1) {
+        perror("pdf read");
+        free(buf);
+        goto read_whole_pdf_done;
+    }
+
+    *buffer = buf;
+    *buffer_length = (uint64_t)len;
+    res = NSPDFERROR_OK;
+
+read_whole_pdf_done:
+    /* the file is closed on every path once it has been opened */
+    fclose(f);
+
+    return res;
+}
+
+
+/**
+ * test program entry point
+ *
+ * Reads the file named by the first argument into memory, creates a
+ * document, parses the data and destroys the document again.
+ *
+ * \param argc The number of arguments.
+ * \param argv The arguments, argv[1] is the path of the PDF to parse.
+ * \return 0 on success, 1 if no filename was given, otherwise the nspdferror
+ *         code of the step that failed.
+ */
+int main(int argc, char **argv)
+{
+    uint8_t *buffer = NULL;
+    uint64_t buffer_length;
+    struct nspdf_doc *doc = NULL;
+    nspdferror res;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage %s <filename>\n", argv[0]);
+        return 1;
+    }
+
+    res = read_whole_pdf(argv[1], &buffer, &buffer_length);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to read file\n");
+        return res;
+    }
+
+    res = nspdf_document_create(&doc);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to create a document\n");
+        goto main_free_buffer;
+    }
+
+    res = nspdf_document_parse(doc, buffer, buffer_length);
+    if (res != NSPDFERROR_OK) {
+        printf("document parse failed (%d)\n", res);
+        /* release the document but keep reporting the parse error */
+        nspdf_document_destroy(doc);
+        goto main_free_buffer;
+    }
+
+    res = nspdf_document_destroy(doc);
+    if (res != NSPDFERROR_OK) {
+        printf("failed to destroy document (%d)\n", res);
+    }
+
+main_free_buffer:
+    /* the buffer is owned here and is released on every path after a
+     * successful read
+     */
+    free(buffer);
+
+    return res;
+}
diff --git a/test/runtest.sh b/test/runtest.sh
new file mode 100755
index 0000000..1aa83c7
--- /dev/null
+++ b/test/runtest.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+#
+# Run the libnspdf parse test
+#
+# Usage: runtest.sh <builddir> [<exeext>]
+#
+# The PDF to parse defaults to the HiKey user guide in the Downloads
+# directory of the current user and may be overridden by setting TEST_PDF
+# in the environment.
+
+TEST_PATH=$1
+TEST_EXEEXT=$2
+TEST_PDF=${TEST_PDF:-${HOME}/Downloads/HiKey_User_Guide_Rev0.2.pdf}
+
+"${TEST_PATH}/test_parsepdf${TEST_EXEEXT}" "${TEST_PDF}"