From a65babe2fbd341f9d12a50a9530682ef1e015b58 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Sun, 31 Dec 2017 18:03:41 +0000 Subject: add metadata interface --- Makefile | 17 +++++++-- include/nspdf/document.h | 1 + include/nspdf/meta.h | 25 +++++++++++++ src/Makefile | 2 +- src/cos_object.c | 38 +++++++++++++++++++- src/cos_object.h | 93 ++++++++++++++++++++++++++++++++++++++++++++---- src/document.c | 6 ++-- src/meta.c | 60 +++++++++++++++++++++++++++++++ test/parsepdf.c | 9 +++++ test/runtest.sh | 2 +- 10 files changed, 238 insertions(+), 15 deletions(-) create mode 100644 include/nspdf/meta.h create mode 100644 src/meta.c diff --git a/Makefile b/Makefile index 470505f..7024de4 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,17 @@ else endif CFLAGS := $(CFLAGS) -D_POSIX_C_SOURCE=200809L +# wapcaplet +ifneq ($(findstring clean,$(MAKECMDGOALS)),clean) + ifneq ($(PKGCONFIG),) + CFLAGS := $(CFLAGS) $(shell $(PKGCONFIG) libwapcaplet --cflags) + LDFLAGS := $(LDFLAGS) $(shell $(PKGCONFIG) libwapcaplet --libs) + else + CFLAGS := $(CFLAGS) -I$(PREFIX)/include + LDFLAGS := $(LDFLAGS) -lwapcaplet + endif +endif + REQUIRED_LIBS := nspdf TESTCFLAGS := -g -O2 @@ -42,7 +53,9 @@ TESTLDFLAGS := -l$(COMPONENT) $(TESTLDFLAGS) include $(NSBUILD)/Makefile.top # Extra installation rules -I := /$(INCLUDEDIR) -INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf.h +I := /$(INCLUDEDIR)/nspdf +INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/document.h +INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/meta.h +INSTALL_ITEMS := $(INSTALL_ITEMS) $(I):include/nspdf/errors.h INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR)/pkgconfig:lib$(COMPONENT).pc.in INSTALL_ITEMS := $(INSTALL_ITEMS) /$(LIBDIR):$(OUTPUT) diff --git a/include/nspdf/document.h b/include/nspdf/document.h index 4e4931d..d7cbb0f 100644 --- a/include/nspdf/document.h +++ b/include/nspdf/document.h @@ -15,6 +15,7 @@ #ifndef NSPDF_DOCUMENT_H_ #define NSPDF_DOCUMENT_H_ +#include #include struct nspdf_doc; diff --git 
a/include/nspdf/meta.h b/include/nspdf/meta.h new file mode 100644 index 0000000..a91183e --- /dev/null +++ b/include/nspdf/meta.h @@ -0,0 +1,25 @@ +/* + * Copyright 2018 Vincent Sanders + * + * This file is part of libnspdf. + * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + +/** + * \file + * NetSurf PDF library meta data about document. + */ + +#ifndef NSPDF_META_H_ +#define NSPDF_META_H_ + +#include + +struct nspdf_doc; +struct lwc_string_s; + +nspdferror nspdf_get_title(struct nspdf_doc *doc, struct lwc_string_s **title); + +#endif /* NSPDF_META_H_ */ diff --git a/src/Makefile b/src/Makefile index ed0b4ba..c4ddc7f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,3 +1,3 @@ -DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c +DIR_SOURCES := document.c byte_class.c cos_parse.c cos_object.c pdf_doc.c meta.c include $(NSBUILD)/Makefile.subdir diff --git a/src/cos_object.c b/src/cos_object.c index 2fa3a93..494c7ff 100644 --- a/src/cos_object.c +++ b/src/cos_object.c @@ -172,6 +172,22 @@ cos_get_dictionary_name(struct nspdf_doc *doc, return cos_get_name(doc, dict_value, value_out); } +nspdferror +cos_get_dictionary_string(struct nspdf_doc *doc, + struct cos_object *dict, + const char *key, + struct cos_string **string_out) +{ + nspdferror res; + struct cos_object *dict_value; + + res = cos_get_dictionary_value(doc, dict, key, &dict_value); + if (res != NSPDFERROR_OK) { + return res; + } + return cos_get_string(doc, dict_value, string_out); +} + nspdferror cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, @@ -278,7 +294,6 @@ cos_get_name(struct nspdf_doc *doc, } - nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, @@ -297,6 +312,7 @@ cos_get_dictionary(struct nspdf_doc *doc, return res; } + nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, @@ -315,6 +331,26 @@ cos_get_array(struct nspdf_doc *doc, return res; } + 
+nspdferror +cos_get_string(struct nspdf_doc *doc, + struct cos_object *cobj, + struct cos_string **string_out) +{ + nspdferror res; + + res = xref_get_referenced(doc, &cobj); + if (res == NSPDFERROR_OK) { + if (cobj->type != COS_TYPE_STRING) { + res = NSPDFERROR_TYPE; + } else { + *string_out = cobj->u.s; + } + } + return res; +} + + /* * get a value for a key from a dictionary */ diff --git a/src/cos_object.h b/src/cos_object.h index a40c691..d0bd5ea 100644 --- a/src/cos_object.h +++ b/src/cos_object.h @@ -85,6 +85,7 @@ struct cos_object { nspdferror cos_free_object(struct cos_object *cos_obj); + /** * extract a value for a key from a dictionary * @@ -100,6 +101,7 @@ nspdferror cos_free_object(struct cos_object *cos_obj); */ nspdferror cos_extract_dictionary_value(struct cos_object *dict, const char *key, struct cos_object **value_out); + /** * get a value for a key from a dictionary * @@ -118,27 +120,104 @@ nspdferror cos_get_dictionary_int(struct nspdf_doc *doc, struct cos_object *dict nspdferror cos_get_dictionary_name(struct nspdf_doc *doc, struct cos_object *dict, const char *key, const char **value_out); + +nspdferror cos_get_dictionary_string(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_string **string_out); + + nspdferror cos_get_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); + nspdferror cos_heritable_dictionary_dictionary(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); + nspdferror cos_get_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); + nspdferror cos_heritable_dictionary_array(struct nspdf_doc *doc, struct cos_object *dict, const char *key, struct cos_object **value_out); -nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out); +nspdferror cos_get_array_size(struct nspdf_doc *doc, struct cos_object 
*cobj, unsigned int *size_out); -nspdferror cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **value_out); +nspdferror cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out); -nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); +nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out); -nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); +/** + * get the integer value of a cos object. + * + * Get the value from a cos object, if the object is an object reference it + * will be dereferenced first. The dereferencing will parse any previously + * unreferenced indirect objects as required. + * + * \param doc The document the cos object belongs to. + * \param cobj A cos object of integer type. + * \param value_out The result value. + * \return NSPDFERROR_OK and \p value_out updated, + * NSPDFERROR_TYPE if the \p cobj is not an integer + */ +nspdferror cos_get_int(struct nspdf_doc *doc, struct cos_object *cobj, int64_t *value_out); -nspdferror cos_get_array_size(struct nspdf_doc *doc, struct cos_object *cobj, unsigned int *size_out); +/** + * get the name value of a cos object. + * + * Get the value from a cos object, if the object is an object reference it + * will be dereferenced first. The dereferencing will parse any previously + * unreferenced indirect objects as required. + * + * \param doc The document the cos object belongs to. + * \param cobj A cos object of name type. + * \param name_out The result value. 
+ * \return NSPDFERROR_OK and \p name_out updated, + * NSPDFERROR_TYPE if the \p cobj is not a name + */ +nspdferror cos_get_name(struct nspdf_doc *doc, struct cos_object *cobj, const char **name_out); -nspdferror cos_get_array_value(struct nspdf_doc *doc, struct cos_object *array, unsigned int index, struct cos_object **value_out); -nspdferror cos_get_array_dictionary(struct nspdf_doc *doc, struct cos_object *arrau, unsigned int index, struct cos_object **value_out); +/** + * get the string value of a cos object. + * + * Get the value from a cos object, if the object is an object reference it + * will be dereferenced first. The dereferencing will parse any previously + * unreferenced indirect objects as required. + * + * \param doc The document the cos object belongs to. + * \param cobj A cos object of string type. + * \param string_out The result value. + * \return NSPDFERROR_OK and \p string_out updated, + * NSPDFERROR_TYPE if the \p cobj is not a string + */ +nspdferror cos_get_string(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_string **string_out); + + +/** + * get the dictionary value of a cos object. + * + * Get the value from a cos object, if the object is an object reference it + * will be dereferenced first. The dereferencing will parse any previously + * unreferenced indirect objects as required. + * + * \param doc The document the cos object belongs to. + * \param cobj A cos object of dictionary type. + * \param value_out The result value. + * \return NSPDFERROR_OK and \p value_out updated, + * NSPDFERROR_TYPE if the \p cobj is not a dictionary + */ +nspdferror cos_get_dictionary(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); + + +/** + * get the array value of a cos object. + * + * Get the value from a cos object, if the object is an object reference it + * will be dereferenced first. The dereferencing will parse any previously + * unreferenced indirect objects as required. 
+ * + * \param doc The document the cos object belongs to. + * \param cobj A cos object of array type. + * \param value_out The result value. + * \return NSPDFERROR_OK and \p value_out updated, + * NSPDFERROR_TYPE if the \p cobj is not an array + */ +nspdferror cos_get_array(struct nspdf_doc *doc, struct cos_object *cobj, struct cos_object **value_out); diff --git a/src/document.c b/src/document.c index 9be0ab5..cef2c1a 100644 --- a/src/document.c +++ b/src/document.c @@ -419,9 +419,9 @@ decode_xref_trailer_failed: /** * decode non-linear pdf trailer data * - * PDF have a structure nominally defined as header, body, cross reference table - * and trailer. The body, cross reference table and trailer sections may be - * repeated in a scheme known as "incremental updates" + * PDF have a structure nominally defined as header, body, cross reference + * table and trailer. The body, cross reference table and trailer sections may + * be repeated in a scheme known as "incremental updates" * * The strategy used here is to locate the end of the last trailer block which * contains a startxref token followed by a byte offset into the file of the diff --git a/src/meta.c b/src/meta.c new file mode 100644 index 0000000..02566b2 --- /dev/null +++ b/src/meta.c @@ -0,0 +1,60 @@ +/* + * Copyright 2018 Vincent Sanders + * + * This file is part of libnspdf. 
+ * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + +#include + +#include + +#include "cos_object.h" +#include "pdf_doc.h" + +static nspdferror lwc2nspdferr(lwc_error ret) +{ + nspdferror res; + + switch (ret) { + case lwc_error_ok: + res = NSPDFERROR_OK; + break; + + case lwc_error_oom: + res = NSPDFERROR_NOMEM; + break; + + case lwc_error_range: + res = NSPDFERROR_RANGE; + break; + + default: + res = NSPDFERROR_NOTFOUND; + break; + } + return res; +} + +nspdferror nspdf_get_title(struct nspdf_doc *doc, struct lwc_string_s **title) +{ + struct cos_string *cos_title; + nspdferror res; + + if (doc->info == NULL) { + return NSPDFERROR_NOTFOUND; + } + + res = cos_get_dictionary_string(doc, doc->info, "Title", &cos_title); + if (res != NSPDFERROR_OK) { + return res; + } + + res = lwc2nspdferr(lwc_intern_string((const char *)cos_title->data, + cos_title->length, + title)); + + return res; +} diff --git a/test/parsepdf.c b/test/parsepdf.c index 3482af5..1d7a086 100644 --- a/test/parsepdf.c +++ b/test/parsepdf.c @@ -14,7 +14,10 @@ #include #include +#include + #include +#include static nspdferror read_whole_pdf(const char *fname, uint8_t **buffer, uint64_t *buffer_length) @@ -58,6 +61,7 @@ int main(int argc, char **argv) uint64_t buffer_length; struct nspdf_doc *doc; nspdferror res; + struct lwc_string_s *title; if (argc < 2) { fprintf(stderr, "Usage %s \n", argv[0]); @@ -82,6 +86,11 @@ int main(int argc, char **argv) return res; } + res = nspdf_get_title(doc, &title); + if (res == NSPDFERROR_OK) { + printf("Title:%s\n", lwc_string_data(title)); + } + res = nspdf_document_destroy(doc); if (res != NSPDFERROR_OK) { printf("failed to destroy document (%d)\n", res); diff --git a/test/runtest.sh b/test/runtest.sh index 1aa83c7..c62ec11 100755 --- a/test/runtest.sh +++ b/test/runtest.sh @@ -1,4 +1,4 @@ #!/bin/sh TEST_PATH=$1 -${TEST_PATH}/test_parsepdf ~/Downloads/HiKey_User_Guide_Rev0.2.pdf +${TEST_PATH}/test_parsepdf 
test/files/sn74ls173a.pdf -- cgit v1.2.3