summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Makefile6
-rw-r--r--src/cos_decode.c799
-rw-r--r--src/cos_object.c139
-rw-r--r--src/cos_object.h98
-rw-r--r--src/pdf_doc.c47
-rw-r--r--src/pdf_doc.h42
-rw-r--r--src/xref.c1076
7 files changed, 1139 insertions, 1068 deletions
diff --git a/src/Makefile b/src/Makefile
index f9ca22c..af806f3 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,11 +2,13 @@
CFLAGS+=-g -Wall -Wextra
+# object files linked to build the xref binary
+OBJS=xref.o byte_class.o cos_decode.o cos_object.o pdf_doc.o
+
.PHONY:all clean
all:xref
-xref:xref.o byte_class.o
+xref:$(OBJS)
clean:
- ${RM} xref xref.o
+ ${RM} xref $(OBJS)
diff --git a/src/cos_decode.c b/src/cos_decode.c
new file mode 100644
index 0000000..3936e05
--- /dev/null
+++ b/src/cos_decode.c
@@ -0,0 +1,799 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "byte_class.h"
+#include "nspdferror.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+#define COS_STRING_ALLOC 32
+
+/**
+ * append one byte to a cos string, growing its buffer when it is full
+ *
+ * The buffer is extended by COS_STRING_ALLOC bytes at a time.
+ *
+ * \param s the string to extend
+ * \param c the byte to append
+ * \return NSPDFERROR_OK on success or NSPDFERROR_NOMEM if the buffer could
+ *         not be grown, in which case the string is left unchanged
+ */
+nspdferror
+cos_string_append(struct cos_string *s, uint8_t c)
+{
+    if (s->length == s->alloc) {
+        /* no room left, extend the buffer by one allocation quantum */
+        size_t grown = s->alloc + COS_STRING_ALLOC;
+        uint8_t *bigger = realloc(s->data, grown);
+
+        if (bigger == NULL) {
+            return NSPDFERROR_NOMEM;
+        }
+        s->alloc = grown;
+        s->data = bigger;
+    }
+
+    s->data[s->length] = c;
+    s->length++;
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * convert an ASCII hexadecimal digit to its numeric value
+ *
+ * \param x the byte to convert
+ * \return 0 to 15 for a hexadecimal digit of either case, any other byte
+ *         is returned unchanged
+ */
+uint8_t xtoi(uint8_t x)
+{
+    if ((x >= '0') && (x <= '9')) {
+        return x - '0';
+    }
+    if ((x >= 'a') && (x <= 'f')) {
+        return (x - 'a') + 10;
+    }
+    if ((x >= 'A') && (x <= 'F')) {
+        return (x - 'A') + 10;
+    }
+    return x;
+}
+
+/**
+ * decode a run of decimal digits into an integer object
+ *
+ * Only bytes classed as BC_DCML are consumed; a leading byte of any other
+ * class is reported as a parse error.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the first digit, on success updated to the
+ *                   first non whitespace byte after the number
+ * \param cosobj_out on success the newly allocated COS_TYPE_INT object
+ * \return 0 on success, -2 if there was no digit at the offset, -1 if the
+ *         number has more digits than the buffer holds or on memory
+ *         exhaustion
+ */
+int cos_decode_number(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint8_t digits[21]; /* value of each decimal place, first digit first */
+    unsigned int ndigits = 0; /* number of places stored in digits */
+    unsigned int place;
+    uint64_t cursor = *offset_out; /* current offset in source data */
+    uint64_t scale;
+    int64_t total = 0; /* parsed result */
+    struct cos_object *numobj;
+    uint8_t c; /* current byte from source data */
+
+    /* gather digits until a non decimal byte or the buffer is full */
+    while (ndigits < sizeof(digits)) {
+        c = DOC_BYTE(doc, cursor);
+        if ((bclass[c] & BC_DCML) != BC_DCML) {
+            break;
+        }
+        digits[ndigits++] = c - '0';
+        cursor++;
+    }
+
+    if (ndigits == sizeof(digits)) {
+        return -1; /* number too long */
+    }
+    if (ndigits == 0) {
+        return -2; /* parse error no decimals in input */
+    }
+
+    /* sum value from each place, least significant first */
+    scale = 1;
+    for (place = ndigits; place > 0; place--) {
+        total += (digits[place - 1] * scale);
+        scale = scale * 10;
+    }
+
+    doc_skip_ws(doc, &cursor);
+
+    numobj = calloc(1, sizeof(struct cos_object));
+    if (numobj == NULL) {
+        return -1; /* memory error */
+    }
+    numobj->type = COS_TYPE_INT;
+    numobj->u.i = total;
+
+    *cosobj_out = numobj;
+    *offset_out = cursor;
+
+    return 0;
+}
+
+
+/**
+ * decode a literal string object
+ *
+ * The string is delimited by parentheses; balanced unescaped parentheses
+ * inside the string are kept as data. A run of unescaped end of line bytes
+ * is stored as a single newline. Backslash escapes n, r, t, b, f, (, ) and
+ * \ are translated, a backslash followed by one to three octal digits
+ * yields the byte with that value and a backslash followed by an end of
+ * line run is a line continuation which adds nothing to the string. For
+ * any other escaped byte the backslash is dropped.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening paren, on success updated to the
+ *                   first non whitespace byte after the closing paren
+ * \param cosobj_out on success the newly allocated COS_TYPE_STRING object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the data does not
+ *         start with a paren or ends before the closing paren,
+ *         NSPDFERROR_NOMEM on memory exhaustion. Nothing is leaked and the
+ *         outputs are untouched on failure.
+ */
+nspdferror
+cos_decode_string(struct pdf_doc *doc,
+                  uint64_t *offset_out,
+                  struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    unsigned int pdepth = 1; /* depth of open parens */
+    struct cos_string *cstring;
+    nspdferror res;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '(') {
+        return NSPDFERROR_SYNTAX;
+    }
+
+    cstring = calloc(1, sizeof(*cstring));
+    if (cstring == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+
+    cosobj = calloc(1, sizeof(*cosobj));
+    if (cosobj == NULL) {
+        free(cstring);
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_STRING;
+    cosobj->u.s = cstring;
+
+    while (pdepth > 0) {
+        if (offset >= doc->length) {
+            /* document ended before the closing paren */
+            cos_free_object(cosobj);
+            return NSPDFERROR_SYNTAX;
+        }
+        c = DOC_BYTE(doc, offset++);
+
+        if (c == ')') {
+            pdepth--;
+            if (pdepth == 0) {
+                break;
+            }
+        } else if (c == '(') {
+            pdepth++;
+        } else if ((bclass[c] & BC_EOLM) != 0) {
+            /* unescaped end of line characters are translated to a single
+             * newline
+             */
+            while ((offset < doc->length) &&
+                   ((bclass[DOC_BYTE(doc, offset)] & BC_EOLM) != 0)) {
+                offset++;
+            }
+            c = '\n';
+        } else if (c == '\\') {
+            /* escaped chars */
+            if (offset >= doc->length) {
+                cos_free_object(cosobj);
+                return NSPDFERROR_SYNTAX;
+            }
+            c = DOC_BYTE(doc, offset++);
+            switch (c) {
+            case 'n':
+                c = '\n';
+                break;
+
+            case 'r':
+                c = '\r';
+                break;
+
+            case 't':
+                c = '\t';
+                break;
+
+            case 'b':
+                c = '\b';
+                break;
+
+            case 'f':
+                c = '\f';
+                break;
+
+            case '(':
+            case ')':
+            case '\\':
+                /* the escaped byte stands for itself */
+                break;
+
+            default:
+                if ((bclass[c] & BC_EOLM) != 0) {
+                    /* escaped end of line is a line continuation: swallow
+                     * the rest of the end of line run and add nothing. The
+                     * byte after the run is left for the main loop so a
+                     * closing paren or escape there is still recognised.
+                     */
+                    while ((offset < doc->length) &&
+                           ((bclass[DOC_BYTE(doc, offset)] & BC_EOLM) != 0)) {
+                        offset++;
+                    }
+                    continue;
+                } else if ((bclass[c] & BC_OCTL) != 0) {
+                    /* octal value of one to three digits, the byte after
+                     * the last digit is not consumed
+                     */
+                    uint8_t val = (c - '0');
+                    unsigned int ndigits = 1;
+
+                    while ((ndigits < 3) && (offset < doc->length)) {
+                        c = DOC_BYTE(doc, offset);
+                        if ((bclass[c] & BC_OCTL) == 0) {
+                            break;
+                        }
+                        val = (val << 3) | (c - '0');
+                        offset++;
+                        ndigits++;
+                    }
+                    c = val;
+                } /* else invalid (skip backslash) */
+                break;
+            }
+        }
+
+        /* c contains the character to add to the string */
+        res = cos_string_append(cstring, c);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(cosobj);
+            return res;
+        }
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+
+/**
+ * decode a hexadecimal string object
+ *
+ * The string is a sequence of hexadecimal digits between angle brackets,
+ * whitespace between digits is ignored. If the digit count is odd the
+ * final digit forms the high nibble of the last byte and the low nibble
+ * is zero.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening bracket, on success updated to
+ *                   the first non whitespace byte after the closing bracket
+ * \param cosobj_out on success the newly allocated COS_TYPE_STRING object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the data does not
+ *         start with a bracket, contains an unexpected byte or ends before
+ *         the closing bracket, NSPDFERROR_NOMEM on memory exhaustion.
+ *         Nothing is leaked and the outputs are untouched on failure.
+ */
+nspdferror
+cos_decode_hex_string(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    uint8_t value = 0;
+    struct cos_string *cstring;
+    bool first = true; /* next digit is the high nibble of a byte */
+    nspdferror res;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '<') {
+        return NSPDFERROR_SYNTAX;
+    }
+
+    cstring = calloc(1, sizeof(*cstring));
+    if (cstring == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+
+    cosobj = calloc(1, sizeof(*cosobj));
+    if (cosobj == NULL) {
+        free(cstring);
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_STRING;
+    cosobj->u.s = cstring;
+
+    for (; offset < doc->length; offset++) {
+        c = DOC_BYTE(doc, offset);
+        if (c == '>') {
+            if (first == false) {
+                /* odd digit count, flush the pending high nibble */
+                res = cos_string_append(cstring, value);
+                if (res != NSPDFERROR_OK) {
+                    cos_free_object(cosobj);
+                    return res;
+                }
+            }
+            offset++;
+            doc_skip_ws(doc, &offset);
+
+            *cosobj_out = cosobj;
+            *offset_out = offset;
+
+            return NSPDFERROR_OK;
+        } else if ((bclass[c] & BC_HEXL) != 0) {
+            if (first) {
+                value = xtoi(c) << 4;
+                first = false;
+            } else {
+                value |= xtoi(c);
+                first = true;
+                res = cos_string_append(cstring, value);
+                if (res != NSPDFERROR_OK) {
+                    cos_free_object(cosobj);
+                    return res;
+                }
+            }
+        } else if ((bclass[c] & BC_WSPC) == 0) {
+            break; /* unknown byte value in string */
+        }
+    }
+
+    /* bad byte or document ended before the closing bracket */
+    cos_free_object(cosobj);
+    return NSPDFERROR_SYNTAX;
+}
+
+
+/**
+ * decode a dictionary object
+ *
+ * A dictionary is a sequence of key value pairs between double angle
+ * brackets where every key must be a name object. Entries are linked onto
+ * the head of the dictionary list so their stored order is the reverse of
+ * the source order.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening brackets, on success updated to
+ *                   the first non whitespace byte after the closing
+ *                   brackets
+ * \param cosobj_out on success the newly allocated COS_TYPE_DICTIONARY
+ *                   object
+ * \return 0 on success, -1 on syntax or memory error, or the error from
+ *         decoding a key or value. Everything decoded so far is freed and
+ *         the outputs are untouched on failure.
+ */
+int cos_decode_dictionary(struct pdf_doc *doc,
+                          uint64_t *offset_out,
+                          struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_dictionary_entry *entry;
+    struct cos_object *key;
+    struct cos_object *value;
+    int res;
+
+    offset = *offset_out;
+
+    if ((DOC_BYTE(doc, offset) != '<') ||
+        (DOC_BYTE(doc, offset + 1) != '<')) {
+        return -1; /* syntax error */
+    }
+    offset += 2;
+    doc_skip_ws(doc, &offset);
+
+    printf("found a dictionary\n");
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+    cosobj->type = COS_TYPE_DICTIONARY;
+
+    /* keep decoding pairs until both bytes of the closing >> are seen; a
+     * lone > is left to the object decoder to reject
+     */
+    while (!((DOC_BYTE(doc, offset) == '>') &&
+             (DOC_BYTE(doc, offset + 1) == '>'))) {
+
+        res = cos_decode_object(doc, &offset, &key);
+        if (res != 0) {
+            printf("key object decode failed\n");
+            cos_free_object(cosobj);
+            return res;
+        }
+        if (key->type != COS_TYPE_NAME) {
+            /* key value pairs without a name */
+            printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return -1; /* syntax error */
+        }
+        printf("key: %s\n", key->u.n);
+
+        res = cos_decode_object(doc, &offset, &value);
+        if (res != 0) {
+            printf("Unable to decode value object in dictionary\n");
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return res;
+        }
+
+        /* add dictionary entry */
+        entry = calloc(1, sizeof(struct cos_dictionary_entry));
+        if (entry == NULL) {
+            cos_free_object(key);
+            cos_free_object(value);
+            cos_free_object(cosobj);
+            return -1; /* memory error */
+        }
+
+        entry->key = key;
+        entry->value = value;
+        entry->next = cosobj->u.dictionary;
+
+        cosobj->u.dictionary = entry;
+
+    }
+    offset += 2; /* skip closing >> */
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+
+/**
+ * decode an array (list) object
+ *
+ * An array is a sequence of objects between square brackets. The decoded
+ * values are linked in the same order as they appear in the source.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the opening bracket, on success updated to
+ *                   the first non whitespace byte after the closing bracket
+ * \param cosobj_out on success the newly allocated COS_TYPE_ARRAY object
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the data does not
+ *         start with a bracket, NSPDFERROR_NOMEM on memory exhaustion or
+ *         the error from decoding a value. Everything decoded so far is
+ *         freed and the outputs are untouched on failure.
+ */
+nspdferror
+cos_decode_list(struct pdf_doc *doc,
+                uint64_t *offset_out,
+                struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_array_entry *entry;
+    struct cos_array_entry **tail; /* link to attach the next entry to */
+    struct cos_object *value;
+    nspdferror res;
+
+    offset = *offset_out;
+
+    /* sanity check first token is list open */
+    if (DOC_BYTE(doc, offset) != '[') {
+        printf("not a [\n");
+        return NSPDFERROR_SYNTAX; /* syntax error */
+    }
+    offset++;
+
+    /* advance offset to next token */
+    res = doc_skip_ws(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+
+    printf("found a list\n");
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_ARRAY;
+    tail = &cosobj->u.array;
+
+    while (DOC_BYTE(doc, offset) != ']') {
+
+        res = cos_decode_object(doc, &offset, &value);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(cosobj);
+            printf("Unable to decode value object in list\n");
+            return res;
+        }
+
+        /* add entry to array */
+        entry = calloc(1, sizeof(struct cos_array_entry));
+        if (entry == NULL) {
+            /* value is not yet owned by the array so free it explicitly */
+            cos_free_object(value);
+            cos_free_object(cosobj);
+            return NSPDFERROR_NOMEM;
+        }
+
+        entry->value = value;
+
+        /* append so the list keeps source order, array order is
+         * significant
+         */
+        *tail = entry;
+        tail = &entry->next;
+    }
+    offset++; /* skip closing ] */
+
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+#define NAME_MAX_LENGTH 127
+
+/**
+ * decode a name object
+ *
+ * A name is a solidus followed by every byte up to the next whitespace or
+ * delimiter. Names longer than NAME_MAX_LENGTH bytes are rejected.
+ *
+ * \todo deal with # symbols on pdf versions 1.2 and later
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the solidus, on success updated to the first
+ *                   non whitespace byte after the name
+ * \param cosobj_out on success the newly allocated COS_TYPE_NAME object
+ * \return 0 on success, -1 on syntax error, over long name or memory
+ *         exhaustion. The outputs are untouched on failure.
+ */
+int cos_decode_name(struct pdf_doc *doc,
+                    uint64_t *offset_out,
+                    struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    char name[NAME_MAX_LENGTH + 1];
+    int idx = 0;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '/') {
+        return -1; /* names must be prefixed with a / */
+    }
+    printf("found a name\n");
+
+    /* idx may reach NAME_MAX_LENGTH + 1 which flags an over long name, the
+     * last write is to name[NAME_MAX_LENGTH] so the buffer is not overrun
+     */
+    c = DOC_BYTE(doc, offset);
+    while ((idx <= NAME_MAX_LENGTH) &&
+           ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
+        offset++;
+        name[idx++] = c;
+        c = DOC_BYTE(doc, offset);
+    }
+    if (idx > NAME_MAX_LENGTH) {
+        /* name length exceeded implementation limit */
+        return -1;
+    }
+    name[idx] = 0;
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+
+    cosobj->type = COS_TYPE_NAME;
+    cosobj->u.n = strdup(name);
+    if (cosobj->u.n == NULL) {
+        /* never hand out a name object without a name string */
+        free(cosobj);
+        return -1; /* memory error */
+    }
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return 0;
+}
+
+
+/**
+ * decode a boolean object
+ *
+ * Matches the keywords true and false with each letter accepted in either
+ * case.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the keyword, on success updated to the first
+ *                   non whitespace byte after it
+ * \param cosobj_out on success the newly allocated COS_TYPE_BOOL object
+ * \return 0 on success, -1 on syntax error or memory exhaustion
+ */
+int cos_decode_boolean(struct pdf_doc *doc,
+                       uint64_t *offset_out,
+                       struct cos_object **cosobj_out)
+{
+    /* lower case letters expected after the initial letter */
+    static const char true_tail[] = "rue";
+    static const char false_tail[] = "alse";
+
+    const char *tail;
+    uint64_t cursor = *offset_out;
+    struct cos_object *boolobj;
+    bool value;
+    uint8_t c;
+
+    /* the first letter selects which keyword must follow */
+    c = DOC_BYTE(doc, cursor++);
+    switch (c) {
+    case 't':
+    case 'T':
+        value = true;
+        tail = true_tail;
+        break;
+
+    case 'f':
+    case 'F':
+        value = false;
+        tail = false_tail;
+        break;
+
+    default:
+        return -1; /* syntax error */
+    }
+
+    /* every remaining letter must match in lower or upper case */
+    for (; *tail != '\0'; tail++) {
+        c = DOC_BYTE(doc, cursor++);
+        if ((c != *tail) && (c != (*tail - ('a' - 'A')))) {
+            return -1; /* syntax error */
+        }
+    }
+
+    doc_skip_ws(doc, &cursor);
+
+    boolobj = calloc(1, sizeof(struct cos_object));
+    if (boolobj == NULL) {
+        return -1; /* memory error */
+    }
+    boolobj->type = COS_TYPE_BOOL;
+    boolobj->u.b = value;
+
+    *cosobj_out = boolobj;
+    *offset_out = cursor;
+
+    return 0;
+}
+
+/**
+ * decode a null object
+ *
+ * Matches the keyword null with each letter accepted in either case.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the keyword, on success updated to the first
+ *                   non whitespace byte after it
+ * \param cosobj_out on success the newly allocated COS_TYPE_NULL object
+ * \return 0 on success, -1 on syntax error or memory exhaustion
+ */
+int cos_decode_null(struct pdf_doc *doc,
+                    uint64_t *offset_out,
+                    struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'n') && (c != 'N')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'u') && (c != 'U')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'l') && (c != 'L')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'l') && (c != 'L')) {
+        return -1; /* syntax error */
+    }
+
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+
+    cosobj->type = COS_TYPE_NULL;
+
+    /* hand the object to the caller, without this it would leak and the
+     * caller would read an unset pointer
+     */
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+/**
+ * attempt to decode the stream into a reference
+ *
+ * The stream has already had a positive integer decoded from it. If another
+ * non-negative integer follows and an R character after that it is a
+ * reference, otherwise bail, but not finding a ref is not an error!
+ *
+ * When a reference is found the integer object passed in is freed and
+ * replaced by a COS_TYPE_REFERENCE object. In every other case, including
+ * failure, the passed in object and offset are left untouched and remain
+ * owned by the caller.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of current cursor in stream
+ * \param cosobj_out the object to return into, on input contains the first
+ *                   integer
+ * \return 0 if a reference was decoded or the data is not a reference, -1
+ *         on memory exhaustion
+ */
+int cos_attempt_decode_reference(struct pdf_doc *doc,
+                                 uint64_t *offset_out,
+                                 struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj; /* possible generation object */
+    uint8_t c;
+    int res;
+    struct cos_reference *nref; /* new reference */
+
+    offset = *offset_out;
+
+    res = cos_decode_number(doc, &offset, &cosobj);
+    if (res != 0) {
+        return 0; /* no error if object could not be decoded */
+    }
+
+    if (cosobj->type != COS_TYPE_INT) {
+        /* next object was not an integer so not a reference */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    if (cosobj->u.i < 0) {
+        /* integer was negative so not a reference (generations must be
+         * non-negative)
+         */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    /* two int in a row, look for the R */
+    c = DOC_BYTE(doc, offset++);
+    if (c != 'R') {
+        /* no R so not a reference */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    /* found reference */
+
+    printf("found reference\n");
+    doc_skip_ws(doc, &offset);
+
+    nref = calloc(1, sizeof(struct cos_reference));
+    if (nref == NULL) {
+        /* drop the generation object, the first integer stays with the
+         * caller
+         */
+        cos_free_object(cosobj);
+        return -1; /* memory error */
+    }
+
+    nref->id = (*cosobj_out)->u.i;
+    nref->generation = cosobj->u.i;
+
+    cos_free_object(*cosobj_out);
+
+    /* reuse the generation object as the reference object */
+    cosobj->type = COS_TYPE_REFERENCE;
+    cosobj->u.reference = nref;
+
+    *cosobj_out = cosobj;
+
+    *offset_out = offset;
+
+    return 0;
+}
+
+/**
+ * Decode input stream into an object
+ *
+ * lex and parse a byte stream to generate COS objects
+ *
+ * lexing the input.
+ *  check first character:
+ *
+ * < either a hex string or a dictionary
+ *     second char < means dictionary else hex string
+ * - either an integer or real
+ * + either an integer or real
+ * 0-9 an integer, unsigned integer or real
+ * . a real number
+ * ( a string
+ * / a name
+ * [ a list
+ * t|T boolean true
+ * f|F boolean false
+ * n|N null
+ *
+ * Grammar is:
+ * cos_object:
+ *   TOK_NULL |
+ *   TOK_BOOLEAN |
+ *   TOK_INT |
+ *   TOK_REAL |
+ *   TOK_NAME |
+ *   TOK_STRING |
+ *   list |
+ *   dictionary |
+ *   object_reference;
+ *
+ * list:
+ *   '[' listargs ']';
+ *
+ * listargs:
+ *   cos_object
+ *   |
+ *   listargs cos_object
+ *   ;
+ *
+ * object_reference:
+ *   TOK_UINT TOK_UINT 'R';
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the object, only updated on success
+ * \param cosobj_out the decoded object, only updated on success
+ * \return 0 on success, otherwise the error from the selected decoder or
+ *         -1 if the first byte does not start any known object
+ */
+int cos_decode_object(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    int res;
+    struct cos_object *cosobj = NULL;
+
+    offset = *offset_out;
+
+    /* object could be any type use first char to try and select */
+    switch (DOC_BYTE(doc, offset)) {
+
+    case '-':
+    case '+':
+    case '.':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+        res = cos_decode_number(doc, &offset, &cosobj);
+        /* if type is positive integer try to check for reference */
+        if ((res == 0) &&
+            (cosobj->type == COS_TYPE_INT) &&
+            (cosobj->u.i > 0)) {
+            res = cos_attempt_decode_reference(doc, &offset, &cosobj);
+            if (res != 0) {
+                /* the attempt leaves the integer with us on failure and
+                 * it is not passed to the caller, so release it here
+                 */
+                cos_free_object(cosobj);
+                cosobj = NULL;
+            }
+        }
+        break;
+
+    case '<':
+        if (DOC_BYTE(doc, offset + 1) == '<') {
+            res = cos_decode_dictionary(doc, &offset, &cosobj);
+        } else {
+            res = cos_decode_hex_string(doc, &offset, &cosobj);
+        }
+        break;
+
+    case '(':
+        res = cos_decode_string(doc, &offset, &cosobj);
+        break;
+
+    case '/':
+        res = cos_decode_name(doc, &offset, &cosobj);
+        break;
+
+    case '[':
+        res = cos_decode_list(doc, &offset, &cosobj);
+        break;
+
+    case 't':
+    case 'T':
+    case 'f':
+    case 'F':
+        res = cos_decode_boolean(doc, &offset, &cosobj);
+        break;
+
+    case 'n':
+    case 'N':
+        res = cos_decode_null(doc, &offset, &cosobj);
+        break;
+
+    default:
+        res = -1; /* syntax error */
+        break;
+    }
+
+
+    if (res == 0) {
+        *cosobj_out = cosobj;
+        *offset_out = offset;
+    }
+
+    return res;
+}
diff --git a/src/cos_object.c b/src/cos_object.c
new file mode 100644
index 0000000..96c669e
--- /dev/null
+++ b/src/cos_object.c
@@ -0,0 +1,139 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "nspdferror.h"
+#include "cos_object.h"
+
+
+/**
+ * free a cos object and everything it owns
+ *
+ * Dictionaries and arrays are freed recursively including every key and
+ * value they hold.
+ *
+ * \param cos_obj the object to free, may be NULL in which case nothing is
+ *                done
+ * \return NSPDFERROR_OK
+ */
+nspdferror cos_free_object(struct cos_object *cos_obj)
+{
+    struct cos_dictionary_entry *dentry;
+    struct cos_array_entry *aentry;
+
+    if (cos_obj == NULL) {
+        return NSPDFERROR_OK;
+    }
+
+    switch (cos_obj->type) {
+    case COS_TYPE_NAME:
+        free(cos_obj->u.n);
+        break;
+
+    case COS_TYPE_STRING:
+        if (cos_obj->u.s != NULL) {
+            free(cos_obj->u.s->data);
+            free(cos_obj->u.s);
+        }
+        break;
+
+    case COS_TYPE_DICTIONARY:
+        dentry = cos_obj->u.dictionary;
+        while (dentry != NULL) {
+            struct cos_dictionary_entry *odentry;
+
+            cos_free_object(dentry->key);
+            cos_free_object(dentry->value);
+
+            odentry = dentry;
+            dentry = dentry->next;
+            free(odentry);
+        }
+        break;
+
+    case COS_TYPE_ARRAY:
+        aentry = cos_obj->u.array;
+        while (aentry != NULL) {
+            struct cos_array_entry *oaentry;
+
+            cos_free_object(aentry->value);
+
+            oaentry = aentry;
+            aentry = aentry->next;
+            free(oaentry);
+        }
+        /* must not fall through: u.stream shares storage with u.array so
+         * the stream case would free the already released list head
+         */
+        break;
+
+    case COS_TYPE_STREAM:
+        free(cos_obj->u.stream);
+        break;
+
+    case COS_TYPE_REFERENCE:
+        free(cos_obj->u.reference);
+        break;
+
+    default:
+        /* remaining types hold no separately allocated data */
+        break;
+    }
+    free(cos_obj);
+
+    return NSPDFERROR_OK;
+}
+
+/**
+ * find the value for a key in a dictionary
+ *
+ * The value stays in the dictionary, only a pointer to it is returned.
+ *
+ * \param dict the dictionary object to search
+ * \param key the name to look for
+ * \param value_out updated with the value of the first matching entry
+ * \return NSPDFERROR_OK if the key was found, NSPDFERROR_TYPE if dict is
+ *         not a dictionary, NSPDFERROR_NOTFOUND if no entry has the key
+ */
+nspdferror
+cos_dictionary_get_value(struct cos_object *dict,
+                         const char *key,
+                         struct cos_object **value_out)
+{
+    const struct cos_dictionary_entry *cur;
+
+    if (dict->type != COS_TYPE_DICTIONARY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    for (cur = dict->u.dictionary; cur != NULL; cur = cur->next) {
+        if (strcmp(cur->key->u.n, key) != 0) {
+            continue;
+        }
+        *value_out = cur->value;
+        return NSPDFERROR_OK;
+    }
+
+    return NSPDFERROR_NOTFOUND;
+}
+
+/**
+ * extracts a value for a key in a dictionary.
+ *
+ * The first entry with a matching key is unlinked from the dictionary, its
+ * key object and the entry itself are freed and the value is handed to the
+ * caller.
+ *
+ * \param dict the dictionary object to search
+ * \param key the name to look for
+ * \param value_out updated with the value removed from the dictionary
+ * \return NSPDFERROR_OK if the key was found, NSPDFERROR_TYPE if dict is
+ *         not a dictionary, NSPDFERROR_NOTFOUND if no entry has the key
+ */
+nspdferror
+cos_dictionary_extract_value(struct cos_object *dict,
+                             const char *key,
+                             struct cos_object **value_out)
+{
+    struct cos_dictionary_entry **link; /* pointer that leads to candidate */
+    struct cos_dictionary_entry *found;
+
+    if (dict->type != COS_TYPE_DICTIONARY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    for (link = &dict->u.dictionary; *link != NULL; link = &(*link)->next) {
+        found = *link;
+        if (strcmp(found->key->u.n, key) == 0) {
+            *value_out = found->value;
+            /* unlink before releasing the entry */
+            *link = found->next;
+            cos_free_object(found->key);
+            free(found);
+            return NSPDFERROR_OK;
+        }
+    }
+
+    return NSPDFERROR_NOTFOUND;
+}
+
+/**
+ * get the value of an integer object
+ *
+ * \param cobj the object to read
+ * \param value_out updated with the integer value
+ * \return NSPDFERROR_OK on success or NSPDFERROR_TYPE if the object is not
+ *         an integer
+ */
+nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
+{
+    if (cobj->type == COS_TYPE_INT) {
+        *value_out = cobj->u.i;
+        return NSPDFERROR_OK;
+    }
+
+    return NSPDFERROR_TYPE;
+}
+
+/**
+ * get an object as a dictionary
+ *
+ * References are not resolved here, only an object that is itself a
+ * dictionary is accepted.
+ *
+ * \param cobj the object to check
+ * \param value_out updated with cobj when it is a dictionary
+ * \return NSPDFERROR_OK on success or NSPDFERROR_TYPE if the object is not
+ *         a dictionary
+ */
+nspdferror
+cos_get_dictionary(struct cos_object *cobj,
+                   struct cos_object **value_out)
+{
+    if (cobj->type == COS_TYPE_DICTIONARY) {
+        *value_out = cobj;
+        return NSPDFERROR_OK;
+    }
+
+    return NSPDFERROR_TYPE;
+}
diff --git a/src/cos_object.h b/src/cos_object.h
new file mode 100644
index 0000000..65b3ed5
--- /dev/null
+++ b/src/cos_object.h
@@ -0,0 +1,98 @@
+struct pdf_doc;
+
+/** kinds of COS object, stored in the type member of struct cos_object */
+enum cos_type {
+ COS_TYPE_NULL,
+ COS_TYPE_BOOL,
+ COS_TYPE_INT,
+ COS_TYPE_REAL,
+ COS_TYPE_NAME,
+ COS_TYPE_STRING,
+ COS_TYPE_ARRAY,
+ COS_TYPE_DICTIONARY,
+ COS_TYPE_NAMETREE,
+ COS_TYPE_NUMBERTREE,
+ COS_TYPE_STREAM,
+ COS_TYPE_REFERENCE,
+};
+
+struct cos_object;
+
+/** one key/value pair of a dictionary, held as a singly linked list node */
+struct cos_dictionary_entry {
+ /** next key/value in dictionary */
+ struct cos_dictionary_entry *next;
+
+ /** key (name) */
+ struct cos_object *key;
+
+ /** value */
+ struct cos_object *value;
+};
+
+/** one value of an array, held as a singly linked list node */
+struct cos_array_entry {
+ /** next value in array */
+ struct cos_array_entry *next;
+
+ /** value */
+ struct cos_object *value;
+};
+
+/** growable byte string */
+struct cos_string {
+ /** heap buffer holding the string bytes, NULL until first append */
+ uint8_t *data;
+ /** number of bytes of data in use */
+ size_t length;
+ /** number of bytes allocated for data */
+ size_t alloc;
+};
+
+/** identifies an indirect object by id and generation */
+struct cos_reference {
+ /** id of indirect object */
+ uint64_t id;
+
+ /** generation of indirect object */
+ uint64_t generation;
+};
+
+/** a COS object, the type member selects which union member is valid */
+struct cos_object {
+ /** kind of object, one of enum cos_type */
+ int type;
+ union {
+ /** boolean */
+ bool b;
+
+ /** integer */
+ int64_t i;
+
+ /** real */
+ double r;
+
+ /** name */
+ char *n;
+
+ /** string */
+ struct cos_string *s;
+
+ /** stream data */
+ uint8_t *stream;
+
+ /** dictionary, head of the list of key/value entries */
+ struct cos_dictionary_entry *dictionary;
+
+ /** array, head of the list of value entries */
+ struct cos_array_entry *array;
+
+ /** reference */
+ struct cos_reference *reference;
+
+ } u;
+};
+
+/**
+ * decode the object at an offset in the document
+ *
+ * On success (zero return) the decoded object is placed in cosobj_out and
+ * offset_out is updated.
+ */
+int cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+
+/** free an object, recursing into dictionary and array members */
+nspdferror cos_free_object(struct cos_object *cos_obj);
+
+/** look up the value for a key, the value stays in the dictionary */
+nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+/** look up the value for a key and remove its entry from the dictionary */
+nspdferror cos_dictionary_extract_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+
+/** get the value of an integer object, NSPDFERROR_TYPE for other types */
+nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out);
+
+/** check an object is a dictionary, NSPDFERROR_TYPE for other types */
+nspdferror cos_get_dictionary(struct cos_object *cobj, struct cos_object **value_out);
+
+
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
new file mode 100644
index 0000000..9b92bd0
--- /dev/null
+++ b/src/pdf_doc.c
@@ -0,0 +1,47 @@
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include "nspdferror.h"
+#include "byte_class.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+/**
+ * move offset to next non whitespace byte
+ *
+ * Comments are treated as whitespace: from a comment byte everything up to
+ * the next end of line byte is skipped as well.
+ *
+ * \param doc the pdf document
+ * \param offset the offset to advance
+ * \return always 0
+ */
+int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
+{
+    uint64_t pos = *offset;
+    uint8_t c;
+
+    /* TODO sort out keeping offset in range */
+    for (;;) {
+        c = DOC_BYTE(doc, pos);
+        if ((bclass[c] & (BC_WSPC | BC_CMNT)) == 0) {
+            break; /* first byte that is neither whitespace nor comment */
+        }
+        pos++;
+
+        if ((bclass[c] & BC_CMNT) == 0) {
+            continue;
+        }
+
+        /* skip comments, they run up to the end of line marker */
+        while ((bclass[DOC_BYTE(doc, pos)] & BC_EOLM) == 0) {
+            pos++;
+        }
+    }
+
+    *offset = pos;
+    return 0;
+}
+
+/**
+ * move offset to next non eol byte
+ *
+ * \param doc the pdf document
+ * \param offset the offset to advance
+ * \return always 0
+ */
+int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
+{
+    uint64_t pos = *offset;
+
+    /* TODO sort out keeping offset in range */
+    while ((bclass[DOC_BYTE(doc, pos)] & BC_EOLM) != 0) {
+        pos++;
+    }
+
+    *offset = pos;
+    return 0;
+}
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
new file mode 100644
index 0000000..696c121
--- /dev/null
+++ b/src/pdf_doc.h
@@ -0,0 +1,42 @@
+/** indirect object, one entry of the document cross reference table */
+struct xref_table_entry {
+ /** reference identifier */
+ struct cos_reference ref;
+
+ /** offset of object */
+ uint64_t offset;
+
+ /** indirect object if already decoded */
+ struct cos_object *o;
+};
+
+
+/** pdf document */
+struct pdf_doc {
+ /* presumably the raw input data and its size - TODO confirm against the
+  * code that fills them in
+  */
+ uint8_t *buffer;
+ uint64_t buffer_length;
+
+ uint8_t *start; /* start of pdf document in input stream */
+ /* used as the exclusive upper bound for offsets from start */
+ uint64_t length;
+
+ /* presumably the pdf version from the file header - TODO confirm */
+ int major;
+ int minor;
+
+ /**
+ * Indirect object cross reference table
+ */
+ uint64_t xref_size;
+ struct xref_table_entry *xref_table;
+
+ /* NOTE(review): these look like the Root, Encrypt, Info and ID trailer
+  * entries - confirm against the trailer decoding code
+  */
+ struct cos_object *root;
+ struct cos_object *encrypt;
+ struct cos_object *info;
+ struct cos_object *id;
+
+};
+
+/* byte data accessor, allows for more complex buffer handling in future
+ *
+ * yields the byte at offset from the start of the document. No range check
+ * is made so the caller must keep offset inside the document. Both
+ * arguments are parenthesised so any expression may be passed.
+ */
+#define DOC_BYTE(doc, offset) ((doc)->start[(offset)])
+
+int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset);
+int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset);
diff --git a/src/xref.c b/src/xref.c
index d6a07b8..5e5ac8b 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -7,136 +7,12 @@
#include "nspdferror.h"
#include "byte_class.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
#define SLEN(x) (sizeof((x)) - 1)
-enum cos_type {
- COS_TYPE_NULL,
- COS_TYPE_BOOL,
- COS_TYPE_INT,
- COS_TYPE_REAL,
- COS_TYPE_NAME,
- COS_TYPE_STRING,
- COS_TYPE_ARRAY,
- COS_TYPE_DICTIONARY,
- COS_TYPE_NAMETREE,
- COS_TYPE_NUMBERTREE,
- COS_TYPE_STREAM,
- COS_TYPE_REFERENCE,
-};
-
-struct cos_object;
-
-struct cos_dictionary_entry {
- /** next key/value in dictionary */
- struct cos_dictionary_entry *next;
-
- /** key (name) */
- struct cos_object *key;
-
- /** value */
- struct cos_object *value;
-};
-
-struct cos_array_entry {
- /** next value in array */
- struct cos_array_entry *next;
-
- /** value */
- struct cos_object *value;
-};
-
-struct cos_string {
- uint8_t *data;
- size_t length;
- size_t alloc;
-};
-
-struct cos_reference {
- /** id of indirect object */
- uint64_t id;
-
- /* generation of indirect object */
- uint64_t generation;
-};
-
-struct cos_object {
- int type;
- union {
- /** boolean */
- bool b;
-
- /** integer */
- int64_t i;
-
- /** real */
- double r;
-
- /** name */
- char *n;
-
- /** string */
- struct cos_string *s;
-
- /** stream data */
- uint8_t *stream;
-
- /* dictionary */
- struct cos_dictionary_entry *dictionary;
-
- /* array */
- struct cos_array_entry *array;
-
- /** reference */
- struct cos_reference *reference;
-
- } u;
-};
-
-
-/** indirect object */
-struct cos_indirect_object {
- /* reference identifier */
- struct cos_reference ref;
-
- /** offset of object */
- uint64_t offset;
-
- /* direct object if already decoded */
- struct cos_object *o;
-};
-
-
-/** pdf document */
-struct pdf_doc {
- uint8_t *buffer;
- uint64_t buffer_length;
-
- uint8_t *start; /* start of pdf document in input stream */
- uint64_t length;
-
- int major;
- int minor;
-
- /**
- * Indirect object cross reference table
- */
- uint64_t xref_size;
- struct cos_indirect_object *xref_table;
-
- struct cos_object *root;
- struct cos_object *encrypt;
- struct cos_object *info;
- struct cos_object *id;
-
-};
-
-
-int cos_decode_object(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out);
-
int
read_whole_pdf(struct pdf_doc *doc, const char *fname)
{
@@ -178,46 +54,7 @@ read_whole_pdf(struct pdf_doc *doc, const char *fname)
#define STARTXREF_SEARCH_SIZE 1024
-/* byte data acessory, allows for more complex buffer handling in future */
-#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-
-/**
- * move offset to next non whitespace byte
- */
-static int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
-{
- uint8_t c;
- /* TODO sort out keeping offset in range */
- c = DOC_BYTE(doc, *offset);
- while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
- (*offset)++;
- /* skip comments */
- if ((bclass[c] & BC_CMNT) != 0) {
- c = DOC_BYTE(doc, *offset);
- while ((bclass[c] & BC_EOLM ) == 0) {
- (*offset)++;
- c = DOC_BYTE(doc, *offset);
- }
- }
- c = DOC_BYTE(doc, *offset);
- }
- return 0;
-}
-/**
- * move offset to next non eol byte
- */
-static int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
-{
- uint8_t c;
- /* TODO sort out keeping offset in range */
- c = DOC_BYTE(doc, *offset);
- while ((bclass[c] & BC_EOLM) != 0) {
- (*offset)++;
- c = DOC_BYTE(doc, *offset);
- }
- return 0;
-}
static nspdferror
doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
@@ -388,842 +225,7 @@ int check_header(struct pdf_doc *doc)
}
-nspdferror cos_free_object(struct cos_object *cos_obj)
-{
- struct cos_dictionary_entry *dentry;
- struct cos_array_entry *aentry;
-
- switch (cos_obj->type) {
- case COS_TYPE_NAME:
- free(cos_obj->u.n);
- break;
-
- case COS_TYPE_STRING:
- free(cos_obj->u.s->data);
- free(cos_obj->u.s);
- break;
-
- case COS_TYPE_DICTIONARY:
- dentry = cos_obj->u.dictionary;
- while (dentry != NULL) {
- struct cos_dictionary_entry *odentry;
-
- cos_free_object(dentry->key);
- cos_free_object(dentry->value);
-
- odentry = dentry;
- dentry = dentry->next;
- free(odentry);
- }
- break;
- case COS_TYPE_ARRAY:
- aentry = cos_obj->u.array;
- while (aentry != NULL) {
- struct cos_array_entry *oaentry;
-
- cos_free_object(aentry->value);
-
- oaentry = aentry;
- aentry = aentry->next;
- free(oaentry);
- }
-
- case COS_TYPE_STREAM:
- free(cos_obj->u.stream);
- break;
-
- }
- free(cos_obj);
-
- return NSPDFERROR_OK;
-}
-
-int cos_decode_number(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- struct cos_object *cosobj;
- uint8_t c; /* current byte from source data */
- unsigned int len; /* number of decimal places in number */
- uint8_t num[21]; /* temporary buffer for decimal values */
- uint64_t offset; /* current offset of source data */
-
- offset = *offset_out;
-
- for (len = 0; len < sizeof(num); len++) {
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_DCML) != BC_DCML) {
- int64_t result = 0; /* parsed result */
- uint64_t tens;
-
- if (len == 0) {
- return -2; /* parse error no decimals in input */
- }
- /* sum value from each place */
- for (tens = 1; len > 0; tens = tens * 10, len--) {
- result += (num[len - 1] * tens);
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
-
- cosobj->type = COS_TYPE_INT;
- cosobj->u.i = result;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return 0;
- }
- num[len] = c - '0';
- offset++;
- }
- return -1; /* number too long */
-}
-
-#define COS_STRING_ALLOC 32
-
-nspdferror
-cos_string_append(struct cos_string *s, uint8_t c)
-{
- //printf("appending 0x%x to %p len %d alloc %d\n", c, s->data, s->length, s->alloc);
- if (s->length == s->alloc) {
- uint8_t *ns;
- ns = realloc(s->data, s->alloc + COS_STRING_ALLOC);
- if (ns == NULL) {
- return NSPDFERROR_NOMEM;
- }
- s->data = ns;
- s->alloc += COS_STRING_ALLOC;
- }
- s->data[s->length++] = c;
- return NSPDFERROR_OK;
-}
-
-/**
- * literal string processing
- *
- */
-nspdferror
-cos_decode_string(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- unsigned int pdepth = 1; /* depth of open parens */
- struct cos_string *cstring;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '(') {
- return NSPDFERROR_SYNTAX;
- }
-
- cstring = calloc(1, sizeof(*cstring));
- if (cstring == NULL) {
- return NSPDFERROR_NOMEM;
- }
-
- cosobj = calloc(1, sizeof(*cosobj));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_STRING;
- cosobj->u.s = cstring;
-
- while (pdepth > 0) {
- c = DOC_BYTE(doc, offset++);
-
- if (c == ')') {
- pdepth--;
- if (pdepth == 0) {
- break;
- }
- } else if (c == '(') {
- pdepth++;
- } else if ((bclass[c] & BC_EOLM ) != 0) {
- /* unescaped end of line characters are translated to a single
- * newline
- */
- c = DOC_BYTE(doc, offset);
- while ((bclass[c] & BC_EOLM) != 0) {
- offset++;
- c = DOC_BYTE(doc, offset);
- }
- c = '\n';
- } else if (c == '\\') {
- /* escaped chars */
- c = DOC_BYTE(doc, offset++);
- switch (c) {
- case 'n':
- c = '\n';
- break;
-
- case 'r':
- c = '\r';
- break;
-
- case 't':
- c = '\t';
- break;
-
- case 'b':
- c = '\b';
- break;
-
- case 'f':
- c = '\f';
- break;
-
- case '(':
- c = '(';
- break;
-
- case ')':
- c = ')';
- break;
-
- case '\\':
- c = '\\';
- break;
-
- default:
-
- if ((bclass[c] & BC_EOLM) != 0) {
- /* escaped end of line, swallow it */
- c = DOC_BYTE(doc, offset++);
- while ((bclass[c] & BC_EOLM) != 0) {
- c = DOC_BYTE(doc, offset++);
- }
- } else if ((bclass[c] & BC_OCTL) != 0) {
- /* octal value */
- uint8_t val;
- val = (c - '0');
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_OCTL) != 0) {
- offset++;
- val = (val << 3) | (c - '0');
- c = DOC_BYTE(doc, offset);
- if ((bclass[c] & BC_OCTL) != 0) {
- offset++;
- val = (val << 3) | (c - '0');
- c = val;
- }
- }
- } /* else invalid (skip backslash) */
- break;
- }
- }
-
- /* c contains the character to add to the string */
- cos_string_append(cstring, c);
- }
-
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
-}
-
-uint8_t xtoi(uint8_t x)
-{
- if (x >= '0' && x <= '9') {
- x = x - '0';
- } else if (x >= 'a' && x <='f') {
- x = x - 'a' + 10;
- } else if (x >= 'A' && x <='F') {
- x = x - 'A' + 10;
- }
- return x;
-}
-
-nspdferror
-cos_decode_hex_string(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- uint8_t value = 0;
- struct cos_string *cstring;
- bool first = true;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '<') {
- return NSPDFERROR_SYNTAX;
- }
-
- cstring = calloc(1, sizeof(*cstring));
- if (cstring == NULL) {
- return NSPDFERROR_NOMEM;
- }
-
- cosobj = calloc(1, sizeof(*cosobj));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_STRING;
- cosobj->u.s = cstring;
-
- for (; offset < doc->length; offset++) {
- c = DOC_BYTE(doc, offset);
- if (c == '>') {
- if (first == false) {
- cos_string_append(cstring, value);
- }
- offset++;
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return NSPDFERROR_OK;
- } else if ((bclass[c] & BC_HEXL) != 0) {
- if (first) {
- value = xtoi(c) << 4;
- first = false;
- } else {
- value |= xtoi(c);
- first = true;
- cos_string_append(cstring, value);
- }
- } else if ((bclass[c] & BC_WSPC) == 0) {
- break; /* unknown byte value in string */
- }
- }
- return NSPDFERROR_SYNTAX;
-}
-
-
-int cos_decode_dictionary(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- struct cos_dictionary_entry *entry;
- struct cos_object *key;
- struct cos_object *value;
- int res;
-
- offset = *offset_out;
-
- if ((DOC_BYTE(doc, offset) != '<') ||
- (DOC_BYTE(doc, offset + 1) != '<')) {
- return -1; /* syntax error */
- }
- offset += 2;
- doc_skip_ws(doc, &offset);
-
- printf("found a dictionary\n");
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
- cosobj->type = COS_TYPE_DICTIONARY;
-
- while ((DOC_BYTE(doc, offset) != '>') &&
- (DOC_BYTE(doc, offset + 1) != '>')) {
-
- res = cos_decode_object(doc, &offset, &key);
- if (res != 0) {
- /* todo free up any dictionary entries already created */
- printf("key object decode failed\n");
- return res;
- }
- if (key->type != COS_TYPE_NAME) {
- /* key value pairs without a name */
- printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
- return -1; /* syntax error */
- }
- printf("key: %s\n", key->u.n);
-
- res = cos_decode_object(doc, &offset, &value);
- if (res != 0) {
- printf("Unable to decode value object in dictionary\n");
- /* todo free up any dictionary entries already created */
- return res;
- }
-
- /* add dictionary entry */
- entry = calloc(1, sizeof(struct cos_dictionary_entry));
- if (entry == NULL) {
- /* todo free up any dictionary entries already created */
- return -1; /* memory error */
- }
-
- entry->key = key;
- entry->value = value;
- entry->next = cosobj->u.dictionary;
-
- cosobj->u.dictionary = entry;
-
- }
- offset += 2; /* skip closing >> */
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return 0;
-}
-
-
-nspdferror
-cos_decode_list(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- struct cos_array_entry *entry;
- struct cos_object *value;
- nspdferror res;
-
- offset = *offset_out;
-
- /* sanity check first token is list open */
- if (DOC_BYTE(doc, offset) != '[') {
- printf("not a [\n");
- return NSPDFERROR_SYNTAX; /* syntax error */
- }
- offset++;
-
- /* advance offset to next token */
- res = doc_skip_ws(doc, &offset);
- if (res != NSPDFERROR_OK) {
- return res;
- }
-
- printf("found a list\n");
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return NSPDFERROR_NOMEM;
- }
- cosobj->type = COS_TYPE_ARRAY;
-
- while (DOC_BYTE(doc, offset) != ']') {
-
- res = cos_decode_object(doc, &offset, &value);
- if (res != NSPDFERROR_OK) {
- cos_free_object(cosobj);
- printf("Unable to decode value object in list\n");
- return res;
- }
-
- /* add entry to array */
- entry = calloc(1, sizeof(struct cos_array_entry));
- if (entry == NULL) {
- cos_free_object(cosobj);
- return NSPDFERROR_NOMEM;
- }
-
- entry->value = value;
- entry->next = cosobj->u.array;
-
- cosobj->u.array = entry;
- }
- offset++; /* skip closing ] */
-
- doc_skip_ws(doc, &offset);
-
- *cosobj_out = cosobj;
- *offset_out = offset;
-
- return 0;
-}
-
-#define NAME_MAX_LENGTH 127
-
-/**
- * decode a name object
- *
- * \todo deal with # symbols on pdf versions 1.2 and later
- */
-int cos_decode_name(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- char name[NAME_MAX_LENGTH + 1];
- int idx = 0;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if (c != '/') {
- return -1; /* names must be prefixed with a / */
- }
- printf("found a name\n");
-
- c = DOC_BYTE(doc, offset);
- while ((idx <= NAME_MAX_LENGTH) &&
- ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
- offset++;
- //printf("%c", c);
- name[idx++] = c;
- c = DOC_BYTE(doc, offset);
- }
- //printf("\nidx: %d\n", idx);
- if (idx > NAME_MAX_LENGTH) {
- /* name length exceeded implementation limit */
- return -1;
- }
- name[idx] = 0;
-
- //printf("name: %s\n", name);
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
-
- cosobj->type = COS_TYPE_NAME;
- cosobj->u.n = strdup(name);
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return 0;
-}
-
-
-int cos_decode_boolean(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
- bool value;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if ((c == 't') || (c == 'T')) {
- /* true branch */
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'r') && (c != 'R')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'u') && (c != 'U')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'e') && (c != 'E')) {
- return -1; /* syntax error */
- }
- value = true;
-
- } else if ((c == 'f') || (c == 'F')) {
- /* false branch */
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'a') && (c != 'A')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 's') && (c != 'S')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'e') && (c != 'E')) {
- return -1; /* syntax error */
- }
-
- value = false;
-
- } else {
- return -1; /* syntax error */
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
-
- cosobj->type = COS_TYPE_BOOL;
- cosobj->u.b = value;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return 0;
-
-}
-
-int cos_decode_null(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj;
- uint8_t c;
-
- offset = *offset_out;
-
- c = DOC_BYTE(doc, offset++);
- if ((c != 'n') && (c != 'N')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'u') && (c != 'U')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
- c = DOC_BYTE(doc, offset++);
- if ((c != 'l') && (c != 'L')) {
- return -1; /* syntax error */
- }
-
- doc_skip_ws(doc, &offset);
-
- cosobj = calloc(1, sizeof(struct cos_object));
- if (cosobj == NULL) {
- return -1; /* memory error */
- }
-
- cosobj->type = COS_TYPE_NULL;
- *offset_out = offset;
-
- return 0;
-}
-
-/**
- * attempt to decode the stream into a reference
- *
- * The stream has already had a positive integer decoded from it. If another
- * non-negative integer follows, and an 'R' character after that, it is a
- * reference; otherwise bail out. Not finding a reference is not an error!
- *
- * \param doc the pdf document
- * \param offset_out offset of current cursor in stream
- * \param cosobj_out the object to return into, on input contains the first
- * integer
- */
-int cos_attempt_decode_reference(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- struct cos_object *cosobj; /* possible generation object */
- uint8_t c;
- int res;
- struct cos_reference *nref; /* new reference */
-
- offset = *offset_out;
-
- res = cos_decode_number(doc, &offset, &cosobj);
- if (res != 0) {
- return 0; /* no error if object could not be decoded */
- }
-
- if (cosobj->type != COS_TYPE_INT) {
- /* next object was not an integer so not a reference */
- cos_free_object(cosobj);
- return 0;
- }
-
- if (cosobj->u.i < 0) {
- /* integer was negative so not a reference (generations must be
- * non-negative)
- */
- cos_free_object(cosobj);
- return 0;
-
- }
-
- /* two int in a row, look for the R */
- c = DOC_BYTE(doc, offset++);
- if (c != 'R') {
- /* no R so not a reference */
- cos_free_object(cosobj);
- return 0;
- }
-
- /* found reference */
-
- printf("found reference\n");
- doc_skip_ws(doc, &offset);
-
- nref = calloc(1, sizeof(struct cos_reference));
- if (nref == NULL) {
- /* todo free objects */
- return -1; /* memory error */
- }
-
- nref->id = (*cosobj_out)->u.i;
- nref->generation = cosobj->u.i;
-
- cos_free_object(*cosobj_out);
-
- cosobj->type = COS_TYPE_REFERENCE;
- cosobj->u.reference = nref;
-
- *cosobj_out = cosobj;
-
- *offset_out = offset;
-
- return 0;
-}
-
-/**
- * Decode input stream into an object
- *
- * lex and parse a byte stream to generate COS objects
- *
- * lexing the input.
- * check first character:
- *
- * < either a hex string or a dictionary
- * second char < means dictionary else hex string
- * - either an integer or real
- * + either an integer or real
- * 0-9 an integer, unsigned integer or real
- * . a real number
- * ( a string
- * / a name
- * [ a list
- * t|T boolean true
- * f|F boolean false
- * n|N null
- *
- * Grammar is:
- * cos_object:
- * TOK_NULL |
- * TOK_BOOLEAN |
- * TOK_INT |
- * TOK_REAL |
- * TOK_NAME |
- * TOK_STRING |
- * list |
- * dictionary |
- * object_reference;
- *
- * list:
- * '[' listargs ']';
- *
- * listargs:
- * cos_object
- * |
- * listargs cos_object
- * ;
- *
- * object_reference:
- * TOK_UINT TOK_UINT 'R';
- */
-int cos_decode_object(struct pdf_doc *doc,
- uint64_t *offset_out,
- struct cos_object **cosobj_out)
-{
- uint64_t offset;
- int res;
- struct cos_object *cosobj;
-
- offset = *offset_out;
-
- /* object could be any type; use the first char to try and select */
- switch (DOC_BYTE(doc, offset)) {
-
- case '-':
- case '+':
- case '.':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- res = cos_decode_number(doc, &offset, &cosobj);
- /* if type is positive integer try to check for reference */
- if ((res == 0) &&
- (cosobj->type == COS_TYPE_INT) &&
- (cosobj->u.i > 0)) {
- res = cos_attempt_decode_reference(doc, &offset, &cosobj);
- }
- break;
-
- case '<':
- if (DOC_BYTE(doc, offset + 1) == '<') {
- res = cos_decode_dictionary(doc, &offset, &cosobj);
- } else {
- res = cos_decode_hex_string(doc, &offset, &cosobj);
- }
- break;
-
- case '(':
- res = cos_decode_string(doc, &offset, &cosobj);
- break;
-
- case '/':
- res = cos_decode_name(doc, &offset, &cosobj);
- break;
-
- case '[':
- res = cos_decode_list(doc, &offset, &cosobj);
- break;
-
- case 't':
- case 'T':
- case 'f':
- case 'F':
- res = cos_decode_boolean(doc, &offset, &cosobj);
- break;
-
- case 'n':
- case 'N':
- res = cos_decode_null(doc, &offset, &cosobj);
- break;
-
- default:
- res = -1; /* syntax error */
- }
-
-
- if (res == 0) {
- *cosobj_out = cosobj;
- *offset_out = offset;
- }
-
- return res;
-}
@@ -1335,7 +337,7 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
if ((DOC_BYTE(doc, offset++) == 'n')) {
if (objnumber < doc->xref_size) {
- struct cos_indirect_object *indobj;
+ struct xref_table_entry *indobj;
indobj = doc->xref_table + objnumber;
indobj->ref.id = objnumber;
@@ -1357,69 +359,6 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
return NSPDFERROR_OK;
}
-nspdferror
-cos_dictionary_get_value(struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
-{
- struct cos_dictionary_entry *entry;
-
- if (dict->type != COS_TYPE_DICTIONARY) {
- return NSPDFERROR_TYPE;
- }
-
- entry = dict->u.dictionary;
- while (entry != NULL) {
- if (strcmp(entry->key->u.n, key) == 0) {
- *value_out = entry->value;
- return NSPDFERROR_OK;
- }
- entry = entry->next;
- }
- return NSPDFERROR_NOTFOUND;
-}
-
-/**
- * extracts a value for a key in a dictionary.
- *
- * this finds and returns a value for a given key removing it from a dictionary
- */
-nspdferror
-cos_dictionary_extract_value(struct cos_object *dict,
- const char *key,
- struct cos_object **value_out)
-{
- struct cos_dictionary_entry *entry;
- struct cos_dictionary_entry **prev;
-
- if (dict->type != COS_TYPE_DICTIONARY) {
- return NSPDFERROR_TYPE;
- }
-
- prev = &dict->u.dictionary;
- entry = *prev;
- while (entry != NULL) {
- if (strcmp(entry->key->u.n, key) == 0) {
- *value_out = entry->value;
- *prev = entry->next;
- cos_free_object(entry->key);
- free(entry);
- return NSPDFERROR_OK;
- }
- prev = &entry->next;
- entry = *prev;
- }
- return NSPDFERROR_NOTFOUND;
-}
-
-nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
-{
- if (cobj->type != COS_TYPE_INT) {
- return NSPDFERROR_TYPE;
- }
- *value_out = cobj->u.i;
- return NSPDFERROR_OK;
-}
/**
* recursively parse trailers and xref tables
@@ -1480,7 +419,7 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
goto decode_xref_trailer_failed;
}
- doc->xref_table = calloc(size, sizeof(struct cos_indirect_object));
+ doc->xref_table = calloc(size, sizeof(struct xref_table_entry));
if (doc->xref_table == NULL) {
res = NSPDFERROR_NOMEM;
goto decode_xref_trailer_failed;
@@ -1580,7 +519,12 @@ nspdferror decode_trailers(struct pdf_doc *doc)
nspdferror decode_catalog(struct pdf_doc *doc)
{
- return NSPDFERROR_OK;
+ nspdferror res;
+ struct cos_object *catalog;
+
+ res = cos_get_dictionary(doc->root, &catalog);
+
+ return res;
}
nspdferror new_pdf_doc(struct pdf_doc **doc_out)