7 files changed, 1139 insertions, 1068 deletions
diff --git a/src/Makefile b/src/Makefile
index f9ca22c..af806f3 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,11 +2,13 @@
 
 CFLAGS+=-g -Wall -Wextra
 
+OBJS=xref.o byte_class.o cos_decode.o cos_object.o pdf_doc.o
+
 .PHONY:all clean
 
 all:xref
 
-xref:xref.o byte_class.o
+xref:$(OBJS)
 
 clean:
-	${RM} xref xref.o
+	${RM} xref $(OBJS)
diff --git a/src/cos_decode.c b/src/cos_decode.c
new file mode 100644
index 0000000..3936e05
--- /dev/null
+++ b/src/cos_decode.c
@@ -0,0 +1,799 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "byte_class.h"
+#include "nspdferror.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+#define COS_STRING_ALLOC 32
+
+/* Append byte c to s, growing the buffer by COS_STRING_ALLOC when it is full. */
+nspdferror
+cos_string_append(struct cos_string *s, uint8_t c)
+{
+    if (s->length == s->alloc) {
+        size_t grown = s->alloc + COS_STRING_ALLOC;
+        uint8_t *bigger = realloc(s->data, grown);
+        if (bigger == NULL) {
+            return NSPDFERROR_NOMEM;
+        }
+        s->data = bigger;
+        s->alloc = grown;
+    }
+    s->data[s->length++] = c;
+    return NSPDFERROR_OK;
+}
+
+uint8_t xtoi(uint8_t x)
+{
+    /* map an ASCII hex digit to 0-15; other bytes pass through unchanged */
+    if ((x >= '0') && (x <= '9')) {
+        return (uint8_t)(x - '0');
+    }
+    if ((x >= 'a') && (x <= 'f')) {
+        return (uint8_t)(x - 'a' + 10);
+    }
+    return ((x >= 'A') && (x <= 'F')) ? (uint8_t)(x - 'A' + 10) : x;
+}
+
+int cos_decode_number(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    struct cos_object *cosobj;
+    uint8_t c; /* current byte from source data */
+    unsigned int len; /* number of decimal places in number */
+    uint8_t num[21]; /* temporary buffer for decimal values */
+    uint64_t offset; /* current offset of source data */
+    /* Decode a run of decimal digits at *offset_out into a new COS_TYPE_INT object. */
+    offset = *offset_out;
+    /* NOTE(review): no sign or '.' handling here; presumably such input returns -2 - confirm */
+    for (len = 0; len < sizeof(num); len++) {
+        c = DOC_BYTE(doc, offset);
+        if ((bclass[c] & BC_DCML) != BC_DCML) {
+            int64_t result = 0; /* parsed result */
+            uint64_t tens;
+            /* a byte not classed BC_DCML ends the number */
+            if (len == 0) {
+                return -2; /* parse error no decimals in input */
+            }
+            /* sum value from each place */
+            for (tens = 1; len > 0; tens = tens * 10, len--) {
+                result += (num[len - 1] * tens);
+            }
+            /* step over whatever doc_skip_ws() skips after the number */
+            doc_skip_ws(doc, &offset);
+
+            cosobj = calloc(1, sizeof(struct cos_object));
+            if (cosobj == NULL) {
+                return -1; /* memory error */
+            }
+
+            cosobj->type = COS_TYPE_INT;
+            cosobj->u.i = result;
+            /* outputs are only written on this success path */
+            *cosobj_out = cosobj;
+
+            *offset_out = offset;
+
+            return 0;
+        }
+        num[len] = c - '0';
+        offset++;
+    }
+    return -1; /* number too long */
+}
+
+
+/**
+ * Decode a literal string "( ... )" into a new COS_TYPE_STRING object.
+ *
+ * Nested unescaped parens are kept as content, a run of unescaped end of line
+ * bytes becomes one newline and backslash escapes are resolved.
+ *
+ * \return NSPDFERROR_OK on success, NSPDFERROR_SYNTAX if the opening paren is
+ *         missing or the document ends inside the string, otherwise the error
+ *         from a failed allocation. Outputs are only written on success.
+ */
+nspdferror
+cos_decode_string(struct pdf_doc *doc,
+                  uint64_t *offset_out,
+                  struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    unsigned int pdepth = 1; /* depth of open parens */
+    struct cos_string *cstring;
+    nspdferror res;
+
+    offset = *offset_out;
+    c = DOC_BYTE(doc, offset++);
+    if (c != '(') {
+        return NSPDFERROR_SYNTAX;
+    }
+
+    cstring = calloc(1, sizeof(*cstring));
+    if (cstring == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+
+    cosobj = calloc(1, sizeof(*cosobj));
+    if (cosobj == NULL) {
+        free(cstring); /* do not leak the string header */
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_STRING;
+    cosobj->u.s = cstring;
+
+    while (pdepth > 0) {
+        if (offset >= doc->length) {
+            /* document ended before the closing paren */
+            cos_free_object(cosobj);
+            return NSPDFERROR_SYNTAX;
+        }
+        c = DOC_BYTE(doc, offset++);
+        if (c == ')') {
+            pdepth--;
+            if (pdepth == 0) {
+                break;
+            }
+        } else if (c == '(') {
+            pdepth++;
+        } else if ((bclass[c] & BC_EOLM ) != 0) {
+            /* unescaped end of line characters are translated to a single
+             * newline
+             */
+            while ((offset < doc->length) &&
+                   ((bclass[DOC_BYTE(doc, offset)] & BC_EOLM) != 0)) {
+                offset++;
+            }
+            c = '\n';
+        } else if (c == '\\') {
+            /* escaped chars */
+            if (offset >= doc->length) {
+                continue; /* truncated escape, rejected at the loop head */
+            }
+            c = DOC_BYTE(doc, offset++);
+            switch (c) {
+            case 'n':
+                c = '\n';
+                break;
+            case 'r':
+                c = '\r';
+                break;
+            case 't':
+                c = '\t';
+                break;
+            case 'b':
+                c = '\b';
+                break;
+            case 'f':
+                c = '\f';
+                break;
+            case '(':
+            case ')':
+            case '\\':
+                break; /* escaped delimiter stands for itself */
+            default:
+                if ((bclass[c] & BC_EOLM) != 0) {
+                    /* escaped end of line is a continuation: swallow it and
+                     * leave the next byte (maybe a paren) to the main loop
+                     */
+                    while ((offset < doc->length) &&
+                           ((bclass[DOC_BYTE(doc, offset)] & BC_EOLM) != 0)) {
+                        offset++;
+                    }
+                    continue;
+                }
+                if ((bclass[c] & BC_OCTL) != 0) {
+                    /* octal value of one, two or three digits */
+                    uint8_t val = (uint8_t)(c - '0');
+                    unsigned int digits = 1;
+                    while ((digits < 3) && (offset < doc->length) &&
+                           ((bclass[DOC_BYTE(doc, offset)] & BC_OCTL) != 0)) {
+                        val = (uint8_t)((val << 3) | (DOC_BYTE(doc, offset) - '0'));
+                        offset++;
+                        digits++;
+                    }
+                    c = val; /* short escapes must yield their value too */
+                } /* else invalid (skip backslash) */
+                break;
+            }
+        }
+        /* c contains the character to add to the string */
+        res = cos_string_append(cstring, c);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(cosobj);
+            return res;
+        }
+    }
+
+    doc_skip_ws(doc, &offset);
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+    return NSPDFERROR_OK;
+}
+
+
+/* Decode a hex string "<...>" into a new COS_TYPE_STRING object. */
+nspdferror
+cos_decode_hex_string(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset = *offset_out;
+    struct cos_object *cosobj;
+    uint8_t c;
+    uint8_t value = 0; /* pending byte; an unpaired last digit keeps a zero low nibble */
+    struct cos_string *cstring;
+    bool first = true; /* next hex digit is a high nibble */
+    nspdferror res = NSPDFERROR_OK; /* outcome of the latest append */
+    c = DOC_BYTE(doc, offset++);
+    if (c != '<') {
+        return NSPDFERROR_SYNTAX;
+    }
+    cstring = calloc(1, sizeof(*cstring));
+    if (cstring == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj = calloc(1, sizeof(*cosobj));
+    if (cosobj == NULL) {
+        free(cstring); /* do not leak the string header */
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_STRING;
+    cosobj->u.s = cstring;
+
+    for (; offset < doc->length; offset++) {
+        c = DOC_BYTE(doc, offset);
+        if (c == '>') {
+            res = first ? NSPDFERROR_OK : cos_string_append(cstring, value);
+            if (res != NSPDFERROR_OK) {
+                break;
+            }
+            offset++;
+            doc_skip_ws(doc, &offset);
+            *cosobj_out = cosobj;
+            *offset_out = offset;
+            return NSPDFERROR_OK;
+        } else if ((bclass[c] & BC_HEXL) != 0) {
+            if (first) {
+                value = xtoi(c) << 4;
+            } else {
+                value |= xtoi(c);
+                res = cos_string_append(cstring, value);
+                if (res != NSPDFERROR_OK) {
+                    break;
+                }
+            }
+            first = !first;
+        } else if ((bclass[c] & BC_WSPC) == 0) {
+            break; /* unknown byte value in string */
+        }
+    }
+    cos_free_object(cosobj); /* failure: also frees the string */
+    return (res != NSPDFERROR_OK) ? res : NSPDFERROR_SYNTAX;
+}
+
+
+/**
+ * Decode "<< name value ... >>" into a new COS_TYPE_DICTIONARY object. Returns
+ * 0 on success; on failure partial results are freed and nothing is output.
+ */
+int cos_decode_dictionary(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_dictionary_entry *entry;
+    struct cos_object *key;
+    struct cos_object *value;
+    int res;
+
+    offset = *offset_out;
+    if ((DOC_BYTE(doc, offset) != '<') ||
+        (DOC_BYTE(doc, offset + 1) != '<')) {
+        return -1; /* syntax error */
+    }
+    offset += 2;
+    doc_skip_ws(doc, &offset);
+    printf("found a dictionary\n");
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+    cosobj->type = COS_TYPE_DICTIONARY;
+
+    /* carry on until both bytes of the closing >> are present */
+    while ((DOC_BYTE(doc, offset) != '>') ||
+           (DOC_BYTE(doc, offset + 1) != '>')) {
+        res = cos_decode_object(doc, &offset, &key);
+        if (res != 0) {
+            printf("key object decode failed\n");
+            cos_free_object(cosobj);
+            return res;
+        }
+        if (key->type != COS_TYPE_NAME) {
+            /* key value pairs without a name */
+            printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return -1; /* syntax error */
+        }
+        printf("key: %s\n", key->u.n);
+
+        res = cos_decode_object(doc, &offset, &value);
+        if (res != 0) {
+            printf("Unable to decode value object in dictionary\n");
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return res;
+        }
+        /* add dictionary entry */
+        entry = calloc(1, sizeof(struct cos_dictionary_entry));
+        if (entry == NULL) {
+            cos_free_object(value);
+            cos_free_object(key);
+            cos_free_object(cosobj);
+            return -1; /* memory error */
+        }
+        entry->key = key;
+        entry->value = value;
+        entry->next = cosobj->u.dictionary;
+        cosobj->u.dictionary = entry;
+    }
+    offset += 2; /* skip closing >> */
+    doc_skip_ws(doc, &offset);
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+    return 0;
+}
+
+
+/**
+ * Decode an array "[ ... ]" into a new COS_TYPE_ARRAY object.
+ *
+ * Each value is pushed on the front of the entry list, so the list holds the
+ * values in reverse order of appearance. On failure everything is released.
+ */
+nspdferror
+cos_decode_list(struct pdf_doc *doc,
+                uint64_t *offset_out,
+                struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    struct cos_array_entry *entry;
+    struct cos_object *value;
+    nspdferror res;
+
+    offset = *offset_out;
+    /* sanity check first token is list open */
+    if (DOC_BYTE(doc, offset) != '[') {
+        printf("not a [\n");
+        return NSPDFERROR_SYNTAX; /* syntax error */
+    }
+    offset++;
+    /* advance offset to next token */
+    res = doc_skip_ws(doc, &offset);
+    if (res != NSPDFERROR_OK) {
+        return res;
+    }
+    printf("found a list\n");
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return NSPDFERROR_NOMEM;
+    }
+    cosobj->type = COS_TYPE_ARRAY;
+
+    while (DOC_BYTE(doc, offset) != ']') {
+        res = cos_decode_object(doc, &offset, &value);
+        if (res != NSPDFERROR_OK) {
+            cos_free_object(cosobj);
+            printf("Unable to decode value object in list\n");
+            return res;
+        }
+
+        /* add entry to array */
+        entry = calloc(1, sizeof(struct cos_array_entry));
+        if (entry == NULL) {
+            cos_free_object(value); /* not yet owned by the array */
+            cos_free_object(cosobj);
+            return NSPDFERROR_NOMEM;
+        }
+        entry->value = value;
+        entry->next = cosobj->u.array;
+        cosobj->u.array = entry;
+    }
+    offset++; /* skip closing ] */
+    doc_skip_ws(doc, &offset);
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return NSPDFERROR_OK;
+}
+
+#define NAME_MAX_LENGTH 127
+
+/**
+ * decode a name object
+ *
+ * The name runs from the leading / up to the next whitespace or delimiter
+ * class byte and may hold at most NAME_MAX_LENGTH bytes.
+ * \return 0 on success, -1 if the / is missing, the name is too long or
+ *         memory could not be allocated
+ * \todo deal with # symbols on pdf versions 1.2 and later
+ */
+int cos_decode_name(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+    char name[NAME_MAX_LENGTH + 1];
+    int idx = 0;
+
+    offset = *offset_out;
+
+    c = DOC_BYTE(doc, offset++);
+    if (c != '/') {
+        return -1; /* names must be prefixed with a / */
+    }
+    printf("found a name\n");
+
+    c = DOC_BYTE(doc, offset);
+    while ((idx <= NAME_MAX_LENGTH) &&
+           ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
+        offset++;
+        name[idx++] = c;
+        c = DOC_BYTE(doc, offset);
+    }
+    if (idx > NAME_MAX_LENGTH) {
+        /* name length exceeded implementation limit */
+        return -1;
+    }
+    name[idx] = 0;
+    doc_skip_ws(doc, &offset);
+
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+    cosobj->type = COS_TYPE_NAME;
+    cosobj->u.n = strdup(name);
+    if (cosobj->u.n == NULL) {
+        free(cosobj); /* a name object must never carry a NULL name */
+        return -1; /* memory error */
+    }
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+    return 0;
+}
+
+
+/**
+ * Decode the keyword true or false (matched without regard to case) into a
+ * new COS_TYPE_BOOL object.
+ *
+ * The byte following the keyword is not checked for being a token boundary.
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of the first keyword byte, on success moved past
+ *                   the keyword and whatever doc_skip_ws() skips after it
+ * \param cosobj_out receives the new object on success
+ * \return 0 on success, -1 on a keyword mismatch or allocation failure
+ */
+int cos_decode_boolean(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    const char *lower; /* expected keyword, lower case spelling */
+    const char *upper; /* expected keyword, upper case spelling */
+    struct cos_object *obj;
+    uint64_t pos = *offset_out;
+    uint8_t byte;
+    bool truth;
+    size_t k;
+
+    /* the first byte selects which keyword has to follow */
+    byte = DOC_BYTE(doc, pos++);
+    switch (byte) {
+    case 't':
+    case 'T':
+        lower = "true";
+        upper = "TRUE";
+        truth = true;
+        break;
+
+    case 'f':
+    case 'F':
+        lower = "false";
+        upper = "FALSE";
+        truth = false;
+        break;
+
+    default:
+        return -1; /* syntax error */
+    }
+
+    /* every remaining byte may match in either case, stop at the first
+     * mismatch without reading any further
+     */
+    for (k = 1; lower[k] != '\0'; k++) {
+        byte = DOC_BYTE(doc, pos++);
+        if ((byte != lower[k]) && (byte != upper[k])) {
+            return -1; /* syntax error */
+        }
+    }
+
+    doc_skip_ws(doc, &pos);
+
+    obj = calloc(1, sizeof(*obj));
+    if (obj == NULL) {
+        return -1; /* memory error */
+    }
+
+    obj->type = COS_TYPE_BOOL;
+    obj->u.b = truth;
+
+    /* outputs are only written once nothing can fail any more */
+    *cosobj_out = obj;
+    *offset_out = pos;
+
+    return 0;
+
+}
+
+/* Decode the keyword null (any letter case) into a new COS_TYPE_NULL object. */
+int cos_decode_null(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj;
+    uint8_t c;
+
+    offset = *offset_out;
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'n') && (c != 'N')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'u') && (c != 'U')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'l') && (c != 'L')) {
+        return -1; /* syntax error */
+    }
+    c = DOC_BYTE(doc, offset++);
+    if ((c != 'l') && (c != 'L')) {
+        return -1; /* syntax error */
+    }
+    doc_skip_ws(doc, &offset);
+    cosobj = calloc(1, sizeof(struct cos_object));
+    if (cosobj == NULL) {
+        return -1; /* memory error */
+    }
+    cosobj->type = COS_TYPE_NULL;
+
+    /* hand the object to the caller, otherwise it leaks and the caller
+     * reads an unset pointer */
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+    return 0;
+}
+
+/**
+ * attempt to decode the stream into a reference
+ *
+ * The stream has already had a positive integer decoded from it. if another
+ * non-negative integer follows and a R character after that it is a reference,
+ * otherwise bail, but not finding a ref is not an error!
+ *
+ * \param doc the pdf document
+ * \param offset_out offset of current cursor in stream
+ * \param cosobj_out the object to return into, on input contains the first
+ * integer
+ * \return 0 whether or not a reference was found, -1 on memory error in which
+ *         case offset_out and cosobj_out are left unchanged
+ */
+int cos_attempt_decode_reference(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    struct cos_object *cosobj; /* possible generation object */
+    uint8_t c;
+    int res;
+    struct cos_reference *nref; /* new reference */
+
+    offset = *offset_out;
+
+    res = cos_decode_number(doc, &offset, &cosobj);
+    if (res != 0) {
+        return 0; /* no error if object could not be decoded */
+    }
+
+    if (cosobj->type != COS_TYPE_INT) {
+        /* next object was not an integer so not a reference */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    if (cosobj->u.i < 0) {
+        /* integer was negative so not a reference (generations must be
+         * non-negative)
+         */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    /* two int in a row, look for the R */
+    c = DOC_BYTE(doc, offset++);
+    if (c != 'R') {
+        /* no R so not a reference */
+        cos_free_object(cosobj);
+        return 0;
+    }
+
+    /* found reference */
+    printf("found reference\n");
+    doc_skip_ws(doc, &offset);
+
+    nref = calloc(1, sizeof(struct cos_reference));
+    if (nref == NULL) {
+        cos_free_object(cosobj); /* do not leak the generation object */
+        return -1; /* memory error */
+    }
+
+    nref->id = (*cosobj_out)->u.i;
+    nref->generation = cosobj->u.i;
+
+    /* first integer is consumed; the generation object becomes the reference */
+    cos_free_object(*cosobj_out);
+
+    cosobj->type = COS_TYPE_REFERENCE;
+    cosobj->u.reference = nref;
+
+    *cosobj_out = cosobj;
+    *offset_out = offset;
+
+    return 0;
+}
+
+/**
+ * Decode input stream into an object
+ *
+ * lex and parse a byte stream to generate COS objects
+ *
+ * lexing the input.
+ *  check first character:
+ *
+ * < either a hex string or a dictionary
+ *     second char < means dictionary else hex string
+ * - either an integer or real
+ * + either an integer or real
+ * 0-9 an integer, unsigned integer or real
+ * . a real number
+ * ( a string
+ * / a name
+ * [ a list
+ * t|T boolean true
+ * f|F boolean false
+ * n|N null
+ *
+ * Grammar is:
+ * cos_object:
+ *   TOK_NULL |
+ *   TOK_BOOLEAN |
+ *   TOK_INT |
+ *   TOK_REAL |
+ *   TOK_NAME |
+ *   TOK_STRING |
+ *   list |
+ *   dictionary |
+ *   object_reference;
+ *
+ * list:
+ *   '[' listargs ']';
+ *
+ * listargs:
+ *   cos_object
+ *   |
+ *   listargs cos_object
+ *   ;
+ *
+ * object_reference:
+ *   TOK_UINT TOK_UINT 'R';
+ */
+int cos_decode_object(struct pdf_doc *doc,
+                      uint64_t *offset_out,
+                      struct cos_object **cosobj_out)
+{
+    uint64_t offset;
+    int res;
+    struct cos_object *cosobj;
+    offset = *offset_out;
+    /* object could be any type use first char to try and select */
+    switch (DOC_BYTE(doc, offset)) {
+    case '-':
+    case '+':
+    case '.':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+        res = cos_decode_number(doc, &offset, &cosobj);
+        /* if type is positive integer try to check for reference */
+        if ((res == 0) &&
+            (cosobj->type == COS_TYPE_INT) &&
+            (cosobj->u.i > 0)) {
+            res = cos_attempt_decode_reference(doc, &offset, &cosobj);
+            if (res != 0) {
+                cos_free_object(cosobj); /* the integer would leak otherwise */
+            }
+        }
+        break;
+
+    case '<':
+        if (DOC_BYTE(doc, offset + 1) == '<') {
+            res = cos_decode_dictionary(doc, &offset, &cosobj);
+        } else {
+            res = cos_decode_hex_string(doc, &offset, &cosobj);
+        }
+        break;
+
+    case '(':
+        res = cos_decode_string(doc, &offset, &cosobj);
+        break;
+
+    case '/':
+        res = cos_decode_name(doc, &offset, &cosobj);
+        break;
+
+    case '[':
+        res = cos_decode_list(doc, &offset, &cosobj);
+        break;
+
+    case 't':
+    case 'T':
+    case 'f':
+    case 'F':
+        res = cos_decode_boolean(doc, &offset, &cosobj);
+        break;
+
+    case 'n':
+    case 'N':
+        res = cos_decode_null(doc, &offset, &cosobj);
+        break;
+
+    default:
+        res = -1; /* syntax error */
+        break;
+    }
+
+    /* outputs are only updated when an object was decoded */
+    if (res == 0) {
+        *cosobj_out = cosobj;
+        *offset_out = offset;
+    }
+    return res;
+}
diff --git a/src/cos_object.c b/src/cos_object.c
new file mode 100644
index 0000000..96c669e
--- /dev/null
+++ b/src/cos_object.c
@@ -0,0 +1,139 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "nspdferror.h"
+#include "cos_object.h"
+
+
+/**
+ * Release a cos object together with everything it owns: the payload of names,
+ * strings, streams and references and, recursively, every dictionary or array
+ * entry. Always returns NSPDFERROR_OK.
+ */
+nspdferror cos_free_object(struct cos_object *cos_obj)
+{
+    struct cos_dictionary_entry *dentry;
+    struct cos_array_entry *aentry;
+
+    switch (cos_obj->type) {
+    case COS_TYPE_NAME:
+        free(cos_obj->u.n);
+        break;
+    case COS_TYPE_STRING:
+        free(cos_obj->u.s->data);
+        free(cos_obj->u.s);
+        break;
+    case COS_TYPE_DICTIONARY:
+        dentry = cos_obj->u.dictionary;
+        while (dentry != NULL) {
+            struct cos_dictionary_entry *odentry = dentry;
+            cos_free_object(dentry->key);
+            cos_free_object(dentry->value);
+            dentry = dentry->next;
+            free(odentry);
+        }
+        break;
+    case COS_TYPE_ARRAY:
+        aentry = cos_obj->u.array;
+        while (aentry != NULL) {
+            struct cos_array_entry *oaentry = aentry;
+            cos_free_object(aentry->value);
+            aentry = aentry->next;
+            free(oaentry);
+        }
+        /* no fall through: u.stream aliases u.array, whose head is already freed */
+        break;
+    case COS_TYPE_STREAM:
+        free(cos_obj->u.stream);
+        break;
+    case COS_TYPE_REFERENCE:
+        free(cos_obj->u.reference);
+        break;
+    default:
+        break; /* nothing further is released for the remaining types */
+    }
+    free(cos_obj);
+    return NSPDFERROR_OK;
+}
+
+/* Look up key in dict; a match is returned via value_out and stays in dict. */
+nspdferror
+cos_dictionary_get_value(struct cos_object *dict,
+                         const char *key,
+                         struct cos_object **value_out)
+{
+    struct cos_dictionary_entry *cursor;
+
+    if (dict->type != COS_TYPE_DICTIONARY) {
+        return NSPDFERROR_TYPE;
+    }
+    /* linear scan, the first entry whose key name matches wins */
+    for (cursor = dict->u.dictionary; cursor != NULL; cursor = cursor->next) {
+        if (strcmp(cursor->key->u.n, key) != 0) {
+            continue;
+        }
+        *value_out = cursor->value;
+        return NSPDFERROR_OK;
+    }
+    return NSPDFERROR_NOTFOUND;
+}
+
+/**
+ * extracts a value for a key in a dictionary.
+ *
+ * The first entry whose key matches is unlinked; its key object and the entry
+ * are freed while the value is handed to the caller through value_out.
+ */
+nspdferror
+cos_dictionary_extract_value(struct cos_object *dict,
+                             const char *key,
+                             struct cos_object **value_out)
+{
+    struct cos_dictionary_entry **link; /* slot that points at the candidate */
+
+    if (dict->type != COS_TYPE_DICTIONARY) {
+        return NSPDFERROR_TYPE;
+    }
+
+    for (link = &dict->u.dictionary; *link != NULL; link = &(*link)->next) {
+        struct cos_dictionary_entry *hit = *link;
+
+        if (strcmp(hit->key->u.n, key) != 0) {
+            continue;
+        }
+        *value_out = hit->value;
+        *link = hit->next; /* unlink before releasing the entry */
+        cos_free_object(hit->key);
+        free(hit);
+        return NSPDFERROR_OK;
+    }
+
+    return NSPDFERROR_NOTFOUND;
+}
+
+nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
+{
+    if (cobj->type == COS_TYPE_INT) {
+        *value_out = cobj->u.i; /* written only for integer objects */
+        return NSPDFERROR_OK;
+    }
+    return NSPDFERROR_TYPE;
+}
+
+/* Yield cobj itself through value_out when it is a dictionary object. */
+nspdferror
+cos_get_dictionary(struct cos_object *cobj,
+                   struct cos_object **value_out)
+{
+    /* references are not resolved here, they take the type error path */
+    if (cobj->type == COS_TYPE_DICTIONARY) {
+        *value_out = cobj;
+        return NSPDFERROR_OK;
+    }
+    /* anything else leaves value_out untouched */
+    return NSPDFERROR_TYPE;
+}
diff --git a/src/cos_object.h b/src/cos_object.h
new file mode 100644
index 0000000..65b3ed5
--- /dev/null
+++ b/src/cos_object.h
@@ -0,0 +1,98 @@
+#ifndef COS_OBJECT_H
+#define COS_OBJECT_H
+
+struct pdf_doc;
+
+enum cos_type {
+    COS_TYPE_NULL,
+    COS_TYPE_BOOL,
+    COS_TYPE_INT,
+    COS_TYPE_REAL,
+    COS_TYPE_NAME,
+    COS_TYPE_STRING,
+    COS_TYPE_ARRAY,
+    COS_TYPE_DICTIONARY,
+    COS_TYPE_NAMETREE,
+    COS_TYPE_NUMBERTREE,
+    COS_TYPE_STREAM,
+    COS_TYPE_REFERENCE,
+};
+
+struct cos_object;
+
+struct cos_dictionary_entry {
+    /** next key/value in dictionary */
+    struct cos_dictionary_entry *next;
+
+    /** key (name) */
+    struct cos_object *key;
+
+    /** value */
+    struct cos_object *value;
+};
+
+struct cos_array_entry {
+    /** next value in array */
+    struct cos_array_entry *next;
+
+    /** value */
+    struct cos_object *value;
+};
+
+struct cos_string {
+    uint8_t *data; /* bytes of the string */
+    size_t length; /* bytes of data in use */
+    size_t alloc; /* bytes allocated for data */
+};
+
+struct cos_reference {
+    /** id of indirect object */
+    uint64_t id;
+
+    /** generation of indirect object */
+    uint64_t generation;
+};
+
+struct cos_object {
+    int type; /* one of enum cos_type, selects the valid member of u */
+    union {
+        /** boolean */
+        bool b;
+
+        /** integer */
+        int64_t i;
+
+        /** real */
+        double r;
+
+        /** name */
+        char *n;
+
+        /** string */
+        struct cos_string *s;
+
+        /** stream data */
+        uint8_t *stream;
+
+        /** dictionary, head of the entry list */
+        struct cos_dictionary_entry *dictionary;
+
+        /** array, head of the entry list */
+        struct cos_array_entry *array;
+
+        /** reference */
+        struct cos_reference *reference;
+    } u;
+};
+
+/* decoding (cos_decode.c) */
+int cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out);
+
+/* object release and accessors (cos_object.c) */
+nspdferror cos_free_object(struct cos_object *cos_obj);
+nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_dictionary_extract_value(struct cos_object *dict, const char *key, struct cos_object **value_out);
+nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out);
+nspdferror cos_get_dictionary(struct cos_object *cobj, struct cos_object **value_out);
+
+#endif
diff --git a/src/pdf_doc.c b/src/pdf_doc.c
new file mode 100644
index 0000000..9b92bd0
--- /dev/null
+++ b/src/pdf_doc.c
@@ -0,0 +1,47 @@
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include "nspdferror.h"
+#include "byte_class.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
+
+/**
+ * move offset to next non whitespace byte, comments count as whitespace;
+ * never advances beyond doc->length and always returns 0
+ */
+int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
+{
+    while (*offset < doc->length) {
+        uint8_t c = DOC_BYTE(doc, *offset);
+        if ((bclass[c] & (BC_WSPC | BC_CMNT)) == 0) {
+            break; /* start of the next token */
+        }
+        (*offset)++;
+        if ((bclass[c] & BC_CMNT) != 0) {
+            /* skip the comment body, its end of line byte is left in place */
+            while ((*offset < doc->length) &&
+                   ((bclass[DOC_BYTE(doc, *offset)] & BC_EOLM) == 0)) {
+                (*offset)++;
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * move offset to next non eol byte
+ *
+ * never advances beyond doc->length and always returns 0
+ */
+int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
+{
+    while ((*offset < doc->length) &&
+           ((bclass[DOC_BYTE(doc, *offset)] & BC_EOLM) != 0)) {
+        (*offset)++;
+    }
+
+    return 0;
+}
diff --git a/src/pdf_doc.h b/src/pdf_doc.h
new file mode 100644
index 0000000..696c121
--- /dev/null
+++ b/src/pdf_doc.h
@@ -0,0 +1,42 @@
+#ifndef PDF_DOC_H
+#define PDF_DOC_H
+/** indirect object */
+struct xref_table_entry {
+    /* reference identifier (needs cos_object.h included first) */
+    struct cos_reference ref;
+
+    /** offset of object */
+    uint64_t offset;
+
+    /* indirect object if already decoded */
+    struct cos_object *o;
+};
+
+/** pdf document */
+struct pdf_doc {
+    uint8_t *buffer;
+    uint64_t buffer_length;
+
+    uint8_t *start; /* start of pdf document in input stream */
+    uint64_t length;
+
+    int major;
+    int minor;
+
+    /** Indirect object cross reference table */
+    uint64_t xref_size;
+    struct xref_table_entry *xref_table;
+
+    struct cos_object *root;
+    struct cos_object *encrypt;
+    struct cos_object *info;
+    struct cos_object *id;
+};
+
+/* byte data accessor, allows for more complex buffer handling in future */
+#define DOC_BYTE(doc, offset) ((doc)->start[(offset)])
+
+int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset);
+int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset);
+
+#endif
diff --git a/src/xref.c b/src/xref.c
index d6a07b8..5e5ac8b 100644
--- a/src/xref.c
+++ b/src/xref.c
@@ -7,136 +7,12 @@
 
 #include "nspdferror.h"
 #include "byte_class.h"
+#include "cos_object.h"
+#include "pdf_doc.h"
 
 #define SLEN(x) (sizeof((x)) - 1)
 
 
-enum cos_type {
-    COS_TYPE_NULL,
-    COS_TYPE_BOOL,
-    COS_TYPE_INT,
-    COS_TYPE_REAL,
-    COS_TYPE_NAME,
-    COS_TYPE_STRING,
-    COS_TYPE_ARRAY,
-    COS_TYPE_DICTIONARY,
-    COS_TYPE_NAMETREE,
-    COS_TYPE_NUMBERTREE,
-    COS_TYPE_STREAM,
-    COS_TYPE_REFERENCE,
-};
-
-struct cos_object;
-
-struct cos_dictionary_entry {
-    /** next key/value in dictionary */
-    struct cos_dictionary_entry *next;
-
-    /** key (name) */
-    struct cos_object *key;
-
-    /** value */
-    struct cos_object *value;
-};
-
-struct cos_array_entry {
-    /** next value in array */
-    struct cos_array_entry *next;
-
-    /** value */
-    struct cos_object *value;
-};
-
-struct cos_string {
-    uint8_t *data;
-    size_t length;
-    size_t alloc;
-};
-
-struct cos_reference {
-    /** id of indirect object */
-    uint64_t id;
-
-    /* generation of indirect object */
-    uint64_t generation;
-};
-
-struct cos_object {
-    int type;
-    union {
-        /** boolean */
-        bool b;
-
-        /** integer */
-        int64_t i;
-
-        /** real */
-        double r;
-
-        /** name */
-        char *n;
-
-        /** string */
-        struct cos_string *s;
-
-        /** stream data */
-        uint8_t *stream;
-
-        /* dictionary */
-        struct cos_dictionary_entry *dictionary;
-
-        /* array */
-        struct cos_array_entry *array;
-
-        /** reference */
-        struct cos_reference *reference;
-
-    } u;
-};
-
-
-/** indirect object */
-struct cos_indirect_object {
-    /* reference identifier */
-    struct cos_reference ref;
-
-    /** offset of object */
-    uint64_t offset;
-
-    /* direct object if already decoded */
-    struct cos_object *o;
-};
-
-
-/** pdf document */
-struct pdf_doc {
-    uint8_t *buffer;
-    uint64_t buffer_length;
-
-    uint8_t *start; /* start of pdf document in input stream */
-    uint64_t length;
-
-    int major;
-    int minor;
-
-    /**
-     * Indirect object cross reference table
-     */
-    uint64_t xref_size;
-    struct cos_indirect_object *xref_table;
-
-    struct cos_object *root;
-    struct cos_object *encrypt;
-    struct cos_object *info;
-    struct cos_object *id;
-
-};
-
-
-int cos_decode_object(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out);
-
 int
 read_whole_pdf(struct pdf_doc *doc, const char *fname)
 {
@@ -178,46 +54,7 @@ read_whole_pdf(struct pdf_doc *doc, const char *fname)
 #define STARTXREF_SEARCH_SIZE 1024
 
 
-/* byte data acessory, allows for more complex buffer handling in future */
-#define DOC_BYTE(doc, offset) (doc->start[(offset)])
-
-/**
- * move offset to next non whitespace byte
- */
-static int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset)
-{
-    uint8_t c;
-    /* TODO sort out keeping offset in range */
-    c = DOC_BYTE(doc, *offset);
-    while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) {
-        (*offset)++;
-        /* skip comments */
-        if ((bclass[c] & BC_CMNT) != 0) {
-            c = DOC_BYTE(doc, *offset);
-            while ((bclass[c] & BC_EOLM ) == 0) {
-                (*offset)++;
-                c = DOC_BYTE(doc, *offset);
-            }
-        }
-        c = DOC_BYTE(doc, *offset);
-    }
-    return 0;
-}
 
-/**
- * move offset to next non eol byte
- */
-static int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset)
-{
-    uint8_t c;
-    /* TODO sort out keeping offset in range */
-    c = DOC_BYTE(doc, *offset);
-    while ((bclass[c] & BC_EOLM) != 0) {
-        (*offset)++;
-        c = DOC_BYTE(doc, *offset);
-    }
-    return 0;
-}
 
 static nspdferror
 doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out)
@@ -388,842 +225,7 @@ int check_header(struct pdf_doc *doc)
 }
 
 
-nspdferror cos_free_object(struct cos_object *cos_obj)
-{
-    struct cos_dictionary_entry *dentry;
-    struct cos_array_entry *aentry;
-
-    switch (cos_obj->type) {
-    case COS_TYPE_NAME:
-        free(cos_obj->u.n);
-        break;
-
-    case COS_TYPE_STRING:
-        free(cos_obj->u.s->data);
-        free(cos_obj->u.s);
-        break;
-
-    case COS_TYPE_DICTIONARY:
-        dentry = cos_obj->u.dictionary;
-        while (dentry != NULL) {
-            struct cos_dictionary_entry *odentry;
-
-            cos_free_object(dentry->key);
-            cos_free_object(dentry->value);
-
-            odentry = dentry;
-            dentry = dentry->next;
-            free(odentry);
-        }
-        break;
 
-    case COS_TYPE_ARRAY:
-        aentry = cos_obj->u.array;
-        while (aentry != NULL) {
-            struct cos_array_entry *oaentry;
-
-            cos_free_object(aentry->value);
-
-            oaentry = aentry;
-            aentry = aentry->next;
-            free(oaentry);
-        }
-
-    case COS_TYPE_STREAM:
-        free(cos_obj->u.stream);
-        break;
-
-    }
-    free(cos_obj);
-
-    return NSPDFERROR_OK;
-}
-
-int cos_decode_number(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out)
-{
-    struct cos_object *cosobj;
-    uint8_t c; /* current byte from source data */
-    unsigned int len; /* number of decimal places in number */
-    uint8_t num[21]; /* temporary buffer for decimal values */
-    uint64_t offset; /* current offset of source data */
-
-    offset = *offset_out;
-
-    for (len = 0; len < sizeof(num); len++) {
-        c = DOC_BYTE(doc, offset);
-        if ((bclass[c] & BC_DCML) != BC_DCML) {
-            int64_t result = 0; /* parsed result */
-            uint64_t tens;
-
-            if (len == 0) {
-                return -2; /* parse error no decimals in input */
-            }
-            /* sum value from each place */
-            for (tens = 1; len > 0; tens = tens * 10, len--) {
-                result += (num[len - 1] * tens);
-            }
-
-            doc_skip_ws(doc, &offset);
-
-            cosobj = calloc(1, sizeof(struct cos_object));
-            if (cosobj == NULL) {
-                return -1; /* memory error */
-            }
-
-            cosobj->type = COS_TYPE_INT;
-            cosobj->u.i = result;
-
-            *cosobj_out = cosobj;
-
-            *offset_out = offset;
-
-            return 0;
-        }
-        num[len] = c - '0';
-        offset++;
-    }
-    return -1; /* number too long */
-}
-
-#define COS_STRING_ALLOC 32
-
-nspdferror
-cos_string_append(struct cos_string *s, uint8_t c)
-{
-    //printf("appending 0x%x to %p len %d alloc %d\n", c, s->data, s->length, s->alloc);
-    if (s->length == s->alloc) {
-        uint8_t *ns;
-        ns = realloc(s->data, s->alloc + COS_STRING_ALLOC);
-        if (ns == NULL) {
-            return NSPDFERROR_NOMEM;
-        }
-        s->data = ns;
-        s->alloc += COS_STRING_ALLOC;
-    }
-    s->data[s->length++] = c;
-    return NSPDFERROR_OK;
-}
-
-/**
- * literal string processing
- *
- */
-nspdferror
-cos_decode_string(struct pdf_doc *doc,
-                  uint64_t *offset_out,
-                  struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    struct cos_object *cosobj;
-    uint8_t c;
-    unsigned int pdepth = 1; /* depth of open parens */
-    struct cos_string *cstring;
-
-    offset = *offset_out;
-
-    c = DOC_BYTE(doc, offset++);
-    if (c != '(') {
-        return NSPDFERROR_SYNTAX;
-    }
-
-    cstring = calloc(1, sizeof(*cstring));
-    if (cstring == NULL) {
-        return NSPDFERROR_NOMEM;
-    }
-
-    cosobj = calloc(1, sizeof(*cosobj));
-    if (cosobj == NULL) {
-        return NSPDFERROR_NOMEM;
-    }
-    cosobj->type = COS_TYPE_STRING;
-    cosobj->u.s = cstring;
-
-    while (pdepth > 0) {
-        c = DOC_BYTE(doc, offset++);
-
-        if (c == ')') {
-            pdepth--;
-            if (pdepth == 0) {
-                break;
-            }
-        } else if (c == '(') {
-            pdepth++;
-        } else if ((bclass[c] & BC_EOLM ) != 0) {
-            /* unescaped end of line characters are translated to a single
-             * newline
-             */
-            c = DOC_BYTE(doc, offset);
-            while ((bclass[c] & BC_EOLM) != 0) {
-                offset++;
-                c = DOC_BYTE(doc, offset);
-            }
-            c = '\n';
-        } else if (c == '\\') {
-            /* escaped chars */
-            c = DOC_BYTE(doc, offset++);
-            switch (c) {
-            case 'n':
-                c = '\n';
-                break;
-
-            case 'r':
-                c = '\r';
-                break;
-
-            case 't':
-                c = '\t';
-                break;
-
-            case 'b':
-                c = '\b';
-                break;
-
-            case 'f':
-                c = '\f';
-                break;
-
-            case '(':
-                c = '(';
-                break;
-
-            case ')':
-                c = ')';
-                break;
-
-            case '\\':
-                c = '\\';
-                break;
-
-            default:
-
-                if ((bclass[c] & BC_EOLM) != 0) {
-                    /* escaped end of line, swallow it */
-                    c = DOC_BYTE(doc, offset++);
-                    while ((bclass[c] & BC_EOLM) != 0) {
-                        c = DOC_BYTE(doc, offset++);
-                    }
-                } else if ((bclass[c] & BC_OCTL) != 0) {
-                    /* octal value */
-                    uint8_t val;
-                    val = (c - '0');
-                    c = DOC_BYTE(doc, offset);
-                    if ((bclass[c] & BC_OCTL) != 0) {
-                        offset++;
-                        val = (val << 3) | (c - '0');
-                        c = DOC_BYTE(doc, offset);
-                        if ((bclass[c] & BC_OCTL) != 0) {
-                            offset++;
-                            val = (val << 3) | (c - '0');
-                            c = val;
-                        }
-                    }
-                } /* else invalid (skip backslash) */
-                break;
-            }
-        }
-
-        /* c contains the character to add to the string */
-        cos_string_append(cstring, c);
-    }
-
-    doc_skip_ws(doc, &offset);
-
-    *cosobj_out = cosobj;
-    *offset_out = offset;
-
-    return NSPDFERROR_OK;
-}
-
-uint8_t xtoi(uint8_t x)
-{
-    if (x >= '0' && x <= '9') {
-        x = x - '0';
-    } else if (x >= 'a' && x <='f') {
-        x = x - 'a' + 10;
-    } else if (x >= 'A' && x <='F') {
-        x = x - 'A' + 10;
-    }
-    return x;
-}
-
-nspdferror
-cos_decode_hex_string(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    struct cos_object *cosobj;
-    uint8_t c;
-    uint8_t value = 0;
-    struct cos_string *cstring;
-    bool first = true;
-
-    offset = *offset_out;
-
-    c = DOC_BYTE(doc, offset++);
-    if (c != '<') {
-        return NSPDFERROR_SYNTAX;
-    }
-
-    cstring = calloc(1, sizeof(*cstring));
-    if (cstring == NULL) {
-        return NSPDFERROR_NOMEM;
-    }
-
-    cosobj = calloc(1, sizeof(*cosobj));
-    if (cosobj == NULL) {
-        return NSPDFERROR_NOMEM;
-    }
-    cosobj->type = COS_TYPE_STRING;
-    cosobj->u.s = cstring;
-
-    for (; offset < doc->length; offset++) {
-        c = DOC_BYTE(doc, offset);
-        if (c == '>') {
-            if (first == false) {
-                cos_string_append(cstring, value);
-            }
-            offset++;
-            doc_skip_ws(doc, &offset);
-
-            *cosobj_out = cosobj;
-            *offset_out = offset;
-
-            return NSPDFERROR_OK;
-        } else if ((bclass[c] & BC_HEXL) != 0) {
-            if (first) {
-                value = xtoi(c) << 4;
-                first = false;
-            } else {
-                value |= xtoi(c);
-                first = true;
-                cos_string_append(cstring, value);
-            }
-        } else if ((bclass[c] & BC_WSPC) == 0) {
-            break; /* unknown byte value in string */
-        }
-    }
-    return NSPDFERROR_SYNTAX;
-}
-
-
-int cos_decode_dictionary(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    struct cos_object *cosobj;
-    struct cos_dictionary_entry *entry;
-    struct cos_object *key;
-    struct cos_object *value;
-    int res;
-
-    offset = *offset_out;
-
-    if ((DOC_BYTE(doc, offset) != '<') ||
-        (DOC_BYTE(doc, offset + 1) != '<')) {
-        return -1; /* syntax error */
-    }
-    offset += 2;
-    doc_skip_ws(doc, &offset);
-
-    printf("found a dictionary\n");
-
-    cosobj = calloc(1, sizeof(struct cos_object));
-    if (cosobj == NULL) {
-        return -1; /* memory error */
-    }
-    cosobj->type = COS_TYPE_DICTIONARY;
-
-    while ((DOC_BYTE(doc, offset) != '>') &&
-           (DOC_BYTE(doc, offset + 1) != '>')) {
-
-        res = cos_decode_object(doc, &offset, &key);
-        if (res != 0) {
-            /* todo free up any dictionary entries already created */
-            printf("key object decode failed\n");
-            return res;
-        }
-        if (key->type != COS_TYPE_NAME) {
-            /* key value pairs without a name */
-            printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME);
-            return -1; /* syntax error */
-        }
-        printf("key: %s\n", key->u.n);
-
-        res = cos_decode_object(doc, &offset, &value);
-        if (res != 0) {
-            printf("Unable to decode value object in dictionary\n");
-            /* todo free up any dictionary entries already created */
-            return res;
-        }
-
-        /* add dictionary entry */
-        entry = calloc(1, sizeof(struct cos_dictionary_entry));
-        if (entry == NULL) {
-            /* todo free up any dictionary entries already created */
-            return -1; /* memory error */
-        }
-
-        entry->key = key;
-        entry->value = value;
-        entry->next = cosobj->u.dictionary;
-
-        cosobj->u.dictionary = entry;
-
-    }
-    offset += 2; /* skip closing >> */
-    doc_skip_ws(doc, &offset);
-
-    *cosobj_out = cosobj;
-    *offset_out = offset;
-
-    return 0;
-}
-
-
-nspdferror
-cos_decode_list(struct pdf_doc *doc,
-                uint64_t *offset_out,
-                struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    struct cos_object *cosobj;
-    struct cos_array_entry *entry;
-    struct cos_object *value;
-    nspdferror res;
-
-    offset = *offset_out;
-
-    /* sanity check first token is list open */
-    if (DOC_BYTE(doc, offset) != '[') {
-        printf("not a [\n");
-        return NSPDFERROR_SYNTAX; /* syntax error */
-    }
-    offset++;
-
-    /* advance offset to next token */
-    res = doc_skip_ws(doc, &offset);
-    if (res != NSPDFERROR_OK) {
-        return res;
-    }
-
-    printf("found a list\n");
-
-    cosobj = calloc(1, sizeof(struct cos_object));
-    if (cosobj == NULL) {
-        return NSPDFERROR_NOMEM;
-    }
-    cosobj->type = COS_TYPE_ARRAY;
-
-    while (DOC_BYTE(doc, offset) != ']') {
-
-        res = cos_decode_object(doc, &offset, &value);
-        if (res != NSPDFERROR_OK) {
-            cos_free_object(cosobj);
-            printf("Unable to decode value object in list\n");
-            return res;
-        }
-
-        /* add entry to array */
-        entry = calloc(1, sizeof(struct cos_array_entry));
-        if (entry == NULL) {
-            cos_free_object(cosobj);
-            return NSPDFERROR_NOMEM;
-        }
-
-        entry->value = value;
-        entry->next = cosobj->u.array;
-
-        cosobj->u.array = entry;
-    }
-    offset++; /* skip closing ] */
-
-    doc_skip_ws(doc, &offset);
-
-    *cosobj_out = cosobj;
-    *offset_out = offset;
-
-    return 0;
-}
-
-#define NAME_MAX_LENGTH 127
-
-/**
- * decode a name object
- *
- * \todo deal with # symbols on pdf versions 1.2 and later
- */
-int cos_decode_name(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    struct cos_object *cosobj;
-    uint8_t c;
-    char name[NAME_MAX_LENGTH + 1];
-    int idx = 0;
-
-    offset = *offset_out;
-
-    c = DOC_BYTE(doc, offset++);
-    if (c != '/') {
-        return -1; /* names must be prefixed with a / */
-    }
-    printf("found a name\n");
-
-    c = DOC_BYTE(doc, offset);
-    while ((idx <= NAME_MAX_LENGTH) &&
-           ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) {
-        offset++;
-        //printf("%c", c);
-        name[idx++] = c;
-        c = DOC_BYTE(doc, offset);
-    }
-    //printf("\nidx: %d\n", idx);
-    if (idx > NAME_MAX_LENGTH) {
-        /* name length exceeded implementation limit */
-        return -1;
-    }
-    name[idx] = 0;
-
-    //printf("name: %s\n", name);
-
-    doc_skip_ws(doc, &offset);
-
-    cosobj = calloc(1, sizeof(struct cos_object));
-    if (cosobj == NULL) {
-        return -1; /* memory error */
-    }
-
-    cosobj->type = COS_TYPE_NAME;
-    cosobj->u.n = strdup(name);
-
-    *cosobj_out = cosobj;
-
-    *offset_out = offset;
-
-    return 0;
-}
-
-
-int cos_decode_boolean(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    struct cos_object *cosobj;
-    uint8_t c;
-    bool value;
-
-    offset = *offset_out;
-
-    c = DOC_BYTE(doc, offset++);
-    if ((c == 't') || (c == 'T')) {
-        /* true branch */
-
-        c = DOC_BYTE(doc, offset++);
-        if ((c != 'r') && (c != 'R')) {
-            return -1; /* syntax error */
-        }
-        c = DOC_BYTE(doc, offset++);
-        if ((c != 'u') && (c != 'U')) {
-            return -1; /* syntax error */
-        }
-        c = DOC_BYTE(doc, offset++);
-        if ((c != 'e') && (c != 'E')) {
-            return -1; /* syntax error */
-        }
-        value = true;
-
-    } else if ((c == 'f') || (c == 'F')) {
-        /* false branch */
-
-        c = DOC_BYTE(doc, offset++);
-        if ((c != 'a') && (c != 'A')) {
-            return -1; /* syntax error */
-        }
-        c = DOC_BYTE(doc, offset++);
-        if ((c != 'l') && (c != 'L')) {
-            return -1; /* syntax error */
-        }
-        c = DOC_BYTE(doc, offset++);
-        if ((c != 's') && (c != 'S')) {
-            return -1; /* syntax error */
-        }
-        c = DOC_BYTE(doc, offset++);
-        if ((c != 'e') && (c != 'E')) {
-            return -1; /* syntax error */
-        }
-
-        value = false;
-
-    } else {
-        return -1; /* syntax error */
-    }
-
-    doc_skip_ws(doc, &offset);
-
-    cosobj = calloc(1, sizeof(struct cos_object));
-    if (cosobj == NULL) {
-        return -1; /* memory error */
-    }
-
-    cosobj->type = COS_TYPE_BOOL;
-    cosobj->u.b = value;
-
-    *cosobj_out = cosobj;
-
-    *offset_out = offset;
-
-    return 0;
-
-}
-
-int cos_decode_null(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    struct cos_object *cosobj;
-    uint8_t c;
-
-    offset = *offset_out;
-
-    c = DOC_BYTE(doc, offset++);
-    if ((c != 'n') && (c != 'N')) {
-        return -1; /* syntax error */
-    }
-    c = DOC_BYTE(doc, offset++);
-    if ((c != 'u') && (c != 'U')) {
-        return -1; /* syntax error */
-    }
-    c = DOC_BYTE(doc, offset++);
-    if ((c != 'l') && (c != 'L')) {
-        return -1; /* syntax error */
-    }
-    c = DOC_BYTE(doc, offset++);
-    if ((c != 'l') && (c != 'L')) {
-        return -1; /* syntax error */
-    }
-
-    doc_skip_ws(doc, &offset);
-
-    cosobj = calloc(1, sizeof(struct cos_object));
-    if (cosobj == NULL) {
-        return -1; /* memory error */
-    }
-
-    cosobj->type = COS_TYPE_NULL;
-    *offset_out = offset;
-
-    return 0;
-}
-
-/**
- * attempt to decode the stream into a reference
- *
- * The stream has already had a positive integer decoded from it. if another
- * positive integer follows and a R character after that it is a reference,
- * otherwise bail, but not finding a ref is not an error!
- *
- * \param doc the pdf document
- * \param offset_out offset of current cursor in stream
- * \param cosobj_out the object to return into, on input contains the first
- * integer
- */
-int cos_attempt_decode_reference(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    struct cos_object *cosobj; /* possible generation object */
-    uint8_t c;
-    int res;
-    struct cos_reference *nref; /* new reference */
-
-    offset = *offset_out;
-
-    res = cos_decode_number(doc, &offset, &cosobj);
-    if (res != 0) {
-        return 0; /* no error if object could not be decoded */
-    }
-
-    if (cosobj->type != COS_TYPE_INT) {
-        /* next object was not an integer so not a reference */
-        cos_free_object(cosobj);
-        return 0;
-    }
-
-    if (cosobj->u.i < 0) {
-        /* integer was negative so not a reference (generations must be
-         * non-negative
-         */
-        cos_free_object(cosobj);
-        return 0;
-
-    }
-
-    /* two int in a row, look for the R */
-    c = DOC_BYTE(doc, offset++);
-    if (c != 'R') {
-        /* no R so not a reference */
-        cos_free_object(cosobj);
-        return 0;
-    }
-
-    /* found reference */
-
-    printf("found reference\n");
-    doc_skip_ws(doc, &offset);
-
-    nref = calloc(1, sizeof(struct cos_reference));
-    if (nref == NULL) {
-        /* todo free objects */
-        return -1; /* memory error */
-    }
-
-    nref->id = (*cosobj_out)->u.i;
-    nref->generation = cosobj->u.i;
-
-    cos_free_object(*cosobj_out);
-
-    cosobj->type = COS_TYPE_REFERENCE;
-    cosobj->u.reference = nref;
-
-    *cosobj_out = cosobj;
-
-    *offset_out = offset;
-
-    return 0;
-}
-
-/**
- * Decode input stream into an object
- *
- * lex and parse a byte stream to generate COS objects
- *
- * lexing the input.
- *  check first character:
- *
- * < either a hex string or a dictionary
- *     second char < means dictionary else hex string
- * - either an integer or real
- * + either an integer or real
- * 0-9 an integer, unsigned integer or real
- * . a real number
- * ( a string
- * / a name
- * [ a list
- * t|T boolean true
- * f|F boolean false
- * n|N null
- *
- * Grammar is:
- * cos_object:
- *   TOK_NULL |
- *   TOK_BOOLEAN |
- *   TOK_INT |
- *   TOK_REAL |
- *   TOK_NAME |
- *   TOK_STRING |
- *   list |
- *   dictionary |
- *   object_reference;
- *
- * list:
- *   '[' listargs ']';
- *
- * listargs:
- *   cos_object
- *   |
- *   listargs cos_object
- *   ;
- *
- * object_reference:
- *   TOK_UINT TOK_UINT 'R';
- */
-int cos_decode_object(struct pdf_doc *doc,
-                      uint64_t *offset_out,
-                      struct cos_object **cosobj_out)
-{
-    uint64_t offset;
-    int res;
-    struct cos_object *cosobj;
-
-    offset = *offset_out;
-
-    /* object could be any type use first char to try and select */
-    switch (DOC_BYTE(doc, offset)) {
-
-    case '-':
-    case '+':
-    case '.':
-    case '0':
-    case '1':
-    case '2':
-    case '3':
-    case '4':
-    case '5':
-    case '6':
-    case '7':
-    case '8':
-    case '9':
-        res = cos_decode_number(doc, &offset, &cosobj);
-        /* if type is positive integer try to check for reference */
-        if ((res == 0) &&
-            (cosobj->type == COS_TYPE_INT) &&
-            (cosobj->u.i > 0)) {
-            res = cos_attempt_decode_reference(doc, &offset, &cosobj);
-        }
-        break;
-
-    case '<':
-        if (DOC_BYTE(doc, offset + 1) == '<') {
-            res = cos_decode_dictionary(doc, &offset, &cosobj);
-        } else {
-            res = cos_decode_hex_string(doc, &offset, &cosobj);
-        }
-        break;
-
-    case '(':
-        res = cos_decode_string(doc, &offset, &cosobj);
-        break;
-
-    case '/':
-        res = cos_decode_name(doc, &offset, &cosobj);
-        break;
-
-    case '[':
-        res = cos_decode_list(doc, &offset, &cosobj);
-        break;
-
-    case 't':
-    case 'T':
-    case 'f':
-    case 'F':
-        res = cos_decode_boolean(doc, &offset, &cosobj);
-        break;
-
-    case 'n':
-    case 'N':
-        res = cos_decode_null(doc, &offset, &cosobj);
-        break;
-
-    default:
-        res = -1; /* syntax error */
-    }
-
-
-    if (res == 0) {
-        *cosobj_out = cosobj;
-        *offset_out = offset;
-    }
-
-    return res;
-}
 
 
 
@@ -1335,7 +337,7 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
 
             if ((DOC_BYTE(doc, offset++) == 'n')) {
                 if (objnumber < doc->xref_size) {
-                    struct cos_indirect_object *indobj;
+                    struct xref_table_entry *indobj;
                     indobj = doc->xref_table + objnumber;
 
                     indobj->ref.id = objnumber;
@@ -1357,69 +359,6 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out)
     return NSPDFERROR_OK;
 }
 
-nspdferror
-cos_dictionary_get_value(struct cos_object *dict,
-                         const char *key,
-                         struct cos_object **value_out)
-{
-    struct cos_dictionary_entry *entry;
-
-    if (dict->type != COS_TYPE_DICTIONARY) {
-        return NSPDFERROR_TYPE;
-    }
-
-    entry = dict->u.dictionary;
-    while (entry != NULL) {
-        if (strcmp(entry->key->u.n, key) == 0) {
-            *value_out = entry->value;
-            return NSPDFERROR_OK;
-        }
-        entry = entry->next;
-    }
-    return NSPDFERROR_NOTFOUND;
-}
-
-/**
- * extracts a value for a key in a dictionary.
- *
- * this finds and returns a value for a given key removing it from a dictionary
- */
-nspdferror
-cos_dictionary_extract_value(struct cos_object *dict,
-                             const char *key,
-                             struct cos_object **value_out)
-{
-    struct cos_dictionary_entry *entry;
-    struct cos_dictionary_entry **prev;
-
-    if (dict->type != COS_TYPE_DICTIONARY) {
-        return NSPDFERROR_TYPE;
-    }
-
-    prev = &dict->u.dictionary;
-    entry = *prev;
-    while (entry != NULL) {
-        if (strcmp(entry->key->u.n, key) == 0) {
-            *value_out = entry->value;
-            *prev = entry->next;
-            cos_free_object(entry->key);
-            free(entry);
-            return NSPDFERROR_OK;
-        }
-        prev = &entry->next;
-        entry = *prev;
-    }
-    return NSPDFERROR_NOTFOUND;
-}
-
-nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out)
-{
-    if (cobj->type != COS_TYPE_INT) {
-        return NSPDFERROR_TYPE;
-    }
-    *value_out = cobj->u.i;
-    return NSPDFERROR_OK;
-}
 
 /**
  * recursively parse trailers and xref tables
@@ -1480,7 +419,7 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset)
             goto decode_xref_trailer_failed;
         }
 
-        doc->xref_table = calloc(size, sizeof(struct cos_indirect_object));
+        doc->xref_table = calloc(size, sizeof(struct xref_table_entry));
         if (doc->xref_table == NULL) {
             res = NSPDFERROR_NOMEM;
             goto decode_xref_trailer_failed;
@@ -1580,7 +519,12 @@ nspdferror decode_trailers(struct pdf_doc *doc)
 
 nspdferror decode_catalog(struct pdf_doc *doc)
 {
-    return NSPDFERROR_OK;
+    nspdferror res;
+    struct cos_object *catalog; /* set by the call below, not read yet */
+
+    res = cos_get_dictionary(doc->root, &catalog);
+
+    return res;
 }
 
 nspdferror new_pdf_doc(struct pdf_doc **doc_out)