diff options
-rw-r--r-- | src/Makefile | 6 | ||||
-rw-r--r-- | src/cos_decode.c | 799 | ||||
-rw-r--r-- | src/cos_object.c | 139 | ||||
-rw-r--r-- | src/cos_object.h | 98 | ||||
-rw-r--r-- | src/pdf_doc.c | 47 | ||||
-rw-r--r-- | src/pdf_doc.h | 42 | ||||
-rw-r--r-- | src/xref.c | 1076 |
7 files changed, 1139 insertions, 1068 deletions
/**
 * convert an ASCII hexadecimal digit into its numeric value
 *
 * \param x the byte to convert
 * \return 0 to 15 for the digits 0-9, a-f and A-F. Any other byte is
 *         returned unchanged.
 */
uint8_t xtoi(uint8_t x)
{
    if ((x >= '0') && (x <= '9')) {
        return (uint8_t)(x - '0');
    }
    if ((x >= 'a') && (x <= 'f')) {
        return (uint8_t)(x - 'a' + 10);
    }
    if ((x >= 'A') && (x <= 'F')) {
        return (uint8_t)(x - 'A' + 10);
    }
    /* not a hex digit, hand the byte back untouched */
    return x;
}
sum value from each place */ + for (tens = 1; len > 0; tens = tens * 10, len--) { + result += (num[len - 1] * tens); + } + + doc_skip_ws(doc, &offset); + + cosobj = calloc(1, sizeof(struct cos_object)); + if (cosobj == NULL) { + return -1; /* memory error */ + } + + cosobj->type = COS_TYPE_INT; + cosobj->u.i = result; + + *cosobj_out = cosobj; + + *offset_out = offset; + + return 0; + } + num[len] = c - '0'; + offset++; + } + return -1; /* number too long */ +} + + +/** + * literal string processing + * + */ +nspdferror +cos_decode_string(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) +{ + uint64_t offset; + struct cos_object *cosobj; + uint8_t c; + unsigned int pdepth = 1; /* depth of open parens */ + struct cos_string *cstring; + + offset = *offset_out; + + c = DOC_BYTE(doc, offset++); + if (c != '(') { + return NSPDFERROR_SYNTAX; + } + + cstring = calloc(1, sizeof(*cstring)); + if (cstring == NULL) { + return NSPDFERROR_NOMEM; + } + + cosobj = calloc(1, sizeof(*cosobj)); + if (cosobj == NULL) { + return NSPDFERROR_NOMEM; + } + cosobj->type = COS_TYPE_STRING; + cosobj->u.s = cstring; + + while (pdepth > 0) { + c = DOC_BYTE(doc, offset++); + + if (c == ')') { + pdepth--; + if (pdepth == 0) { + break; + } + } else if (c == '(') { + pdepth++; + } else if ((bclass[c] & BC_EOLM ) != 0) { + /* unescaped end of line characters are translated to a single + * newline + */ + c = DOC_BYTE(doc, offset); + while ((bclass[c] & BC_EOLM) != 0) { + offset++; + c = DOC_BYTE(doc, offset); + } + c = '\n'; + } else if (c == '\\') { + /* escaped chars */ + c = DOC_BYTE(doc, offset++); + switch (c) { + case 'n': + c = '\n'; + break; + + case 'r': + c = '\r'; + break; + + case 't': + c = '\t'; + break; + + case 'b': + c = '\b'; + break; + + case 'f': + c = '\f'; + break; + + case '(': + c = '('; + break; + + case ')': + c = ')'; + break; + + case '\\': + c = '\\'; + break; + + default: + + if ((bclass[c] & BC_EOLM) != 0) { + /* escaped end of line, 
swallow it */ + c = DOC_BYTE(doc, offset++); + while ((bclass[c] & BC_EOLM) != 0) { + c = DOC_BYTE(doc, offset++); + } + } else if ((bclass[c] & BC_OCTL) != 0) { + /* octal value */ + uint8_t val; + val = (c - '0'); + c = DOC_BYTE(doc, offset); + if ((bclass[c] & BC_OCTL) != 0) { + offset++; + val = (val << 3) | (c - '0'); + c = DOC_BYTE(doc, offset); + if ((bclass[c] & BC_OCTL) != 0) { + offset++; + val = (val << 3) | (c - '0'); + c = val; + } + } + } /* else invalid (skip backslash) */ + break; + } + } + + /* c contains the character to add to the string */ + cos_string_append(cstring, c); + } + + doc_skip_ws(doc, &offset); + + *cosobj_out = cosobj; + *offset_out = offset; + + return NSPDFERROR_OK; +} + + +nspdferror +cos_decode_hex_string(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) +{ + uint64_t offset; + struct cos_object *cosobj; + uint8_t c; + uint8_t value = 0; + struct cos_string *cstring; + bool first = true; + + offset = *offset_out; + + c = DOC_BYTE(doc, offset++); + if (c != '<') { + return NSPDFERROR_SYNTAX; + } + + cstring = calloc(1, sizeof(*cstring)); + if (cstring == NULL) { + return NSPDFERROR_NOMEM; + } + + cosobj = calloc(1, sizeof(*cosobj)); + if (cosobj == NULL) { + return NSPDFERROR_NOMEM; + } + cosobj->type = COS_TYPE_STRING; + cosobj->u.s = cstring; + + for (; offset < doc->length; offset++) { + c = DOC_BYTE(doc, offset); + if (c == '>') { + if (first == false) { + cos_string_append(cstring, value); + } + offset++; + doc_skip_ws(doc, &offset); + + *cosobj_out = cosobj; + *offset_out = offset; + + return NSPDFERROR_OK; + } else if ((bclass[c] & BC_HEXL) != 0) { + if (first) { + value = xtoi(c) << 4; + first = false; + } else { + value |= xtoi(c); + first = true; + cos_string_append(cstring, value); + } + } else if ((bclass[c] & BC_WSPC) == 0) { + break; /* unknown byte value in string */ + } + } + return NSPDFERROR_SYNTAX; +} + + +int cos_decode_dictionary(struct pdf_doc *doc, + uint64_t *offset_out, + 
struct cos_object **cosobj_out) +{ + uint64_t offset; + struct cos_object *cosobj; + struct cos_dictionary_entry *entry; + struct cos_object *key; + struct cos_object *value; + int res; + + offset = *offset_out; + + if ((DOC_BYTE(doc, offset) != '<') || + (DOC_BYTE(doc, offset + 1) != '<')) { + return -1; /* syntax error */ + } + offset += 2; + doc_skip_ws(doc, &offset); + + printf("found a dictionary\n"); + + cosobj = calloc(1, sizeof(struct cos_object)); + if (cosobj == NULL) { + return -1; /* memory error */ + } + cosobj->type = COS_TYPE_DICTIONARY; + + while ((DOC_BYTE(doc, offset) != '>') && + (DOC_BYTE(doc, offset + 1) != '>')) { + + res = cos_decode_object(doc, &offset, &key); + if (res != 0) { + /* todo free up any dictionary entries already created */ + printf("key object decode failed\n"); + return res; + } + if (key->type != COS_TYPE_NAME) { + /* key value pairs without a name */ + printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME); + return -1; /* syntax error */ + } + printf("key: %s\n", key->u.n); + + res = cos_decode_object(doc, &offset, &value); + if (res != 0) { + printf("Unable to decode value object in dictionary\n"); + /* todo free up any dictionary entries already created */ + return res; + } + + /* add dictionary entry */ + entry = calloc(1, sizeof(struct cos_dictionary_entry)); + if (entry == NULL) { + /* todo free up any dictionary entries already created */ + return -1; /* memory error */ + } + + entry->key = key; + entry->value = value; + entry->next = cosobj->u.dictionary; + + cosobj->u.dictionary = entry; + + } + offset += 2; /* skip closing >> */ + doc_skip_ws(doc, &offset); + + *cosobj_out = cosobj; + *offset_out = offset; + + return 0; +} + + +nspdferror +cos_decode_list(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) +{ + uint64_t offset; + struct cos_object *cosobj; + struct cos_array_entry *entry; + struct cos_object *value; + nspdferror res; + + offset = *offset_out; + + /* sanity check 
first token is list open */ + if (DOC_BYTE(doc, offset) != '[') { + printf("not a [\n"); + return NSPDFERROR_SYNTAX; /* syntax error */ + } + offset++; + + /* advance offset to next token */ + res = doc_skip_ws(doc, &offset); + if (res != NSPDFERROR_OK) { + return res; + } + + printf("found a list\n"); + + cosobj = calloc(1, sizeof(struct cos_object)); + if (cosobj == NULL) { + return NSPDFERROR_NOMEM; + } + cosobj->type = COS_TYPE_ARRAY; + + while (DOC_BYTE(doc, offset) != ']') { + + res = cos_decode_object(doc, &offset, &value); + if (res != NSPDFERROR_OK) { + cos_free_object(cosobj); + printf("Unable to decode value object in list\n"); + return res; + } + + /* add entry to array */ + entry = calloc(1, sizeof(struct cos_array_entry)); + if (entry == NULL) { + cos_free_object(cosobj); + return NSPDFERROR_NOMEM; + } + + entry->value = value; + entry->next = cosobj->u.array; + + cosobj->u.array = entry; + } + offset++; /* skip closing ] */ + + doc_skip_ws(doc, &offset); + + *cosobj_out = cosobj; + *offset_out = offset; + + return 0; +} + +#define NAME_MAX_LENGTH 127 + +/** + * decode a name object + * + * \todo deal with # symbols on pdf versions 1.2 and later + */ +int cos_decode_name(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) +{ + uint64_t offset; + struct cos_object *cosobj; + uint8_t c; + char name[NAME_MAX_LENGTH + 1]; + int idx = 0; + + offset = *offset_out; + + c = DOC_BYTE(doc, offset++); + if (c != '/') { + return -1; /* names must be prefixed with a / */ + } + printf("found a name\n"); + + c = DOC_BYTE(doc, offset); + while ((idx <= NAME_MAX_LENGTH) && + ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) { + offset++; + //printf("%c", c); + name[idx++] = c; + c = DOC_BYTE(doc, offset); + } + //printf("\nidx: %d\n", idx); + if (idx > NAME_MAX_LENGTH) { + /* name length exceeded implementation limit */ + return -1; + } + name[idx] = 0; + + //printf("name: %s\n", name); + + doc_skip_ws(doc, &offset); + + cosobj = calloc(1, 
sizeof(struct cos_object)); + if (cosobj == NULL) { + return -1; /* memory error */ + } + + cosobj->type = COS_TYPE_NAME; + cosobj->u.n = strdup(name); + + *cosobj_out = cosobj; + + *offset_out = offset; + + return 0; +} + + +int cos_decode_boolean(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) +{ + uint64_t offset; + struct cos_object *cosobj; + uint8_t c; + bool value; + + offset = *offset_out; + + c = DOC_BYTE(doc, offset++); + if ((c == 't') || (c == 'T')) { + /* true branch */ + + c = DOC_BYTE(doc, offset++); + if ((c != 'r') && (c != 'R')) { + return -1; /* syntax error */ + } + c = DOC_BYTE(doc, offset++); + if ((c != 'u') && (c != 'U')) { + return -1; /* syntax error */ + } + c = DOC_BYTE(doc, offset++); + if ((c != 'e') && (c != 'E')) { + return -1; /* syntax error */ + } + value = true; + + } else if ((c == 'f') || (c == 'F')) { + /* false branch */ + + c = DOC_BYTE(doc, offset++); + if ((c != 'a') && (c != 'A')) { + return -1; /* syntax error */ + } + c = DOC_BYTE(doc, offset++); + if ((c != 'l') && (c != 'L')) { + return -1; /* syntax error */ + } + c = DOC_BYTE(doc, offset++); + if ((c != 's') && (c != 'S')) { + return -1; /* syntax error */ + } + c = DOC_BYTE(doc, offset++); + if ((c != 'e') && (c != 'E')) { + return -1; /* syntax error */ + } + + value = false; + + } else { + return -1; /* syntax error */ + } + + doc_skip_ws(doc, &offset); + + cosobj = calloc(1, sizeof(struct cos_object)); + if (cosobj == NULL) { + return -1; /* memory error */ + } + + cosobj->type = COS_TYPE_BOOL; + cosobj->u.b = value; + + *cosobj_out = cosobj; + + *offset_out = offset; + + return 0; + +} + +int cos_decode_null(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) +{ + uint64_t offset; + struct cos_object *cosobj; + uint8_t c; + + offset = *offset_out; + + c = DOC_BYTE(doc, offset++); + if ((c != 'n') && (c != 'N')) { + return -1; /* syntax error */ + } + c = DOC_BYTE(doc, offset++); + if ((c != 'u') && (c != 
'U')) { + return -1; /* syntax error */ + } + c = DOC_BYTE(doc, offset++); + if ((c != 'l') && (c != 'L')) { + return -1; /* syntax error */ + } + c = DOC_BYTE(doc, offset++); + if ((c != 'l') && (c != 'L')) { + return -1; /* syntax error */ + } + + doc_skip_ws(doc, &offset); + + cosobj = calloc(1, sizeof(struct cos_object)); + if (cosobj == NULL) { + return -1; /* memory error */ + } + + cosobj->type = COS_TYPE_NULL; + *offset_out = offset; + + return 0; +} + +/** + * attempt to decode the stream into a reference + * + * The stream has already had a positive integer decoded from it. if another + * positive integer follows and a R character after that it is a reference, + * otherwise bail, but not finding a ref is not an error! + * + * \param doc the pdf document + * \param offset_out offset of current cursor in stream + * \param cosobj_out the object to return into, on input contains the first + * integer + */ +int cos_attempt_decode_reference(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) +{ + uint64_t offset; + struct cos_object *cosobj; /* possible generation object */ + uint8_t c; + int res; + struct cos_reference *nref; /* new reference */ + + offset = *offset_out; + + res = cos_decode_number(doc, &offset, &cosobj); + if (res != 0) { + return 0; /* no error if object could not be decoded */ + } + + if (cosobj->type != COS_TYPE_INT) { + /* next object was not an integer so not a reference */ + cos_free_object(cosobj); + return 0; + } + + if (cosobj->u.i < 0) { + /* integer was negative so not a reference (generations must be + * non-negative + */ + cos_free_object(cosobj); + return 0; + + } + + /* two int in a row, look for the R */ + c = DOC_BYTE(doc, offset++); + if (c != 'R') { + /* no R so not a reference */ + cos_free_object(cosobj); + return 0; + } + + /* found reference */ + + printf("found reference\n"); + doc_skip_ws(doc, &offset); + + nref = calloc(1, sizeof(struct cos_reference)); + if (nref == NULL) { + /* todo free 
objects */ + return -1; /* memory error */ + } + + nref->id = (*cosobj_out)->u.i; + nref->generation = cosobj->u.i; + + cos_free_object(*cosobj_out); + + cosobj->type = COS_TYPE_REFERENCE; + cosobj->u.reference = nref; + + *cosobj_out = cosobj; + + *offset_out = offset; + + return 0; +} + +/** + * Decode input stream into an object + * + * lex and parse a byte stream to generate COS objects + * + * lexing the input. + * check first character: + * + * < either a hex string or a dictionary + * second char < means dictionary else hex string + * - either an integer or real + * + either an integer or real + * 0-9 an integer, unsigned integer or real + * . a real number + * ( a string + * / a name + * [ a list + * t|T boolean true + * f|F boolean false + * n|N null + * + * Grammar is: + * cos_object: + * TOK_NULL | + * TOK_BOOLEAN | + * TOK_INT | + * TOK_REAL | + * TOK_NAME | + * TOK_STRING | + * list | + * dictionary | + * object_reference; + * + * list: + * '[' listargs ']'; + * + * listargs: + * cos_object + * | + * listargs cos_object + * ; + * + * object_reference: + * TOK_UINT TOK_UINT 'R'; + */ +int cos_decode_object(struct pdf_doc *doc, + uint64_t *offset_out, + struct cos_object **cosobj_out) +{ + uint64_t offset; + int res; + struct cos_object *cosobj; + + offset = *offset_out; + + /* object could be any type use first char to try and select */ + switch (DOC_BYTE(doc, offset)) { + + case '-': + case '+': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + res = cos_decode_number(doc, &offset, &cosobj); + /* if type is positive integer try to check for reference */ + if ((res == 0) && + (cosobj->type == COS_TYPE_INT) && + (cosobj->u.i > 0)) { + res = cos_attempt_decode_reference(doc, &offset, &cosobj); + } + break; + + case '<': + if (DOC_BYTE(doc, offset + 1) == '<') { + res = cos_decode_dictionary(doc, &offset, &cosobj); + } else { + res = cos_decode_hex_string(doc, &offset, 
&cosobj); + } + break; + + case '(': + res = cos_decode_string(doc, &offset, &cosobj); + break; + + case '/': + res = cos_decode_name(doc, &offset, &cosobj); + break; + + case '[': + res = cos_decode_list(doc, &offset, &cosobj); + break; + + case 't': + case 'T': + case 'f': + case 'F': + res = cos_decode_boolean(doc, &offset, &cosobj); + break; + + case 'n': + case 'N': + res = cos_decode_null(doc, &offset, &cosobj); + break; + + default: + res = -1; /* syntax error */ + } + + + if (res == 0) { + *cosobj_out = cosobj; + *offset_out = offset; + } + + return res; +} diff --git a/src/cos_object.c b/src/cos_object.c new file mode 100644 index 0000000..96c669e --- /dev/null +++ b/src/cos_object.c @@ -0,0 +1,139 @@ +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "nspdferror.h" +#include "cos_object.h" + + +nspdferror cos_free_object(struct cos_object *cos_obj) +{ + struct cos_dictionary_entry *dentry; + struct cos_array_entry *aentry; + + switch (cos_obj->type) { + case COS_TYPE_NAME: + free(cos_obj->u.n); + break; + + case COS_TYPE_STRING: + free(cos_obj->u.s->data); + free(cos_obj->u.s); + break; + + case COS_TYPE_DICTIONARY: + dentry = cos_obj->u.dictionary; + while (dentry != NULL) { + struct cos_dictionary_entry *odentry; + + cos_free_object(dentry->key); + cos_free_object(dentry->value); + + odentry = dentry; + dentry = dentry->next; + free(odentry); + } + break; + + case COS_TYPE_ARRAY: + aentry = cos_obj->u.array; + while (aentry != NULL) { + struct cos_array_entry *oaentry; + + cos_free_object(aentry->value); + + oaentry = aentry; + aentry = aentry->next; + free(oaentry); + } + + case COS_TYPE_STREAM: + free(cos_obj->u.stream); + break; + + } + free(cos_obj); + + return NSPDFERROR_OK; +} + +nspdferror +cos_dictionary_get_value(struct cos_object *dict, + const char *key, + struct cos_object **value_out) +{ + struct cos_dictionary_entry *entry; + + if (dict->type != 
COS_TYPE_DICTIONARY) { + return NSPDFERROR_TYPE; + } + + entry = dict->u.dictionary; + while (entry != NULL) { + if (strcmp(entry->key->u.n, key) == 0) { + *value_out = entry->value; + return NSPDFERROR_OK; + } + entry = entry->next; + } + return NSPDFERROR_NOTFOUND; +} + +/** + * extracts a value for a key in a dictionary. + * + * this finds and returns a value for a given key removing it from a dictionary + */ +nspdferror +cos_dictionary_extract_value(struct cos_object *dict, + const char *key, + struct cos_object **value_out) +{ + struct cos_dictionary_entry *entry; + struct cos_dictionary_entry **prev; + + if (dict->type != COS_TYPE_DICTIONARY) { + return NSPDFERROR_TYPE; + } + + prev = &dict->u.dictionary; + entry = *prev; + while (entry != NULL) { + if (strcmp(entry->key->u.n, key) == 0) { + *value_out = entry->value; + *prev = entry->next; + cos_free_object(entry->key); + free(entry); + return NSPDFERROR_OK; + } + prev = &entry->next; + entry = *prev; + } + return NSPDFERROR_NOTFOUND; +} + +nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out) +{ + if (cobj->type != COS_TYPE_INT) { + return NSPDFERROR_TYPE; + } + *value_out = cobj->u.i; + return NSPDFERROR_OK; +} + +nspdferror +cos_get_dictionary(struct cos_object *cobj, + struct cos_object **value_out) +{ + if (cobj->type == COS_TYPE_REFERENCE) { + + } + if (cobj->type != COS_TYPE_DICTIONARY) { + return NSPDFERROR_TYPE; + } + *value_out = cobj; + return NSPDFERROR_OK; +} diff --git a/src/cos_object.h b/src/cos_object.h new file mode 100644 index 0000000..65b3ed5 --- /dev/null +++ b/src/cos_object.h @@ -0,0 +1,98 @@ +struct pdf_doc; + +enum cos_type { + COS_TYPE_NULL, + COS_TYPE_BOOL, + COS_TYPE_INT, + COS_TYPE_REAL, + COS_TYPE_NAME, + COS_TYPE_STRING, + COS_TYPE_ARRAY, + COS_TYPE_DICTIONARY, + COS_TYPE_NAMETREE, + COS_TYPE_NUMBERTREE, + COS_TYPE_STREAM, + COS_TYPE_REFERENCE, +}; + +struct cos_object; + +struct cos_dictionary_entry { + /** next key/value in dictionary */ + struct 
cos_dictionary_entry *next; + + /** key (name) */ + struct cos_object *key; + + /** value */ + struct cos_object *value; +}; + +struct cos_array_entry { + /** next value in array */ + struct cos_array_entry *next; + + /** value */ + struct cos_object *value; +}; + +struct cos_string { + uint8_t *data; + size_t length; + size_t alloc; +}; + +struct cos_reference { + /** id of indirect object */ + uint64_t id; + + /* generation of indirect object */ + uint64_t generation; +}; + +struct cos_object { + int type; + union { + /** boolean */ + bool b; + + /** integer */ + int64_t i; + + /** real */ + double r; + + /** name */ + char *n; + + /** string */ + struct cos_string *s; + + /** stream data */ + uint8_t *stream; + + /* dictionary */ + struct cos_dictionary_entry *dictionary; + + /* array */ + struct cos_array_entry *array; + + /** reference */ + struct cos_reference *reference; + + } u; +}; + +int cos_decode_object(struct pdf_doc *doc, uint64_t *offset_out, struct cos_object **cosobj_out); + +nspdferror cos_free_object(struct cos_object *cos_obj); + +nspdferror cos_dictionary_get_value(struct cos_object *dict, const char *key, struct cos_object **value_out); + +nspdferror cos_dictionary_extract_value(struct cos_object *dict, const char *key, struct cos_object **value_out); + +nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out); + +nspdferror cos_get_dictionary(struct cos_object *cobj, struct cos_object **value_out); + + diff --git a/src/pdf_doc.c b/src/pdf_doc.c new file mode 100644 index 0000000..9b92bd0 --- /dev/null +++ b/src/pdf_doc.c @@ -0,0 +1,47 @@ + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> + +#include "nspdferror.h" +#include "byte_class.h" +#include "cos_object.h" +#include "pdf_doc.h" + +/** + * move offset to next non whitespace byte + */ +int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) +{ + uint8_t c; + /* TODO sort out keeping offset in range */ + c = DOC_BYTE(doc, *offset); + while ((bclass[c] & (BC_WSPC | 
BC_CMNT) ) != 0) { + (*offset)++; + /* skip comments */ + if ((bclass[c] & BC_CMNT) != 0) { + c = DOC_BYTE(doc, *offset); + while ((bclass[c] & BC_EOLM ) == 0) { + (*offset)++; + c = DOC_BYTE(doc, *offset); + } + } + c = DOC_BYTE(doc, *offset); + } + return 0; +} + +/** + * move offset to next non eol byte + */ +int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset) +{ + uint8_t c; + /* TODO sort out keeping offset in range */ + c = DOC_BYTE(doc, *offset); + while ((bclass[c] & BC_EOLM) != 0) { + (*offset)++; + c = DOC_BYTE(doc, *offset); + } + return 0; +} diff --git a/src/pdf_doc.h b/src/pdf_doc.h new file mode 100644 index 0000000..696c121 --- /dev/null +++ b/src/pdf_doc.h @@ -0,0 +1,42 @@ +/** indirect object */ +struct xref_table_entry { + /* reference identifier */ + struct cos_reference ref; + + /** offset of object */ + uint64_t offset; + + /* indirect object if already decoded */ + struct cos_object *o; +}; + + +/** pdf document */ +struct pdf_doc { + uint8_t *buffer; + uint64_t buffer_length; + + uint8_t *start; /* start of pdf document in input stream */ + uint64_t length; + + int major; + int minor; + + /** + * Indirect object cross reference table + */ + uint64_t xref_size; + struct xref_table_entry *xref_table; + + struct cos_object *root; + struct cos_object *encrypt; + struct cos_object *info; + struct cos_object *id; + +}; + +/* byte data acessory, allows for more complex buffer handling in future */ +#define DOC_BYTE(doc, offset) (doc->start[(offset)]) + +int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset); +int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset); @@ -7,136 +7,12 @@ #include "nspdferror.h" #include "byte_class.h" +#include "cos_object.h" +#include "pdf_doc.h" #define SLEN(x) (sizeof((x)) - 1) -enum cos_type { - COS_TYPE_NULL, - COS_TYPE_BOOL, - COS_TYPE_INT, - COS_TYPE_REAL, - COS_TYPE_NAME, - COS_TYPE_STRING, - COS_TYPE_ARRAY, - COS_TYPE_DICTIONARY, - COS_TYPE_NAMETREE, - COS_TYPE_NUMBERTREE, - COS_TYPE_STREAM, - 
COS_TYPE_REFERENCE, -}; - -struct cos_object; - -struct cos_dictionary_entry { - /** next key/value in dictionary */ - struct cos_dictionary_entry *next; - - /** key (name) */ - struct cos_object *key; - - /** value */ - struct cos_object *value; -}; - -struct cos_array_entry { - /** next value in array */ - struct cos_array_entry *next; - - /** value */ - struct cos_object *value; -}; - -struct cos_string { - uint8_t *data; - size_t length; - size_t alloc; -}; - -struct cos_reference { - /** id of indirect object */ - uint64_t id; - - /* generation of indirect object */ - uint64_t generation; -}; - -struct cos_object { - int type; - union { - /** boolean */ - bool b; - - /** integer */ - int64_t i; - - /** real */ - double r; - - /** name */ - char *n; - - /** string */ - struct cos_string *s; - - /** stream data */ - uint8_t *stream; - - /* dictionary */ - struct cos_dictionary_entry *dictionary; - - /* array */ - struct cos_array_entry *array; - - /** reference */ - struct cos_reference *reference; - - } u; -}; - - -/** indirect object */ -struct cos_indirect_object { - /* reference identifier */ - struct cos_reference ref; - - /** offset of object */ - uint64_t offset; - - /* direct object if already decoded */ - struct cos_object *o; -}; - - -/** pdf document */ -struct pdf_doc { - uint8_t *buffer; - uint64_t buffer_length; - - uint8_t *start; /* start of pdf document in input stream */ - uint64_t length; - - int major; - int minor; - - /** - * Indirect object cross reference table - */ - uint64_t xref_size; - struct cos_indirect_object *xref_table; - - struct cos_object *root; - struct cos_object *encrypt; - struct cos_object *info; - struct cos_object *id; - -}; - - -int cos_decode_object(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out); - int read_whole_pdf(struct pdf_doc *doc, const char *fname) { @@ -178,46 +54,7 @@ read_whole_pdf(struct pdf_doc *doc, const char *fname) #define STARTXREF_SEARCH_SIZE 1024 -/* byte data 
acessory, allows for more complex buffer handling in future */ -#define DOC_BYTE(doc, offset) (doc->start[(offset)]) - -/** - * move offset to next non whitespace byte - */ -static int doc_skip_ws(struct pdf_doc *doc, uint64_t *offset) -{ - uint8_t c; - /* TODO sort out keeping offset in range */ - c = DOC_BYTE(doc, *offset); - while ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) { - (*offset)++; - /* skip comments */ - if ((bclass[c] & BC_CMNT) != 0) { - c = DOC_BYTE(doc, *offset); - while ((bclass[c] & BC_EOLM ) == 0) { - (*offset)++; - c = DOC_BYTE(doc, *offset); - } - } - c = DOC_BYTE(doc, *offset); - } - return 0; -} -/** - * move offset to next non eol byte - */ -static int doc_skip_eol(struct pdf_doc *doc, uint64_t *offset) -{ - uint8_t c; - /* TODO sort out keeping offset in range */ - c = DOC_BYTE(doc, *offset); - while ((bclass[c] & BC_EOLM) != 0) { - (*offset)++; - c = DOC_BYTE(doc, *offset); - } - return 0; -} static nspdferror doc_read_uint(struct pdf_doc *doc, uint64_t *offset_out, uint64_t *result_out) @@ -388,842 +225,7 @@ int check_header(struct pdf_doc *doc) } -nspdferror cos_free_object(struct cos_object *cos_obj) -{ - struct cos_dictionary_entry *dentry; - struct cos_array_entry *aentry; - - switch (cos_obj->type) { - case COS_TYPE_NAME: - free(cos_obj->u.n); - break; - - case COS_TYPE_STRING: - free(cos_obj->u.s->data); - free(cos_obj->u.s); - break; - - case COS_TYPE_DICTIONARY: - dentry = cos_obj->u.dictionary; - while (dentry != NULL) { - struct cos_dictionary_entry *odentry; - - cos_free_object(dentry->key); - cos_free_object(dentry->value); - - odentry = dentry; - dentry = dentry->next; - free(odentry); - } - break; - case COS_TYPE_ARRAY: - aentry = cos_obj->u.array; - while (aentry != NULL) { - struct cos_array_entry *oaentry; - - cos_free_object(aentry->value); - - oaentry = aentry; - aentry = aentry->next; - free(oaentry); - } - - case COS_TYPE_STREAM: - free(cos_obj->u.stream); - break; - - } - free(cos_obj); - - return NSPDFERROR_OK; -} - 
-int cos_decode_number(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - struct cos_object *cosobj; - uint8_t c; /* current byte from source data */ - unsigned int len; /* number of decimal places in number */ - uint8_t num[21]; /* temporary buffer for decimal values */ - uint64_t offset; /* current offset of source data */ - - offset = *offset_out; - - for (len = 0; len < sizeof(num); len++) { - c = DOC_BYTE(doc, offset); - if ((bclass[c] & BC_DCML) != BC_DCML) { - int64_t result = 0; /* parsed result */ - uint64_t tens; - - if (len == 0) { - return -2; /* parse error no decimals in input */ - } - /* sum value from each place */ - for (tens = 1; len > 0; tens = tens * 10, len--) { - result += (num[len - 1] * tens); - } - - doc_skip_ws(doc, &offset); - - cosobj = calloc(1, sizeof(struct cos_object)); - if (cosobj == NULL) { - return -1; /* memory error */ - } - - cosobj->type = COS_TYPE_INT; - cosobj->u.i = result; - - *cosobj_out = cosobj; - - *offset_out = offset; - - return 0; - } - num[len] = c - '0'; - offset++; - } - return -1; /* number too long */ -} - -#define COS_STRING_ALLOC 32 - -nspdferror -cos_string_append(struct cos_string *s, uint8_t c) -{ - //printf("appending 0x%x to %p len %d alloc %d\n", c, s->data, s->length, s->alloc); - if (s->length == s->alloc) { - uint8_t *ns; - ns = realloc(s->data, s->alloc + COS_STRING_ALLOC); - if (ns == NULL) { - return NSPDFERROR_NOMEM; - } - s->data = ns; - s->alloc += COS_STRING_ALLOC; - } - s->data[s->length++] = c; - return NSPDFERROR_OK; -} - -/** - * literal string processing - * - */ -nspdferror -cos_decode_string(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - struct cos_object *cosobj; - uint8_t c; - unsigned int pdepth = 1; /* depth of open parens */ - struct cos_string *cstring; - - offset = *offset_out; - - c = DOC_BYTE(doc, offset++); - if (c != '(') { - return NSPDFERROR_SYNTAX; - } - - cstring = calloc(1, 
sizeof(*cstring)); - if (cstring == NULL) { - return NSPDFERROR_NOMEM; - } - - cosobj = calloc(1, sizeof(*cosobj)); - if (cosobj == NULL) { - return NSPDFERROR_NOMEM; - } - cosobj->type = COS_TYPE_STRING; - cosobj->u.s = cstring; - - while (pdepth > 0) { - c = DOC_BYTE(doc, offset++); - - if (c == ')') { - pdepth--; - if (pdepth == 0) { - break; - } - } else if (c == '(') { - pdepth++; - } else if ((bclass[c] & BC_EOLM ) != 0) { - /* unescaped end of line characters are translated to a single - * newline - */ - c = DOC_BYTE(doc, offset); - while ((bclass[c] & BC_EOLM) != 0) { - offset++; - c = DOC_BYTE(doc, offset); - } - c = '\n'; - } else if (c == '\\') { - /* escaped chars */ - c = DOC_BYTE(doc, offset++); - switch (c) { - case 'n': - c = '\n'; - break; - - case 'r': - c = '\r'; - break; - - case 't': - c = '\t'; - break; - - case 'b': - c = '\b'; - break; - - case 'f': - c = '\f'; - break; - - case '(': - c = '('; - break; - - case ')': - c = ')'; - break; - - case '\\': - c = '\\'; - break; - - default: - - if ((bclass[c] & BC_EOLM) != 0) { - /* escaped end of line, swallow it */ - c = DOC_BYTE(doc, offset++); - while ((bclass[c] & BC_EOLM) != 0) { - c = DOC_BYTE(doc, offset++); - } - } else if ((bclass[c] & BC_OCTL) != 0) { - /* octal value */ - uint8_t val; - val = (c - '0'); - c = DOC_BYTE(doc, offset); - if ((bclass[c] & BC_OCTL) != 0) { - offset++; - val = (val << 3) | (c - '0'); - c = DOC_BYTE(doc, offset); - if ((bclass[c] & BC_OCTL) != 0) { - offset++; - val = (val << 3) | (c - '0'); - c = val; - } - } - } /* else invalid (skip backslash) */ - break; - } - } - - /* c contains the character to add to the string */ - cos_string_append(cstring, c); - } - - doc_skip_ws(doc, &offset); - - *cosobj_out = cosobj; - *offset_out = offset; - - return NSPDFERROR_OK; -} - -uint8_t xtoi(uint8_t x) -{ - if (x >= '0' && x <= '9') { - x = x - '0'; - } else if (x >= 'a' && x <='f') { - x = x - 'a' + 10; - } else if (x >= 'A' && x <='F') { - x = x - 'A' + 10; - } - 
return x; -} - -nspdferror -cos_decode_hex_string(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - struct cos_object *cosobj; - uint8_t c; - uint8_t value = 0; - struct cos_string *cstring; - bool first = true; - - offset = *offset_out; - - c = DOC_BYTE(doc, offset++); - if (c != '<') { - return NSPDFERROR_SYNTAX; - } - - cstring = calloc(1, sizeof(*cstring)); - if (cstring == NULL) { - return NSPDFERROR_NOMEM; - } - - cosobj = calloc(1, sizeof(*cosobj)); - if (cosobj == NULL) { - return NSPDFERROR_NOMEM; - } - cosobj->type = COS_TYPE_STRING; - cosobj->u.s = cstring; - - for (; offset < doc->length; offset++) { - c = DOC_BYTE(doc, offset); - if (c == '>') { - if (first == false) { - cos_string_append(cstring, value); - } - offset++; - doc_skip_ws(doc, &offset); - - *cosobj_out = cosobj; - *offset_out = offset; - - return NSPDFERROR_OK; - } else if ((bclass[c] & BC_HEXL) != 0) { - if (first) { - value = xtoi(c) << 4; - first = false; - } else { - value |= xtoi(c); - first = true; - cos_string_append(cstring, value); - } - } else if ((bclass[c] & BC_WSPC) == 0) { - break; /* unknown byte value in string */ - } - } - return NSPDFERROR_SYNTAX; -} - - -int cos_decode_dictionary(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - struct cos_object *cosobj; - struct cos_dictionary_entry *entry; - struct cos_object *key; - struct cos_object *value; - int res; - - offset = *offset_out; - - if ((DOC_BYTE(doc, offset) != '<') || - (DOC_BYTE(doc, offset + 1) != '<')) { - return -1; /* syntax error */ - } - offset += 2; - doc_skip_ws(doc, &offset); - - printf("found a dictionary\n"); - - cosobj = calloc(1, sizeof(struct cos_object)); - if (cosobj == NULL) { - return -1; /* memory error */ - } - cosobj->type = COS_TYPE_DICTIONARY; - - while ((DOC_BYTE(doc, offset) != '>') && - (DOC_BYTE(doc, offset + 1) != '>')) { - - res = cos_decode_object(doc, &offset, &key); - if (res != 
0) { - /* todo free up any dictionary entries already created */ - printf("key object decode failed\n"); - return res; - } - if (key->type != COS_TYPE_NAME) { - /* key value pairs without a name */ - printf("key was %d not a name %d\n", key->type, COS_TYPE_NAME); - return -1; /* syntax error */ - } - printf("key: %s\n", key->u.n); - - res = cos_decode_object(doc, &offset, &value); - if (res != 0) { - printf("Unable to decode value object in dictionary\n"); - /* todo free up any dictionary entries already created */ - return res; - } - - /* add dictionary entry */ - entry = calloc(1, sizeof(struct cos_dictionary_entry)); - if (entry == NULL) { - /* todo free up any dictionary entries already created */ - return -1; /* memory error */ - } - - entry->key = key; - entry->value = value; - entry->next = cosobj->u.dictionary; - - cosobj->u.dictionary = entry; - - } - offset += 2; /* skip closing >> */ - doc_skip_ws(doc, &offset); - - *cosobj_out = cosobj; - *offset_out = offset; - - return 0; -} - - -nspdferror -cos_decode_list(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - struct cos_object *cosobj; - struct cos_array_entry *entry; - struct cos_object *value; - nspdferror res; - - offset = *offset_out; - - /* sanity check first token is list open */ - if (DOC_BYTE(doc, offset) != '[') { - printf("not a [\n"); - return NSPDFERROR_SYNTAX; /* syntax error */ - } - offset++; - - /* advance offset to next token */ - res = doc_skip_ws(doc, &offset); - if (res != NSPDFERROR_OK) { - return res; - } - - printf("found a list\n"); - - cosobj = calloc(1, sizeof(struct cos_object)); - if (cosobj == NULL) { - return NSPDFERROR_NOMEM; - } - cosobj->type = COS_TYPE_ARRAY; - - while (DOC_BYTE(doc, offset) != ']') { - - res = cos_decode_object(doc, &offset, &value); - if (res != NSPDFERROR_OK) { - cos_free_object(cosobj); - printf("Unable to decode value object in list\n"); - return res; - } - - /* add entry to array */ - entry = 
calloc(1, sizeof(struct cos_array_entry)); - if (entry == NULL) { - cos_free_object(cosobj); - return NSPDFERROR_NOMEM; - } - - entry->value = value; - entry->next = cosobj->u.array; - - cosobj->u.array = entry; - } - offset++; /* skip closing ] */ - - doc_skip_ws(doc, &offset); - - *cosobj_out = cosobj; - *offset_out = offset; - - return 0; -} - -#define NAME_MAX_LENGTH 127 - -/** - * decode a name object - * - * \todo deal with # symbols on pdf versions 1.2 and later - */ -int cos_decode_name(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - struct cos_object *cosobj; - uint8_t c; - char name[NAME_MAX_LENGTH + 1]; - int idx = 0; - - offset = *offset_out; - - c = DOC_BYTE(doc, offset++); - if (c != '/') { - return -1; /* names must be prefixed with a / */ - } - printf("found a name\n"); - - c = DOC_BYTE(doc, offset); - while ((idx <= NAME_MAX_LENGTH) && - ((bclass[c] & (BC_WSPC | BC_DELM)) == 0)) { - offset++; - //printf("%c", c); - name[idx++] = c; - c = DOC_BYTE(doc, offset); - } - //printf("\nidx: %d\n", idx); - if (idx > NAME_MAX_LENGTH) { - /* name length exceeded implementation limit */ - return -1; - } - name[idx] = 0; - - //printf("name: %s\n", name); - - doc_skip_ws(doc, &offset); - - cosobj = calloc(1, sizeof(struct cos_object)); - if (cosobj == NULL) { - return -1; /* memory error */ - } - - cosobj->type = COS_TYPE_NAME; - cosobj->u.n = strdup(name); - - *cosobj_out = cosobj; - - *offset_out = offset; - - return 0; -} - - -int cos_decode_boolean(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - struct cos_object *cosobj; - uint8_t c; - bool value; - - offset = *offset_out; - - c = DOC_BYTE(doc, offset++); - if ((c == 't') || (c == 'T')) { - /* true branch */ - - c = DOC_BYTE(doc, offset++); - if ((c != 'r') && (c != 'R')) { - return -1; /* syntax error */ - } - c = DOC_BYTE(doc, offset++); - if ((c != 'u') && (c != 'U')) { - return -1; /* syntax 
error */ - } - c = DOC_BYTE(doc, offset++); - if ((c != 'e') && (c != 'E')) { - return -1; /* syntax error */ - } - value = true; - - } else if ((c == 'f') || (c == 'F')) { - /* false branch */ - - c = DOC_BYTE(doc, offset++); - if ((c != 'a') && (c != 'A')) { - return -1; /* syntax error */ - } - c = DOC_BYTE(doc, offset++); - if ((c != 'l') && (c != 'L')) { - return -1; /* syntax error */ - } - c = DOC_BYTE(doc, offset++); - if ((c != 's') && (c != 'S')) { - return -1; /* syntax error */ - } - c = DOC_BYTE(doc, offset++); - if ((c != 'e') && (c != 'E')) { - return -1; /* syntax error */ - } - - value = false; - - } else { - return -1; /* syntax error */ - } - - doc_skip_ws(doc, &offset); - - cosobj = calloc(1, sizeof(struct cos_object)); - if (cosobj == NULL) { - return -1; /* memory error */ - } - - cosobj->type = COS_TYPE_BOOL; - cosobj->u.b = value; - - *cosobj_out = cosobj; - - *offset_out = offset; - - return 0; - -} - -int cos_decode_null(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - struct cos_object *cosobj; - uint8_t c; - - offset = *offset_out; - - c = DOC_BYTE(doc, offset++); - if ((c != 'n') && (c != 'N')) { - return -1; /* syntax error */ - } - c = DOC_BYTE(doc, offset++); - if ((c != 'u') && (c != 'U')) { - return -1; /* syntax error */ - } - c = DOC_BYTE(doc, offset++); - if ((c != 'l') && (c != 'L')) { - return -1; /* syntax error */ - } - c = DOC_BYTE(doc, offset++); - if ((c != 'l') && (c != 'L')) { - return -1; /* syntax error */ - } - - doc_skip_ws(doc, &offset); - - cosobj = calloc(1, sizeof(struct cos_object)); - if (cosobj == NULL) { - return -1; /* memory error */ - } - - cosobj->type = COS_TYPE_NULL; - *offset_out = offset; - - return 0; -} - -/** - * attempt to decode the stream into a reference - * - * The stream has already had a positive integer decoded from it. 
if another - * positive integer follows and a R character after that it is a reference, - * otherwise bail, but not finding a ref is not an error! - * - * \param doc the pdf document - * \param offset_out offset of current cursor in stream - * \param cosobj_out the object to return into, on input contains the first - * integer - */ -int cos_attempt_decode_reference(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - struct cos_object *cosobj; /* possible generation object */ - uint8_t c; - int res; - struct cos_reference *nref; /* new reference */ - - offset = *offset_out; - - res = cos_decode_number(doc, &offset, &cosobj); - if (res != 0) { - return 0; /* no error if object could not be decoded */ - } - - if (cosobj->type != COS_TYPE_INT) { - /* next object was not an integer so not a reference */ - cos_free_object(cosobj); - return 0; - } - - if (cosobj->u.i < 0) { - /* integer was negative so not a reference (generations must be - * non-negative - */ - cos_free_object(cosobj); - return 0; - - } - - /* two int in a row, look for the R */ - c = DOC_BYTE(doc, offset++); - if (c != 'R') { - /* no R so not a reference */ - cos_free_object(cosobj); - return 0; - } - - /* found reference */ - - printf("found reference\n"); - doc_skip_ws(doc, &offset); - - nref = calloc(1, sizeof(struct cos_reference)); - if (nref == NULL) { - /* todo free objects */ - return -1; /* memory error */ - } - - nref->id = (*cosobj_out)->u.i; - nref->generation = cosobj->u.i; - - cos_free_object(*cosobj_out); - - cosobj->type = COS_TYPE_REFERENCE; - cosobj->u.reference = nref; - - *cosobj_out = cosobj; - - *offset_out = offset; - - return 0; -} - -/** - * Decode input stream into an object - * - * lex and parse a byte stream to generate COS objects - * - * lexing the input. 
- * check first character: - * - * < either a hex string or a dictionary - * second char < means dictionary else hex string - * - either an integer or real - * + either an integer or real - * 0-9 an integer, unsigned integer or real - * . a real number - * ( a string - * / a name - * [ a list - * t|T boolean true - * f|F boolean false - * n|N null - * - * Grammar is: - * cos_object: - * TOK_NULL | - * TOK_BOOLEAN | - * TOK_INT | - * TOK_REAL | - * TOK_NAME | - * TOK_STRING | - * list | - * dictionary | - * object_reference; - * - * list: - * '[' listargs ']'; - * - * listargs: - * cos_object - * | - * listargs cos_object - * ; - * - * object_reference: - * TOK_UINT TOK_UINT 'R'; - */ -int cos_decode_object(struct pdf_doc *doc, - uint64_t *offset_out, - struct cos_object **cosobj_out) -{ - uint64_t offset; - int res; - struct cos_object *cosobj; - - offset = *offset_out; - - /* object could be any type use first char to try and select */ - switch (DOC_BYTE(doc, offset)) { - - case '-': - case '+': - case '.': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - res = cos_decode_number(doc, &offset, &cosobj); - /* if type is positive integer try to check for reference */ - if ((res == 0) && - (cosobj->type == COS_TYPE_INT) && - (cosobj->u.i > 0)) { - res = cos_attempt_decode_reference(doc, &offset, &cosobj); - } - break; - - case '<': - if (DOC_BYTE(doc, offset + 1) == '<') { - res = cos_decode_dictionary(doc, &offset, &cosobj); - } else { - res = cos_decode_hex_string(doc, &offset, &cosobj); - } - break; - - case '(': - res = cos_decode_string(doc, &offset, &cosobj); - break; - - case '/': - res = cos_decode_name(doc, &offset, &cosobj); - break; - - case '[': - res = cos_decode_list(doc, &offset, &cosobj); - break; - - case 't': - case 'T': - case 'f': - case 'F': - res = cos_decode_boolean(doc, &offset, &cosobj); - break; - - case 'n': - case 'N': - res = cos_decode_null(doc, &offset, &cosobj); - 
break; - - default: - res = -1; /* syntax error */ - } - - - if (res == 0) { - *cosobj_out = cosobj; - *offset_out = offset; - } - - return res; -} @@ -1335,7 +337,7 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out) if ((DOC_BYTE(doc, offset++) == 'n')) { if (objnumber < doc->xref_size) { - struct cos_indirect_object *indobj; + struct xref_table_entry *indobj; indobj = doc->xref_table + objnumber; indobj->ref.id = objnumber; @@ -1357,69 +359,6 @@ decode_xref(struct pdf_doc *doc, uint64_t *offset_out) return NSPDFERROR_OK; } -nspdferror -cos_dictionary_get_value(struct cos_object *dict, - const char *key, - struct cos_object **value_out) -{ - struct cos_dictionary_entry *entry; - - if (dict->type != COS_TYPE_DICTIONARY) { - return NSPDFERROR_TYPE; - } - - entry = dict->u.dictionary; - while (entry != NULL) { - if (strcmp(entry->key->u.n, key) == 0) { - *value_out = entry->value; - return NSPDFERROR_OK; - } - entry = entry->next; - } - return NSPDFERROR_NOTFOUND; -} - -/** - * extracts a value for a key in a dictionary. 
- * - * this finds and returns a value for a given key removing it from a dictionary - */ -nspdferror -cos_dictionary_extract_value(struct cos_object *dict, - const char *key, - struct cos_object **value_out) -{ - struct cos_dictionary_entry *entry; - struct cos_dictionary_entry **prev; - - if (dict->type != COS_TYPE_DICTIONARY) { - return NSPDFERROR_TYPE; - } - - prev = &dict->u.dictionary; - entry = *prev; - while (entry != NULL) { - if (strcmp(entry->key->u.n, key) == 0) { - *value_out = entry->value; - *prev = entry->next; - cos_free_object(entry->key); - free(entry); - return NSPDFERROR_OK; - } - prev = &entry->next; - entry = *prev; - } - return NSPDFERROR_NOTFOUND; -} - -nspdferror cos_get_int(struct cos_object *cobj, int64_t *value_out) -{ - if (cobj->type != COS_TYPE_INT) { - return NSPDFERROR_TYPE; - } - *value_out = cobj->u.i; - return NSPDFERROR_OK; -} /** * recursively parse trailers and xref tables @@ -1480,7 +419,7 @@ nspdferror decode_xref_trailer(struct pdf_doc *doc, uint64_t xref_offset) goto decode_xref_trailer_failed; } - doc->xref_table = calloc(size, sizeof(struct cos_indirect_object)); + doc->xref_table = calloc(size, sizeof(struct xref_table_entry)); if (doc->xref_table == NULL) { res = NSPDFERROR_NOMEM; goto decode_xref_trailer_failed; @@ -1580,7 +519,12 @@ nspdferror decode_trailers(struct pdf_doc *doc) nspdferror decode_catalog(struct pdf_doc *doc) { - return NSPDFERROR_OK; + nspdferror res; + struct cos_object *catalog; + + res = cos_get_dictionary(doc->root, &catalog); + + return res; } nspdferror new_pdf_doc(struct pdf_doc **doc_out) |