From e9d3ec85ad043523a47c0eef2a1662e79184e3b3 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Fri, 19 Jan 2018 23:55:30 +0000 Subject: convert to using stream offset type for stream offsets --- src/cos_object.c | 1 + src/cos_object.h | 14 +- src/cos_parse.c | 478 ++++++++++++++++++++++++++++++++++++++++++++++++------- src/cos_parse.h | 5 +- src/cos_stream.h | 36 +++++ src/document.c | 41 ++--- src/pdf_doc.c | 24 +-- src/pdf_doc.h | 30 +--- src/xref.c | 38 +++-- src/xref.h | 39 +++++ 10 files changed, 564 insertions(+), 142 deletions(-) create mode 100644 src/cos_stream.h create mode 100644 src/xref.h diff --git a/src/cos_object.c b/src/cos_object.c index 4398822..c7ec4e6 100644 --- a/src/cos_object.c +++ b/src/cos_object.c @@ -16,6 +16,7 @@ #include +#include "xref.h" #include "cos_object.h" #include "cos_parse.h" #include "pdf_doc.h" diff --git a/src/cos_object.h b/src/cos_object.h index 9b98694..c5b85fa 100644 --- a/src/cos_object.h +++ b/src/cos_object.h @@ -15,6 +15,8 @@ #ifndef NSPDF__COS_OBJECT_H_ #define NSPDF__COS_OBJECT_H_ +#include "cos_stream.h" + struct nspdf_doc; struct content_operation; @@ -75,12 +77,6 @@ struct cos_reference { uint64_t generation; /**< generation of indirect object */ }; -struct cos_stream { - unsigned int length; /**< decoded stream length */ - size_t alloc; /**< memory allocated for stream */ - const uint8_t *data; /**< decoded stream data */ -}; - /** * Synthetic parsed content object. @@ -102,7 +98,7 @@ struct cos_object { int64_t i; /** real */ - double r; + float real; /** name */ char *n; @@ -113,10 +109,10 @@ struct cos_object { /** stream data */ struct cos_stream *stream; - /* dictionary */ + /** dictionary */ struct cos_dictionary_entry *dictionary; - /* array */ + /** array */ struct cos_array *array; /** reference */ diff --git a/src/cos_parse.c b/src/cos_parse.c index d0e50f5..5c8c702 100644 --- a/src/cos_parse.c +++ b/src/cos_parse.c @@ -63,7 +63,7 @@ static uint8_t xtoi(uint8_t x) */ static nspdferror cos_parse_number(struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { nspdferror res; @@ -71,12 +71,21 @@ cos_parse_number(struct cos_stream *stream, uint8_t c; /* current byte from source data */ unsigned int len; /* number of decimal places in number */ uint8_t num[21]; /* temporary buffer for decimal values */ - uint64_t offset; /* current offset of source data */ + strmoff_t offset; /* current offset of source data */ + unsigned int point; + bool real = false; offset = *offset_out; for (len = 0; len < sizeof(num); len++) { c = stream_byte(stream, offset); + if (c == '.') { + real = true; + point = len; + offset++; + c = stream_byte(stream, offset); + } + if ((bclass[c] & BC_DCML) != BC_DCML) { int64_t result = 0; /* parsed result */ uint64_t tens; @@ -85,6 +94,9 @@ cos_parse_number(struct cos_stream *stream, /* parse error no decimals in input */ return NSPDFERROR_SYNTAX; } + + point = len - point; + /* sum value from each place */ for (tens = 1; len > 0; tens = tens * 10, len--) { result += (num[len - 1] * tens); @@ -100,8 +112,18 @@ cos_parse_number(struct cos_stream *stream, return NSPDFERROR_NOMEM; } - cosobj->type = COS_TYPE_INT; - cosobj->u.i = result; + if (real) { + unsigned int div = 1; + for (; point > 0;point--) { + div = div * 10; + } + cosobj->type = COS_TYPE_REAL; + cosobj->u.real = (float)result / div; + printf("real %d %f\n", result, cosobj->u.real); + } else { + cosobj->type = COS_TYPE_INT; + cosobj->u.i = result; + } *cosobj_out = cosobj; @@ -122,10 +144,10 @@ cos_parse_number(struct cos_stream *stream, */ static nspdferror cos_parse_string(struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { - uint64_t offset; + strmoff_t offset; struct cos_object *cosobj; uint8_t c; unsigned int pdepth = 1; /* depth of open parens */ @@ -251,10 +273,10 @@ cos_parse_string(struct cos_stream *stream, */ static nspdferror cos_parse_hex_string(struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { - uint64_t offset; + strmoff_t offset; struct cos_object *cosobj; uint8_t c; uint8_t value = 0; @@ -315,15 +337,15 @@ cos_parse_hex_string(struct cos_stream *stream, static nspdferror cos_parse_dictionary(struct nspdf_doc *doc, struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { - uint64_t offset; + nspdferror res; + strmoff_t offset; struct cos_object *cosobj; struct cos_dictionary_entry *entry; struct cos_object *key; struct cos_object *value; - int res; offset = *offset_out; @@ -404,10 +426,10 @@ cos_parse_dictionary_error: static nspdferror cos_parse_list(struct nspdf_doc *doc, struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { - uint64_t offset; + strmoff_t offset; struct cos_object *cosobj; struct cos_array *array; struct cos_object *value; @@ -485,10 +507,10 @@ cos_parse_list(struct nspdf_doc *doc, */ static nspdferror cos_parse_name(struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { - uint64_t offset; + strmoff_t offset; struct cos_object *cosobj; uint8_t c; char name[NAME_MAX_LENGTH + 1]; @@ -543,10 +565,10 @@ cos_parse_name(struct cos_stream *stream, */ static nspdferror cos_parse_boolean(struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { - uint64_t offset; + strmoff_t offset; struct cos_object *cosobj; uint8_t c; bool value; @@ -625,10 +647,10 @@ cos_parse_boolean(struct cos_stream *stream, */ static nspdferror cos_parse_null(struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { - uint64_t offset; + strmoff_t offset; struct cos_object *cosobj; uint8_t c; @@ -676,13 +698,13 @@ cos_parse_null(struct cos_stream *stream, static nspdferror cos_parse_stream(struct nspdf_doc *doc, struct cos_stream *stream_in, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { struct cos_object *cosobj; nspdferror res; struct cos_object *stream_dict; - uint64_t offset; + strmoff_t offset; struct cos_object *stream_filter; struct cos_stream *stream; int64_t stream_length; @@ -811,11 +833,11 @@ cos_parse_stream(struct nspdf_doc *doc, static nspdferror cos_attempt_parse_reference(struct nspdf_doc *doc, struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { nspdferror res; - uint64_t offset; + strmoff_t offset; uint8_t c; struct cos_object *generation; /* generation object */ @@ -992,10 +1014,10 @@ cos_attempt_parse_reference(struct nspdf_doc *doc, nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **cosobj_out) { - uint64_t offset; + strmoff_t offset; nspdferror res; struct cos_object *cosobj; @@ -1008,34 +1030,24 @@ cos_parse_object(struct nspdf_doc *doc, /* object could be any type use first char to try and select */ switch (stream_byte(stream, offset)) { - case '-': - case '+': - case '.': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': + case '-': case '+': case '.': case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': case '8': case '9': res = cos_parse_number(stream, &offset, &cosobj); /* if type is positive integer try to check for reference */ - if ((res == 0) && + if ((res == NSPDFERROR_OK) && (cosobj->type == COS_TYPE_INT) && (cosobj->u.i > 0)) { res = cos_attempt_parse_reference(doc, stream, &offset, &cosobj); } break; - case '<': - if (stream_byte(stream, offset + 1) == '<') { - res = cos_parse_dictionary(doc, stream, &offset, &cosobj); - } else { - res = cos_parse_hex_string(stream, &offset, &cosobj); - } + case 't': + case 'f': + res = cos_parse_boolean(stream, &offset, &cosobj); + break; + + case 'n': + res = cos_parse_null(stream, &offset, &cosobj); break; case '(': @@ -1046,46 +1058,392 @@ cos_parse_object(struct nspdf_doc *doc, res = cos_parse_name(stream, &offset, &cosobj); break; + case '<': + if (stream_byte(stream, offset + 1) == '<') { + res = cos_parse_dictionary(doc, stream, &offset, &cosobj); + } else { + res = cos_parse_hex_string(stream, &offset, &cosobj); + } + break; + case '[': res = cos_parse_list(doc, stream, &offset, &cosobj); break; - case 't': - case 'T': + default: + res = NSPDFERROR_SYNTAX; /* syntax error */ + } + + if (res == NSPDFERROR_OK) { + *cosobj_out = cosobj; + *offset_out = offset; + } + + return res; +} + + +static nspdferror +parse_operator(struct cos_stream *stream, + strmoff_t *offset_out, + enum content_operator *operator_out) +{ + nspdferror res; + strmoff_t offset; + enum content_operator operator; + uint8_t c; + + offset = *offset_out; + + switch (stream_byte(stream, offset++)) { + case 'b': + //CONTENT_OP_b + //CONTENT_OP_b_ + break; + + case 'B': + operator = CONTENT_OP_B; + c = stream_byte(stream, offset); + if ((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) { + switch (c) { + case '*': + operator = CONTENT_OP_B_; + offset++; + break; + + case 'I': + operator = CONTENT_OP_BI; + offset++; + break; + + case 'T': + operator = CONTENT_OP_BT; + offset++; + break; + + case 'X': + operator = CONTENT_OP_BX; + offset++; + break; + + case 'M': + if (stream_byte(stream, offset + 1) == 'C') { + operator = CONTENT_OP_BMC; + offset+=2; + } + break; + + case 'D': + if (stream_byte(stream, offset + 1) == 'C') { + operator = CONTENT_OP_BDC; + offset+=2; + } + break; + + default: + goto parse_operator_nomatch; + } + c = stream_byte(stream, offset); + } + break; + + case 'c': + //CONTENT_OP_c + //CONTENT_OP_cm + //CONTENT_OP_cs + break; + + case 'C': + //CONTENT_OP_CS + break; + + case 'd': + //CONTENT_OP_d + //CONTENT_OP_d0 + //CONTENT_OP_d1 + break; + + case 'D': + //CONTENT_OP_Do + //CONTENT_OP_DP + break; + + case 'E': + //CONTENT_OP_EI + //CONTENT_OP_EMC + //CONTENT_OP_ET + //CONTENT_OP_EX + break; + case 'f': + //CONTENT_OP_f + //CONTENT_OP_f_ + break; + case 'F': - res = cos_parse_boolean(stream, &offset, &cosobj); + //CONTENT_OP_F + break; + + case 'G': + //CONTENT_OP_G + break; + + case 'g': + operator = CONTENT_OP_g; + c = stream_byte(stream, offset); + if (((bclass[c] & (BC_WSPC | BC_CMNT) ) == 0) && (c == 's')) { + operator = CONTENT_OP_gs; + offset++; + } + c = stream_byte(stream, offset); + break; + + case 'h': + //CONTENT_OP_h + break; + + case 'i': + //CONTENT_OP_i + break; + + case 'I': + //CONTENT_OP_ID + break; + + case 'j': + //CONTENT_OP_j + break; + + case 'J': + //CONTENT_OP_J + break; + + case 'K': + operator = CONTENT_OP_K; + c = stream_byte(stream, offset); + break; + + case 'k': + operator = CONTENT_OP_k; + c = stream_byte(stream, offset); + break; + + case 'l': + operator = CONTENT_OP_l; + c = stream_byte(stream, offset); + break; + + case 'm': + break; + + case 'M': break; case 'n': - case 'N': - res = cos_parse_null(stream, &offset, &cosobj); + break; + + case 'q': + break; + + case 'Q': + break; + + case 'r': + break; + + case 'R': + break; + + case 's': + break; + + case 'S': + break; + + case 'T': + switch (stream_byte(stream, offset++)) { + case '*': + operator = CONTENT_OP_T_; + break; + + case 'c': + operator = CONTENT_OP_Tc; + break; + + case 'd': + operator = CONTENT_OP_Td; + break; + + case 'D': + operator = CONTENT_OP_TD; + break; + + case 'f': + operator = CONTENT_OP_Tf; + break; + + case 'j': + operator = CONTENT_OP_Tj; + break; + + case 'J': + operator = CONTENT_OP_TJ; + break; + + case 'L': + operator = CONTENT_OP_TL; + break; + + case 'm': + operator = CONTENT_OP_Tm; + break; + + case 'r': + operator = CONTENT_OP_Tr; + break; + + case 's': + operator = CONTENT_OP_Ts; + break; + + case 'w': + operator = CONTENT_OP_Tw; + break; + + case 'z': + operator = CONTENT_OP_Tz; + break; + + default: + goto parse_operator_nomatch; + } + + c = stream_byte(stream, offset); + break; + + case 'v': + break; + + case 'w': + break; + + case 'W': + break; + + case 'Y': + break; + + case '\'': + break; + + case '"': break; default: - res = NSPDFERROR_SYNTAX; /* syntax error */ + goto parse_operator_nomatch; } - if (res == NSPDFERROR_OK) { - *cosobj_out = cosobj; - *offset_out = offset; + /* matched prefix must be followed by a space */ + if ((bclass[c] & (BC_WSPC | BC_CMNT) ) != 0) { + res = nspdf__stream_skip_ws(stream, &offset); + if (res == NSPDFERROR_OK) { + *operator_out = operator; + *offset_out = offset; + } + return res; } - return res; +parse_operator_nomatch: + return NSPDFERROR_SYNTAX; } +#define MAX_OPERAND_COUNT 32 static nspdferror parse_content_operation(struct nspdf_doc *doc, struct cos_stream *stream, - unsigned int *offset_out, + strmoff_t *offset_out, struct content_operation *operation_out) { - unsigned int offset; + strmoff_t offset; + nspdferror res; + enum content_operator operator; + struct cos_object *operands[MAX_OPERAND_COUNT]; + unsigned int operand_idx = 0; offset = *offset_out; - offset+=stream->length; + res = parse_operator(stream, &offset, &operator); + while (res == NSPDFERROR_SYNTAX) { + /* was not an operator so check for what else it could have been */ + if (operand_idx >= MAX_OPERAND_COUNT) { + /** \todo free any stacked operands */ + printf("too many operands\n"); + return NSPDFERROR_SYNTAX; + } + + switch (stream_byte(stream, offset)) { + + case '-': case '+': case '.': case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': case '8': case '9': + res = cos_parse_number(stream, &offset, &operands[operand_idx]); + break; + + case 't': + case 'f': + res = cos_parse_boolean(stream, &offset, &operands[operand_idx]); + break; + + case 'n': + res = cos_parse_null(stream, &offset, &operands[operand_idx]); + break; + + case '(': + res = cos_parse_string(stream, &offset, &operands[operand_idx]); + break; + + case '/': + res = cos_parse_name(stream, &offset, &operands[operand_idx]); + break; + + case '[': + res = cos_parse_list(doc, stream, &offset, &operands[operand_idx]); + break; + + case '<': + if (stream_byte(stream, offset + 1) == '<') { + res = cos_parse_dictionary(doc, + stream, + &offset, + &operands[operand_idx]); + } else { + res = cos_parse_hex_string(stream, + &offset, + &operands[operand_idx]); + } + break; + + default: + printf("unknown operand type\n"); + res = NSPDFERROR_SYNTAX; /* syntax error */ + } + + if (res != NSPDFERROR_OK) { + /* parse error */ + /** \todo free any stacked operands */ + printf("operand parse failed at %c\n", + stream_byte(stream, offset)); + return res; + } + + /* move to next operand */ + operand_idx++; + + res = parse_operator(stream, &offset, &operator); + } + + operation_out->operator = operator; + printf("returning operator %d with %d operands\n", operator, operand_idx); *offset_out = offset; return NSPDFERROR_OK; @@ -1098,9 +1456,9 @@ cos_parse_content_stream(struct nspdf_doc *doc, { nspdferror res; struct cos_object *cosobj; - unsigned int offset; + strmoff_t offset; - //printf("%.*s", (int)stream->length, stream->data); + printf("%.*s", (int)stream->length, stream->data); cosobj = calloc(1, sizeof(struct cos_object)); if (cosobj == NULL) { diff --git a/src/cos_parse.h b/src/cos_parse.h index e7f1ce0..a9cb9c9 100644 --- a/src/cos_parse.h +++ b/src/cos_parse.h @@ -15,16 +15,17 @@ #ifndef NSPDF__COS_PARSE_H_ #define NSPDF__COS_PARSE_H_ +#include "cos_stream.h" + struct nspdf_doc; struct cos_object; -struct cos_stream; /** * Parse input stream into an object * * lex and parse a byte stream to generate a COS object. */ -nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, uint64_t *offset_out, struct cos_object **cosobj_out); +nspdferror cos_parse_object(struct nspdf_doc *doc, struct cos_stream *stream, strmoff_t *offset_out, struct cos_object **cosobj_out); /** * Parse content stream into content operations object diff --git a/src/cos_stream.h b/src/cos_stream.h new file mode 100644 index 0000000..0a4992c --- /dev/null +++ b/src/cos_stream.h @@ -0,0 +1,36 @@ +/* + * Copyright 2018 Vincent Sanders + * + * This file is part of libnspdf. + * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + +/** + * \file + * NetSurf PDF library COS stream + */ + +#ifndef NSPDF__COS_STREAM_H_ +#define NSPDF__COS_STREAM_H_ + +/* stream offset type */ +typedef unsigned int strmoff_t; + +/** + * stream of data. + */ +struct cos_stream { + strmoff_t length; /**< decoded stream length */ + size_t alloc; /**< memory allocated for stream */ + const uint8_t *data; /**< decoded stream data */ +}; + +static inline uint8_t +stream_byte(struct cos_stream *stream, strmoff_t offset) +{ + return *(stream->data + offset); +} + +#endif diff --git a/src/document.c b/src/document.c index 36d4c63..dcf8395 100644 --- a/src/document.c +++ b/src/document.c @@ -19,10 +19,14 @@ #include "cos_parse.h" #include "byte_class.h" #include "cos_object.h" +#include "xref.h" #include "pdf_doc.h" #define SLEN(x) (sizeof((x)) - 1) +/* byte data acessory, allows for more complex buffer handling in future */ +#define DOC_BYTE(doc, offset) (doc->start[(offset)]) + #define STARTXREF_TOK "startxref" /* Number of bytes to search back from file end to find xref start token, @@ -34,10 +38,11 @@ /** * finds the startxref marker at the end of input */ -static nspdferror find_startxref(struct nspdf_doc *doc, uint64_t *offset_out) +static nspdferror +find_startxref(struct nspdf_doc *doc, strmoff_t *offset_out) { - uint64_t offset; /* offset of characters being considered for startxref */ - uint64_t earliest; /* earliest offset to serch for startxref */ + strmoff_t offset; /* offset of characters being considered for startxref */ + unsigned int earliest; /* earliest offset to serch for startxref */ offset = doc->length - SLEN(STARTXREF_TOK); @@ -70,10 +75,10 @@ static nspdferror find_startxref(struct nspdf_doc *doc, uint64_t *offset_out) */ static nspdferror decode_startxref(struct nspdf_doc *doc, - uint64_t *offset_out, - uint64_t *start_xref_out) + strmoff_t *offset_out, + unsigned int *start_xref_out) { - uint64_t offset; /* offset of characters being considered for startxref */ + strmoff_t offset; /* offset of characters being considered for startxref */ uint64_t start_xref; nspdferror res; @@ -97,12 +102,12 @@ decode_startxref(struct nspdf_doc *doc, return res; } - res = doc_read_uint(doc, &offset, &start_xref); + res = nspdf__stream_read_uint(doc->stream, &offset, &start_xref); if (res != NSPDFERROR_OK) { return res; } - res = doc_skip_eol(doc, &offset); + res = nspdf__stream_skip_eol(doc->stream, &offset); if (res != NSPDFERROR_OK) { return res; } @@ -126,9 +131,9 @@ decode_startxref(struct nspdf_doc *doc, /** * finds the next trailer */ -static nspdferror find_trailer(struct nspdf_doc *doc, uint64_t *offset_out) +static nspdferror find_trailer(struct nspdf_doc *doc, strmoff_t *offset_out) { - uint64_t offset; /* offset of characters being considered for trailer */ + strmoff_t offset; /* offset of characters being considered for trailer */ for (offset = *offset_out;offset < doc->length; offset++) { if ((DOC_BYTE(doc, offset ) == 't') && @@ -148,12 +153,12 @@ static nspdferror find_trailer(struct nspdf_doc *doc, uint64_t *offset_out) static nspdferror decode_trailer(struct nspdf_doc *doc, - uint64_t *offset_out, + strmoff_t *offset_out, struct cos_object **trailer_out) { struct cos_object *trailer; int res; - uint64_t offset; + strmoff_t offset; offset = *offset_out; @@ -193,11 +198,11 @@ decode_trailer(struct nspdf_doc *doc, * recursively parse trailers and xref tables */ static nspdferror -decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset) +decode_xref_trailer(struct nspdf_doc *doc, unsigned int xref_offset) { nspdferror res; - uint64_t offset; /* the current data offset */ - uint64_t startxref; /* the value of the startxref field */ + strmoff_t offset; /* the current data offset */ + unsigned int startxref; /* the value of the startxref field */ struct cos_object *trailer; /* the current trailer */ int64_t prev; @@ -275,7 +280,7 @@ decode_xref_trailer(struct nspdf_doc *doc, uint64_t xref_offset) offset = xref_offset; /** @todo deal with XrefStm (number) in trailer */ - res = nspdf__xref_parse(doc, &offset); + res = nspdf__xref_parse(doc, doc->stream, &offset); if (res != NSPDFERROR_OK) { printf("failed to decode xref table\n"); goto decode_xref_trailer_failed; @@ -313,8 +318,8 @@ decode_xref_trailer_failed: static nspdferror decode_trailers(struct nspdf_doc *doc) { nspdferror res; - uint64_t offset; /* the current data offset */ - uint64_t startxref; /* the value of the first startxref field */ + strmoff_t offset; /* the current data offset */ + unsigned int startxref; /* the value of the first startxref field */ res = find_startxref(doc, &offset); if (res != NSPDFERROR_OK) { diff --git a/src/pdf_doc.c b/src/pdf_doc.c index 955f737..d7c7a0e 100644 --- a/src/pdf_doc.c +++ b/src/pdf_doc.c @@ -19,7 +19,8 @@ #include "cos_object.h" #include "pdf_doc.h" -nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset) +nspdferror +nspdf__stream_skip_ws(struct cos_stream *stream, strmoff_t *offset) { uint8_t c; /* TODO sort out keeping offset in range */ @@ -43,35 +44,36 @@ nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset) /** * move offset to next non eol byte */ -nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset) +nspdferror +nspdf__stream_skip_eol(struct cos_stream *stream, strmoff_t *offset) { uint8_t c; - /* TODO sort out keeping offset in range */ - c = DOC_BYTE(doc, *offset); + /** \todo sort out keeping offset in range */ + c = stream_byte(stream, *offset); while ((bclass[c] & BC_EOLM) != 0) { (*offset)++; - c = DOC_BYTE(doc, *offset); + c = stream_byte(stream, *offset); } return NSPDFERROR_OK; } nspdferror -doc_read_uint(struct nspdf_doc *doc, - uint64_t *offset_out, - uint64_t *result_out) +nspdf__stream_read_uint(struct cos_stream *stream, + strmoff_t *offset_out, + uint64_t *result_out) { uint8_t c; /* current byte from source data */ + strmoff_t offset; /* current offset of source data */ unsigned int len; /* number of decimal places in number */ uint8_t num[21]; /* temporary buffer for decimal values */ - uint64_t offset; /* current offset of source data */ uint64_t result=0; /* parsed result */ uint64_t tens; offset = *offset_out; for (len = 0; len < sizeof(num); len++) { - c = DOC_BYTE(doc, offset); + c = stream_byte(stream, offset); if ((bclass[c] & BC_DCML) != BC_DCML) { if (len == 0) { return -2; /* parse error no decimals in input */ @@ -89,5 +91,5 @@ doc_read_uint(struct nspdf_doc *doc, num[len] = c - '0'; offset++; } - return -1; /* number too long */ + return NSPDFERROR_RANGE; /* number too long */ } diff --git a/src/pdf_doc.h b/src/pdf_doc.h index 27a730a..4853170 100644 --- a/src/pdf_doc.h +++ b/src/pdf_doc.h @@ -15,6 +15,8 @@ #ifndef NSPDF__PDF_DOC_H_ #define NSPDF__PDF_DOC_H_ +#include "cos_stream.h" + struct xref_table_entry; struct page_table_entry; @@ -50,33 +52,11 @@ struct nspdf_doc { struct page_table_entry *page_table; }; -/* byte data acessory, allows for more complex buffer handling in future */ -#define DOC_BYTE(doc, offset) (doc->start[(offset)]) - -static inline uint8_t -stream_byte(struct cos_stream *stream, unsigned int offset) -{ - return *(stream->data + offset); -} - /* helpers in pdf_doc.c */ -nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, uint64_t *offset); -nspdferror doc_skip_eol(struct nspdf_doc *doc, uint64_t *offset); -nspdferror doc_read_uint(struct nspdf_doc *doc, uint64_t *offset_out, uint64_t *result_out); - -/* cross reference table handlers */ -/** - * parse xref from file - */ -nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out); - - -/** - * get an object dereferencing through xref table if necessary - */ -nspdferror nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out); +nspdferror nspdf__stream_skip_ws(struct cos_stream *stream, strmoff_t *offset); +nspdferror nspdf__stream_skip_eol(struct cos_stream *stream, strmoff_t *offset); +nspdferror nspdf__stream_read_uint(struct cos_stream *stream, strmoff_t *offset_out, uint64_t *result_out); -nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size); nspdferror nspdf__decode_page_tree(struct nspdf_doc *doc, struct cos_object *page_tree_node, unsigned int *page_index); diff --git a/src/xref.c b/src/xref.c index cdd4088..7780bf2 100644 --- a/src/xref.c +++ b/src/xref.c @@ -16,6 +16,7 @@ #include "cos_parse.h" #include "cos_object.h" #include "pdf_doc.h" +#include "xref.h" /** indirect object */ @@ -24,7 +25,7 @@ struct xref_table_entry { struct cos_reference ref; /** offset of object */ - uint64_t offset; + strmoff_t offset; /* indirect object if already decoded */ struct cos_object *object; @@ -50,9 +51,12 @@ nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size) return NSPDFERROR_OK; } -nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out) +nspdferror +nspdf__xref_parse(struct nspdf_doc *doc, + struct cos_stream *stream, + strmoff_t *offset_out) { - uint64_t offset; + strmoff_t offset; nspdferror res; uint64_t objnumber; /* current object number */ uint64_t objcount; @@ -60,15 +64,15 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out) offset = *offset_out; /* xref object header */ - if ((DOC_BYTE(doc, offset ) != 'x') && - (DOC_BYTE(doc, offset + 1) != 'r') && - (DOC_BYTE(doc, offset + 2) != 'e') && - (DOC_BYTE(doc, offset + 3) != 'f')) { + if ((stream_byte(stream, offset ) != 'x') || + (stream_byte(stream, offset + 1) != 'r') || + (stream_byte(stream, offset + 2) != 'e') || + (stream_byte(stream, offset + 3) != 'f')) { return NSPDFERROR_SYNTAX; } offset += 4; - res = nspdf__stream_skip_ws(doc->stream, &offset); + res = nspdf__stream_skip_ws(stream, &offset); if (res != NSPDFERROR_OK) { return res; } @@ -76,20 +80,20 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out) /* subsections * */ - res = doc_read_uint(doc, &offset, &objnumber); + res = nspdf__stream_read_uint(stream, &offset, &objnumber); while (res == NSPDFERROR_OK) { uint64_t lastobj; - res = nspdf__stream_skip_ws(doc->stream, &offset); + res = nspdf__stream_skip_ws(stream, &offset); if (res != NSPDFERROR_OK) { return res; } - res = doc_read_uint(doc, &offset, &objcount); + res = nspdf__stream_read_uint(stream, &offset, &objcount); if (res != NSPDFERROR_OK) { return res; } - res = nspdf__stream_skip_ws(doc->stream, &offset); + res = nspdf__stream_skip_ws(stream, &offset); if (res != NSPDFERROR_OK) { return res; } @@ -103,19 +107,19 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out) uint64_t objgeneration; /* object index */ - res = doc_read_uint(doc, &offset, &objindex); + res = nspdf__stream_read_uint(stream, &offset, &objindex); if (res != NSPDFERROR_OK) { return res; } offset++; /* skip space */ - res = doc_read_uint(doc, &offset, &objgeneration); + res = nspdf__stream_read_uint(stream, &offset, &objgeneration); if (res != NSPDFERROR_OK) { return res; } offset++; /* skip space */ - if ((DOC_BYTE(doc, offset++) == 'n')) { + if ((stream_byte(stream, offset++) == 'n')) { if (objnumber < doc->xref_table_size) { struct xref_table_entry *indobj; indobj = doc->xref_table + objnumber; @@ -133,7 +137,7 @@ nspdferror nspdf__xref_parse(struct nspdf_doc *doc, uint64_t *offset_out) offset += 2; /* skip EOL */ } - res = doc_read_uint(doc, &offset, &objnumber); + res = nspdf__stream_read_uint(stream, &offset, &objnumber); } return NSPDFERROR_OK; @@ -146,7 +150,7 @@ nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out) nspdferror res; struct cos_object *cobj; struct cos_object *indirect; - uint64_t offset; + strmoff_t offset; struct xref_table_entry *entry; cobj = *cobj_out; diff --git a/src/xref.h b/src/xref.h new file mode 100644 index 0000000..e53f2b2 --- /dev/null +++ b/src/xref.h @@ -0,0 +1,39 @@ +/* + * Copyright 2018 Vincent Sanders + * + * This file is part of libnspdf. + * + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + */ + +/** + * \file + * NetSurf PDF library cross reference table handling + */ + +#ifndef NSPDF__XREF_H_ +#define NSPDF__XREF_H_ + +#include "cos_stream.h" + +struct nspdf_doc; +struct cos_object; + +/** + * parse xref from file + */ +nspdferror nspdf__xref_parse(struct nspdf_doc *doc, struct cos_stream *stream, strmoff_t *offset_out); + + +/** + * get an object dereferencing through xref table if necessary + */ +nspdferror nspdf__xref_get_referenced(struct nspdf_doc *doc, struct cos_object **cobj_out); + +/** + * allocate storage for cross reference table + */ +nspdferror nspdf__xref_allocate(struct nspdf_doc *doc, int64_t size); + +#endif -- cgit v1.2.3