From 86ebd63cc1e9570cf247da26635375e31fc57d40 Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Mon, 3 Apr 2017 10:14:25 +0100 Subject: New LZW decoder: Add client calls to initialise LZW, and perform decode. --- src/lzw.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lzw.h | 54 +++++++++++++++++ 2 files changed, 250 insertions(+) (limited to 'src') diff --git a/src/lzw.c b/src/lzw.c index 4f7ff8f..b161799 100644 --- a/src/lzw.c +++ b/src/lzw.c @@ -41,12 +41,46 @@ struct lzw_read_ctx { uint32_t sb_bit_count; /**< Bit count in sub-block */ }; +/** + * LZW dictionary entry. + * + * Records in the dictionary are composed of 1 or more entries. + * Entries point to previous entries which can be followed to compose + * the complete record. To compose the record in reverse order, take + * the `last_value` from each entry, and move to the previous entry. + * If the previous_entry's index is < the current clear_code, then it + * is the last entry in the record. + */ +struct lzw_dictionary_entry { + uint8_t last_value; /**< Last value for record ending at entry. */ + uint8_t first_value; /**< First value for entry's record. */ + uint16_t previous_entry; /**< Offset in dictionary to previous entry. */ +}; + /** * LZW decompression context. */ struct lzw_ctx { /** Input reading context */ struct lzw_read_ctx input; + + uint32_t previous_code; /**< Code read from input previously. */ + uint32_t previous_code_first; /**< First value of previous code. */ + + uint32_t initial_code_size; /**< Starting LZW code size. */ + uint32_t current_code_size; /**< Current LZW code size. */ + uint32_t current_code_size_max; /**< Max code value for current size. */ + + uint32_t clear_code; /**< Special Clear code value */ + uint32_t eoi_code; /**< Special End of Information code value */ + + uint32_t current_entry; /**< Next position in table to fill. */ + + /** Output value stack. */ + uint8_t stack_base[1 << LZW_CODE_MAX]; + + /** LZW decode dictionary. Generated during decode. */ + struct lzw_dictionary_entry table[1 << LZW_CODE_MAX]; }; @@ -175,3 +209,165 @@ static inline lzw_result lzw__next_code( *code_out = (code >> current_bit) & ((1 << code_size) - 1); return LZW_OK; } + + +/** + * Clear LZW code dictionary. + * + * \param[in] ctx LZW reading context, updated. + * \param[out] stack_pos_out Returns current stack position. + * \return LZW_OK or error code. + */ +static lzw_result lzw__clear_codes( + struct lzw_ctx *ctx, + const uint8_t ** const stack_pos_out) +{ + uint32_t code; + uint8_t *stack_pos; + + /* Reset dictionary building context */ + ctx->current_code_size = ctx->initial_code_size + 1; + ctx->current_code_size_max = (1 << ctx->current_code_size) - 1;; + ctx->current_entry = (1 << ctx->initial_code_size) + 2; + + /* There might be a sequence of clear codes, so process them all */ + do { + lzw_result res = lzw__next_code(&ctx->input, + ctx->current_code_size, &code); + if (res != LZW_OK) { + return res; + } + } while (code == ctx->clear_code); + + /* The initial code must be from the initial dictionary. */ + if (code > ctx->clear_code) { + return LZW_BAD_ICODE; + } + + /* Record this initial code as "previous" code, needed during decode. */ + ctx->previous_code = code; + ctx->previous_code_first = code; + + /* Reset the stack, and add first non-clear code added as first item. */ + stack_pos = ctx->stack_base; + *stack_pos++ = code; + + *stack_pos_out = stack_pos; + return LZW_OK; +} + + +/* Exported function, documented in lzw.h */ +lzw_result lzw_decode_init( + struct lzw_ctx *ctx, + const uint8_t *compressed_data, + uint32_t compressed_data_len, + uint32_t compressed_data_pos, + uint8_t code_size, + const uint8_t ** const stack_base_out, + const uint8_t ** const stack_pos_out) +{ + struct lzw_dictionary_entry *table = ctx->table; + + /* Initialise the input reading context */ + ctx->input.data = compressed_data; + ctx->input.data_len = compressed_data_len; + ctx->input.data_sb_next = compressed_data_pos; + + ctx->input.sb_bit = 0; + ctx->input.sb_bit_count = 0; + + /* Initialise the dictionary building context */ + ctx->initial_code_size = code_size; + + ctx->clear_code = (1 << code_size) + 0; + ctx->eoi_code = (1 << code_size) + 1; + + /* Initialise the standard dictionary entries */ + for (uint32_t i = 0; i < ctx->clear_code; ++i) { + table[i].first_value = i; + table[i].last_value = i; + } + + *stack_base_out = ctx->stack_base; + return lzw__clear_codes(ctx, stack_pos_out); +} + + +/* Exported function, documented in lzw.h */ +lzw_result lzw_decode(struct lzw_ctx *ctx, + const uint8_t ** const stack_pos_out) +{ + lzw_result res; + uint32_t code_new; + uint32_t code_out; + struct lzw_dictionary_entry *entry; + uint8_t *stack_pos = ctx->stack_base; + uint32_t clear_code = ctx->clear_code; + uint32_t current_entry = ctx->current_entry; + struct lzw_dictionary_entry * const table = ctx->table; + + /* Get a new code from the input */ + res = lzw__next_code(&ctx->input, ctx->current_code_size, &code_new); + if (res != LZW_OK) { + return res; + } + + if (code_new == clear_code) { + /* Got Clear code */ + return lzw__clear_codes(ctx, stack_pos_out); + + } else if (code_new == ctx->eoi_code) { + /* Got End of Information code */ + return LZW_EOI_CODE; + } + + if (current_entry >= (1 << LZW_CODE_MAX)) { + /* No space in table for new entries, only Clear and + * End of Information codes were allowed. */ + return LZW_BAD_DATA; + } + + entry = table + current_entry; + if (code_new > current_entry) { + /* Code is invalid */ + return LZW_BAD_CODE; + } else if (code_new < current_entry) { + /* Code is in table */ + code_out = code_new; + entry->last_value = table[code_new].first_value; + } else { + /* Code not in table */ + *stack_pos++ = ctx->previous_code_first; + code_out = ctx->previous_code; + entry->last_value = ctx->previous_code_first; + } + entry->first_value = ctx->previous_code_first; + entry->previous_entry = ctx->previous_code; + + /* Ensure code size is increased, if needed. */ + if (current_entry == ctx->current_code_size_max) { + if (ctx->current_code_size < LZW_CODE_MAX) { + ctx->current_code_size++; + ctx->current_code_size_max = + (1 << ctx->current_code_size) - 1; + } + } + ctx->current_entry++; + + ctx->previous_code_first = table[code_new].first_value; + ctx->previous_code = code_new; + + /* Put rest of data for this code on output stack. + * Note, in the case of "code not in table", the last entry of the + * current code has already been placed on the stack above. */ + while (code_out > clear_code) { + entry = table + code_out; + *stack_pos++ = entry->last_value; + code_out = entry->previous_entry; + } + *stack_pos++ = table[code_out].last_value; + + *stack_pos_out = stack_pos; + return LZW_OK; +} diff --git a/src/lzw.h b/src/lzw.h index a908414..5812c0d 100644 --- a/src/lzw.h +++ b/src/lzw.h @@ -17,6 +17,10 @@ */ +/** Maximum LZW code size in bits */ +#define LZW_CODE_MAX 12 + + /* Declare lzw internal context structure */ struct lzw_ctx; @@ -27,6 +31,10 @@ typedef enum lzw_result { LZW_OK_EOD, /**< Success; reached zero-length sub-block */ LZW_NO_MEM, /**< Error: Out of memory */ LZW_NO_DATA, /**< Error: Out of data */ + LZW_EOI_CODE, /**< Error: End of Information code */ + LZW_BAD_ICODE, /**< Error: Bad initial LZW code */ + LZW_BAD_CODE, /**< Error: Bad LZW code */ + LZW_BAD_DATA, /**< Error: Bad data */ } lzw_result; @@ -48,5 +56,51 @@ lzw_result lzw_context_create( void lzw_context_destroy( struct lzw_ctx *ctx); +/** + * Initialise an LZW decompression context for decoding. + * + * Caller owns neither `stack_base_out` or `stack_pos_out`. + * + * \param[in] ctx The LZW decompression context to initialise. + * \param[in] compressed_data The compressed data. + * \param[in] compressed_data_len Byte length of compressed data. + * \param[in] compressed_data_pos Start position in data. Must be position + * of a size byte at sub-block start. + * \param[in] code_size The initial LZW code size to use. + * \param[out] stack_base_out Returns base of decompressed data stack. + * \param[out] stack_pos_out Returns current stack position. + * There are `stack_pos_out - stack_base_out` + * current stack entries. + * \return LZW_OK on success, or appropriate error code otherwise. + */ +lzw_result lzw_decode_init( + struct lzw_ctx *ctx, + const uint8_t *compressed_data, + uint32_t compressed_data_len, + uint32_t compressed_data_pos, + uint8_t code_size, + const uint8_t ** const stack_base_out, + const uint8_t ** const stack_pos_out); + +/** + * Fill the LZW stack with decompressed data + * + * Ensure anything on the stack is used before calling this, as anything + * on the stack before this call will be trampled. + * + * Caller does not own `stack_pos_out`. + * + * \param[in] ctx LZW reading context, updated. + * \param[out] stack_pos_out Returns current stack position. + * Use with `stack_base_out` value from previous + * lzw_decode_init() call. + * There are `stack_pos_out - stack_base_out` + * current stack entries. + * \return LZW_OK on success, or appropriate error code otherwise. + */ +lzw_result lzw_decode( + struct lzw_ctx *ctx, + const uint8_t ** const stack_pos_out); + #endif -- cgit v1.2.3