diff options
Diffstat (limited to 'src/lzw.c')
-rw-r--r-- | src/lzw.c | 530 |
1 files changed, 384 insertions, 146 deletions
@@ -4,6 +4,7 @@ * http://www.opensource.org/licenses/mit-license.php * * Copyright 2017 Michael Drake <michael.drake@codethink.co.uk> + * Copyright 2021 Michael Drake <tlsa@netsurf-browser.org> */ #include <assert.h> @@ -20,6 +21,8 @@ * Decoder for GIF LZW data. */ +/** Maximum number of lzw table entries. */ +#define LZW_TABLE_ENTRY_MAX (1u << LZW_CODE_MAX) /** * Context for reading LZW data. @@ -33,57 +36,65 @@ * Note that an individual LZW code can be split over up to three sub-blocks. */ struct lzw_read_ctx { - const uint8_t *data; /**< Pointer to start of input data */ - uint32_t data_len; /**< Input data length */ - uint32_t data_sb_next; /**< Offset to sub-block size */ + const uint8_t *restrict data; /**< Pointer to start of input data */ + size_t data_len; /**< Input data length */ + size_t data_sb_next; /**< Offset to sub-block size */ const uint8_t *sb_data; /**< Pointer to current sub-block in data */ - uint32_t sb_bit; /**< Current bit offset in sub-block */ + size_t sb_bit; /**< Current bit offset in sub-block */ uint32_t sb_bit_count; /**< Bit count in sub-block */ }; /** - * LZW dictionary entry. + * LZW table entry. * - * Records in the dictionary are composed of 1 or more entries. - * Entries point to previous entries which can be followed to compose + * Records in the table are composed of 1 or more entries. + * Entries refer to the entry they extend which can be followed to compose * the complete record. To compose the record in reverse order, take - * the `last_value` from each entry, and move to the previous entry. - * If the previous_entry's index is < the current clear_code, then it + * the `value` from each entry, and move to the entry it extends. + * If the extended entries index is < the current clear_code, then it * is the last entry in the record. */ -struct lzw_dictionary_entry { - uint8_t last_value; /**< Last value for record ending at entry. */ - uint8_t first_value; /**< First value for entry's record. */ - uint16_t previous_entry; /**< Offset in dictionary to previous entry. */ +struct lzw_table_entry { + uint8_t value; /**< Last value for record ending at entry. */ + uint8_t first; /**< First value in entry's entire record. */ + uint16_t count; /**< Count of values in this entry's record. */ + uint16_t extends; /**< Offset in table to previous entry. */ }; /** * LZW decompression context. */ struct lzw_ctx { - /** Input reading context */ - struct lzw_read_ctx input; + struct lzw_read_ctx input; /**< Input reading context */ - uint32_t previous_code; /**< Code read from input previously. */ - uint32_t previous_code_first; /**< First value of previous code. */ + uint16_t prev_code; /**< Code read from input previously. */ + uint16_t prev_code_first; /**< First value of previous code. */ + uint16_t prev_code_count; /**< Total values for previous code. */ - uint32_t initial_code_size; /**< Starting LZW code size. */ - uint32_t current_code_size; /**< Current LZW code size. */ - uint32_t current_code_size_max; /**< Max code value for current size. */ + uint8_t initial_code_size; /**< Starting LZW code size. */ - uint32_t clear_code; /**< Special Clear code value */ - uint32_t eoi_code; /**< Special End of Information code value */ + uint8_t code_size; /**< Current LZW code size. */ + uint16_t code_max; /**< Max code value for current code size. */ - uint32_t current_entry; /**< Next position in table to fill. */ + uint16_t clear_code; /**< Special Clear code value */ + uint16_t eoi_code; /**< Special End of Information code value */ - /** Output value stack. */ - uint8_t stack_base[1 << LZW_CODE_MAX]; + uint16_t table_size; /**< Next position in table to fill. */ - /** LZW decode dictionary. Generated during decode. */ - struct lzw_dictionary_entry table[1 << LZW_CODE_MAX]; -}; + uint16_t output_code; /**< Code that has been partially output. */ + uint16_t output_left; /**< Number of values left for output_code. */ + + bool has_transparency; /**< Whether the image is opaque. */ + uint8_t transparency_idx; /**< Index representing transparency. */ + const uint32_t *restrict colour_map; /**< Index to colour mapping. */ + + /** LZW code table. Generated during decode. */ + struct lzw_table_entry table[LZW_TABLE_ENTRY_MAX]; + /** Output value stack. */ + uint8_t stack_base[LZW_TABLE_ENTRY_MAX]; +}; /* Exported function, documented in lzw.h */ lzw_result lzw_context_create(struct lzw_ctx **ctx) @@ -97,24 +108,22 @@ lzw_result lzw_context_create(struct lzw_ctx **ctx) return LZW_OK; } - /* Exported function, documented in lzw.h */ void lzw_context_destroy(struct lzw_ctx *ctx) { free(ctx); } - /** * Advance the context to the next sub-block in the input data. * * \param[in] ctx LZW reading context, updated on success. * \return LZW_OK or LZW_OK_EOD on success, appropriate error otherwise. */ -static lzw_result lzw__block_advance(struct lzw_read_ctx *ctx) +static lzw_result lzw__block_advance(struct lzw_read_ctx *restrict ctx) { - uint32_t block_size; - uint32_t next_block_pos = ctx->data_sb_next; + size_t block_size; + size_t next_block_pos = ctx->data_sb_next; const uint8_t *data_next = ctx->data + next_block_pos; if (next_block_pos >= ctx->data_len) { @@ -141,7 +150,6 @@ static lzw_result lzw__block_advance(struct lzw_read_ctx *ctx) return LZW_OK; } - /** * Get the next LZW code of given size from the raw input data. * @@ -152,31 +160,27 @@ static lzw_result lzw__block_advance(struct lzw_read_ctx *ctx) * \param[out] code_out Returns an LZW code on success. * \return LZW_OK or LZW_OK_EOD on success, appropriate error otherwise. */ -static inline lzw_result lzw__next_code( - struct lzw_read_ctx *ctx, - uint8_t code_size, - uint32_t *code_out) +static inline lzw_result lzw__read_code( + struct lzw_read_ctx *restrict ctx, + uint16_t code_size, + uint16_t *restrict code_out) { uint32_t code = 0; - uint8_t current_bit = ctx->sb_bit & 0x7; - uint8_t byte_advance = (current_bit + code_size) >> 3; - - assert(byte_advance <= 2); + uint32_t current_bit = ctx->sb_bit & 0x7; - if (ctx->sb_bit + code_size < ctx->sb_bit_count) { - /* Fast path: code fully inside this sub-block */ + if (ctx->sb_bit + 24 <= ctx->sb_bit_count) { + /* Fast path: read three bytes from this sub-block */ const uint8_t *data = ctx->sb_data + (ctx->sb_bit >> 3); - switch (byte_advance) { - case 2: code |= data[2] << 16; - case 1: code |= data[1] << 8; - case 0: code |= data[0] << 0; - } + code |= *data++ << 0; + code |= *data++ << 8; + code |= *data << 16; ctx->sb_bit += code_size; } else { /* Slow path: code spans sub-blocks */ + uint8_t byte_advance = (current_bit + code_size) >> 3; uint8_t byte = 0; - uint8_t bits_remaining_0 = (code_size < (8 - current_bit)) ? - code_size : (8 - current_bit); + uint8_t bits_remaining_0 = (code_size < (8u - current_bit)) ? + code_size : (8u - current_bit); uint8_t bits_remaining_1 = code_size - bits_remaining_0; uint8_t bits_used[3] = { [0] = bits_remaining_0, @@ -184,6 +188,8 @@ static inline lzw_result lzw__next_code( [2] = bits_remaining_1 - 8, }; + assert(byte_advance <= 2); + while (true) { const uint8_t *data = ctx->sb_data; lzw_result res; @@ -213,163 +219,395 @@ static inline lzw_result lzw__next_code( return LZW_OK; } - /** - * Clear LZW code dictionary. + * Handle clear code. * - * \param[in] ctx LZW reading context, updated. - * \param[out] stack_pos_out Returns current stack position. + * \param[in] ctx LZW reading context, updated. + * \param[out] code_out Returns next code after a clear code. * \return LZW_OK or error code. */ -static lzw_result lzw__clear_codes( +static inline lzw_result lzw__handle_clear( struct lzw_ctx *ctx, - const uint8_t ** const stack_pos_out) + uint16_t *code_out) { - uint32_t code; - uint8_t *stack_pos; + uint16_t code; - /* Reset dictionary building context */ - ctx->current_code_size = ctx->initial_code_size + 1; - ctx->current_code_size_max = (1 << ctx->current_code_size) - 1;; - ctx->current_entry = (1 << ctx->initial_code_size) + 2; + /* Reset table building context */ + ctx->code_size = ctx->initial_code_size; + ctx->code_max = (1 << ctx->initial_code_size) - 1; + ctx->table_size = ctx->eoi_code + 1; /* There might be a sequence of clear codes, so process them all */ do { - lzw_result res = lzw__next_code(&ctx->input, - ctx->current_code_size, &code); + lzw_result res = lzw__read_code(&ctx->input, + ctx->code_size, &code); if (res != LZW_OK) { return res; } } while (code == ctx->clear_code); - /* The initial code must be from the initial dictionary. */ + /* The initial code must be from the initial table. */ if (code > ctx->clear_code) { return LZW_BAD_ICODE; } - /* Record this initial code as "previous" code, needed during decode. */ - ctx->previous_code = code; - ctx->previous_code_first = code; - - /* Reset the stack, and add first non-clear code added as first item. */ - stack_pos = ctx->stack_base; - *stack_pos++ = code; - - *stack_pos_out = stack_pos; + *code_out = code; return LZW_OK; } - /* Exported function, documented in lzw.h */ lzw_result lzw_decode_init( struct lzw_ctx *ctx, - const uint8_t *compressed_data, - uint32_t compressed_data_len, - uint32_t compressed_data_pos, - uint8_t code_size, - const uint8_t ** const stack_base_out, - const uint8_t ** const stack_pos_out) + uint8_t minimum_code_size, + const uint8_t *input_data, + size_t input_length, + size_t input_pos) { - struct lzw_dictionary_entry *table = ctx->table; + struct lzw_table_entry *table = ctx->table; + lzw_result res; + uint16_t code; + + if (minimum_code_size >= LZW_CODE_MAX) { + return LZW_BAD_ICODE; + } /* Initialise the input reading context */ - ctx->input.data = compressed_data; - ctx->input.data_len = compressed_data_len; - ctx->input.data_sb_next = compressed_data_pos; + ctx->input.data = input_data; + ctx->input.data_len = input_length; + ctx->input.data_sb_next = input_pos; ctx->input.sb_bit = 0; ctx->input.sb_bit_count = 0; - /* Initialise the dictionary building context */ - ctx->initial_code_size = code_size; + /* Initialise the table building context */ + ctx->initial_code_size = minimum_code_size + 1; + + ctx->clear_code = (1 << minimum_code_size) + 0; + ctx->eoi_code = (1 << minimum_code_size) + 1; - ctx->clear_code = (1 << code_size) + 0; - ctx->eoi_code = (1 << code_size) + 1; + ctx->output_left = 0; - /* Initialise the standard dictionary entries */ - for (uint32_t i = 0; i < ctx->clear_code; ++i) { - table[i].first_value = i; - table[i].last_value = i; + /* Initialise the standard table entries */ + for (uint16_t i = 0; i < ctx->clear_code; i++) { + table[i].first = i; + table[i].value = i; + table[i].count = 1; } - *stack_base_out = ctx->stack_base; - return lzw__clear_codes(ctx, stack_pos_out); -} + res = lzw__handle_clear(ctx, &code); + if (res != LZW_OK) { + return res; + } + + /* Store details of this code as "previous code" to the context. */ + ctx->prev_code_first = ctx->table[code].first; + ctx->prev_code_count = ctx->table[code].count; + ctx->prev_code = code; + + /* Add code to context for immediate output. */ + ctx->output_code = code; + ctx->output_left = 1; + ctx->has_transparency = false; + ctx->transparency_idx = 0; + ctx->colour_map = NULL; + + return LZW_OK; +} /* Exported function, documented in lzw.h */ -lzw_result lzw_decode(struct lzw_ctx *ctx, - const uint8_t ** const stack_pos_out) +lzw_result lzw_decode_init_map( + struct lzw_ctx *ctx, + uint8_t minimum_code_size, + uint32_t transparency_idx, + const uint32_t *colour_table, + const uint8_t *input_data, + size_t input_length, + size_t input_pos) { lzw_result res; - uint32_t code_new; - uint32_t code_out; - uint8_t last_value; - uint8_t *stack_pos = ctx->stack_base; - uint32_t clear_code = ctx->clear_code; - uint32_t current_entry = ctx->current_entry; - struct lzw_dictionary_entry * const table = ctx->table; - /* Get a new code from the input */ - res = lzw__next_code(&ctx->input, ctx->current_code_size, &code_new); + if (colour_table == NULL) { + return LZW_BAD_PARAM; + } + + res = lzw_decode_init(ctx, minimum_code_size, + input_data, input_length, input_pos); if (res != LZW_OK) { return res; } - if (code_new == clear_code) { - /* Got Clear code */ - return lzw__clear_codes(ctx, stack_pos_out); + ctx->has_transparency = (transparency_idx <= 0xFF); + ctx->transparency_idx = transparency_idx; + ctx->colour_map = colour_table; + + return LZW_OK; +} + +/** + * Create new table entry. + * + * \param[in] ctx LZW reading context, updated. + * \param[in] code Last value code for new table entry. + */ +static inline void lzw__table_add_entry( + struct lzw_ctx *ctx, + uint16_t code) +{ + struct lzw_table_entry *entry = &ctx->table[ctx->table_size]; + + entry->value = code; + entry->first = ctx->prev_code_first; + entry->count = ctx->prev_code_count + 1; + entry->extends = ctx->prev_code; + + ctx->table_size++; +} + +typedef uint32_t (*lzw_writer_fn)( + struct lzw_ctx *ctx, + void *restrict output_data, + uint32_t output_length, + uint32_t output_pos, + uint16_t code, + uint16_t left); + +/** + * Get the next LZW code and write its value(s) to output buffer. + * + * \param[in] ctx LZW reading context, updated. + * \param[in] write_fn Function for writing pixels to output. + * \param[in] output_data Array to write output values into. + * \param[in] output_length Size of output array. + * \param[in,out] output_written Number of values written. Updated on exit. + * \return LZW_OK on success, or appropriate error code otherwise. + */ +static inline lzw_result lzw__decode( + struct lzw_ctx *ctx, + lzw_writer_fn write_fn, + void *restrict output_data, + uint32_t output_length, + uint32_t *restrict output_written) +{ + lzw_result res; + uint16_t code; + + /* Get a new code from the input */ + res = lzw__read_code(&ctx->input, ctx->code_size, &code); + if (res != LZW_OK) { + return res; + } - } else if (code_new == ctx->eoi_code) { + /* Handle the new code */ + if (code == ctx->eoi_code) { /* Got End of Information code */ return LZW_EOI_CODE; - } - if (code_new > current_entry) { + } else if (code > ctx->table_size) { /* Code is invalid */ return LZW_BAD_CODE; - } else if (code_new < current_entry) { - /* Code is in table */ - code_out = code_new; - last_value = table[code_new].first_value; + + } else if (code == ctx->clear_code) { + res = lzw__handle_clear(ctx, &code); + if (res != LZW_OK) { + return res; + } + + } else if (ctx->table_size < LZW_TABLE_ENTRY_MAX) { + uint16_t size = ctx->table_size; + lzw__table_add_entry(ctx, (code < size) ? + ctx->table[code].first : + ctx->prev_code_first); + + /* Ensure code size is increased, if needed. */ + if (size == ctx->code_max && ctx->code_size < LZW_CODE_MAX) { + ctx->code_size++; + ctx->code_max = (1 << ctx->code_size) - 1; + } + } + + *output_written += write_fn(ctx, + output_data, output_length, *output_written, + code, ctx->table[code].count); + + /* Store details of this code as "previous code" to the context. */ + ctx->prev_code_first = ctx->table[code].first; + ctx->prev_code_count = ctx->table[code].count; + ctx->prev_code = code; + + return LZW_OK; +} + +/** + * Write values for this code to the output stack. + * + * If there isn't enough space in the output stack, this function will write + * the as many as it can into the output. If `ctx->output_left > 0` after + * this call, then there is more data for this code left to output. The code + * is stored to the context as `ctx->output_code`. + * + * \param[in] ctx LZW reading context, updated. + * \param[in] output_data Array to write output values into. + * \param[in] output_length length Size of output array. + * \param[in] output_used Current position in output array. + * \param[in] code LZW code to output values for. + * \param[in] left Number of values remaining to output for this code. + * \return Number of pixel values written. + */ +static inline uint32_t lzw__write_fn(struct lzw_ctx *ctx, + void *restrict output_data, + uint32_t output_length, + uint32_t output_used, + uint16_t code, + uint16_t left) +{ + uint8_t *restrict output_pos = (uint8_t *)output_data + output_used; + const struct lzw_table_entry * const table = ctx->table; + uint32_t space = output_length - output_used; + uint16_t count = left; + + if (count > space) { + left = count - space; + count = space; } else { - /* Code not in table */ - *stack_pos++ = ctx->previous_code_first; - code_out = ctx->previous_code; - last_value = ctx->previous_code_first; + left = 0; + } + + ctx->output_code = code; + ctx->output_left = left; + + /* Skip over any values we don't have space for. */ + for (unsigned i = left; i != 0; i--) { + const struct lzw_table_entry *entry = table + code; + code = entry->extends; } - /* Add to the dictionary, only if there's space */ - if (current_entry < (1 << LZW_CODE_MAX)) { - struct lzw_dictionary_entry *entry = table + current_entry; - entry->last_value = last_value; - entry->first_value = ctx->previous_code_first; - entry->previous_entry = ctx->previous_code; - ctx->current_entry++; + output_pos += count; + for (unsigned i = count; i != 0; i--) { + const struct lzw_table_entry *entry = table + code; + *--output_pos = entry->value; + code = entry->extends; + } + + return count; +} + +/* Exported function, documented in lzw.h */ +lzw_result lzw_decode(struct lzw_ctx *ctx, + const uint8_t *restrict *const restrict output_data, + uint32_t *restrict output_written) +{ + const uint32_t output_length = sizeof(ctx->stack_base); + + *output_written = 0; + *output_data = ctx->stack_base; + + if (ctx->output_left != 0) { + *output_written += lzw__write_fn(ctx, + ctx->stack_base, output_length, *output_written, + ctx->output_code, ctx->output_left); } - /* Ensure code size is increased, if needed. */ - if (current_entry == ctx->current_code_size_max) { - if (ctx->current_code_size < LZW_CODE_MAX) { - ctx->current_code_size++; - ctx->current_code_size_max = - (1 << ctx->current_code_size) - 1; + while (*output_written != output_length) { + lzw_result res = lzw__decode(ctx, lzw__write_fn, + ctx->stack_base, output_length, output_written); + if (res != LZW_OK) { + return res; } } - ctx->previous_code_first = table[code_new].first_value; - ctx->previous_code = code_new; + return LZW_OK; +} + +/** + * Write colour mapped values for this code to the output. + * + * If there isn't enough space in the output stack, this function will write + * the as many as it can into the output. If `ctx->output_left > 0` after + * this call, then there is more data for this code left to output. The code + * is stored to the context as `ctx->output_code`. + * + * \param[in] ctx LZW reading context, updated. + * \param[in] output_data Array to write output values into. + * \param[in] output_length Size of output array. + * \param[in] output_used Current position in output array. + * \param[in] code LZW code to output values for. + * \param[in] left Number of values remaining to output for code. + * \return Number of pixel values written. + */ +static inline uint32_t lzw__map_write_fn(struct lzw_ctx *ctx, + void *restrict output_data, + uint32_t output_length, + uint32_t output_used, + uint16_t code, + uint16_t left) +{ + uint32_t *restrict output_pos = (uint32_t *)output_data + output_used; + const struct lzw_table_entry * const table = ctx->table; + uint32_t space = output_length - output_used; + uint16_t count = left; + + if (count > space) { + left = count - space; + count = space; + } else { + left = 0; + } + + ctx->output_code = code; + ctx->output_left = left; + + for (unsigned i = left; i != 0; i--) { + const struct lzw_table_entry *entry = table + code; + code = entry->extends; + } - /* Put rest of data for this code on output stack. - * Note, in the case of "code not in table", the last entry of the - * current code has already been placed on the stack above. */ - while (code_out > clear_code) { - struct lzw_dictionary_entry *entry = table + code_out; - *stack_pos++ = entry->last_value; - code_out = entry->previous_entry; + output_pos += count; + if (ctx->has_transparency) { + for (unsigned i = count; i != 0; i--) { + const struct lzw_table_entry *entry = table + code; + --output_pos; + if (entry->value != ctx->transparency_idx) { + *output_pos = ctx->colour_map[entry->value]; + } + code = entry->extends; + } + } else { + for (unsigned i = count; i != 0; i--) { + const struct lzw_table_entry *entry = table + code; + *--output_pos = ctx->colour_map[entry->value]; + code = entry->extends; + } + } + + return count; +} + +/* Exported function, documented in lzw.h */ +lzw_result lzw_decode_map(struct lzw_ctx *ctx, + uint32_t *restrict output_data, + uint32_t output_length, + uint32_t *restrict output_written) +{ + *output_written = 0; + + if (ctx->colour_map == NULL) { + return LZW_NO_COLOUR; + } + + if (ctx->output_left != 0) { + *output_written += lzw__map_write_fn(ctx, + output_data, output_length, *output_written, + ctx->output_code, ctx->output_left); + } + + while (*output_written != output_length) { + lzw_result res = lzw__decode(ctx, lzw__map_write_fn, + output_data, output_length, output_written); + if (res != LZW_OK) { + return res; + } } - *stack_pos++ = table[code_out].last_value; - *stack_pos_out = stack_pos; return LZW_OK; } |