summaryrefslogtreecommitdiff
path: root/src/lzw.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lzw.c')
-rw-r--r--src/lzw.c530
1 files changed, 384 insertions, 146 deletions
diff --git a/src/lzw.c b/src/lzw.c
index 6ad95aa..3098ef5 100644
--- a/src/lzw.c
+++ b/src/lzw.c
@@ -4,6 +4,7 @@
* http://www.opensource.org/licenses/mit-license.php
*
* Copyright 2017 Michael Drake <michael.drake@codethink.co.uk>
+ * Copyright 2021 Michael Drake <tlsa@netsurf-browser.org>
*/
#include <assert.h>
@@ -20,6 +21,8 @@
* Decoder for GIF LZW data.
*/
+/** Maximum number of lzw table entries. */
+#define LZW_TABLE_ENTRY_MAX (1u << LZW_CODE_MAX)
/**
* Context for reading LZW data.
@@ -33,57 +36,65 @@
* Note that an individual LZW code can be split over up to three sub-blocks.
*/
struct lzw_read_ctx {
- const uint8_t *data; /**< Pointer to start of input data */
- uint32_t data_len; /**< Input data length */
- uint32_t data_sb_next; /**< Offset to sub-block size */
+ const uint8_t *restrict data; /**< Pointer to start of input data */
+ size_t data_len; /**< Input data length */
+ size_t data_sb_next; /**< Offset to sub-block size */
const uint8_t *sb_data; /**< Pointer to current sub-block in data */
- uint32_t sb_bit; /**< Current bit offset in sub-block */
+ size_t sb_bit; /**< Current bit offset in sub-block */
uint32_t sb_bit_count; /**< Bit count in sub-block */
};
/**
- * LZW dictionary entry.
+ * LZW table entry.
*
- * Records in the dictionary are composed of 1 or more entries.
- * Entries point to previous entries which can be followed to compose
+ * Records in the table are composed of 1 or more entries.
+ * Entries refer to the entry they extend which can be followed to compose
* the complete record. To compose the record in reverse order, take
- * the `last_value` from each entry, and move to the previous entry.
- * If the previous_entry's index is < the current clear_code, then it
+ * the `value` from each entry, and move to the entry it extends.
+ * If the extended entries index is < the current clear_code, then it
* is the last entry in the record.
*/
-struct lzw_dictionary_entry {
- uint8_t last_value; /**< Last value for record ending at entry. */
- uint8_t first_value; /**< First value for entry's record. */
- uint16_t previous_entry; /**< Offset in dictionary to previous entry. */
+struct lzw_table_entry {
+ uint8_t value; /**< Last value for record ending at entry. */
+ uint8_t first; /**< First value in entry's entire record. */
+ uint16_t count; /**< Count of values in this entry's record. */
+ uint16_t extends; /**< Offset in table to previous entry. */
};
/**
* LZW decompression context.
*/
struct lzw_ctx {
- /** Input reading context */
- struct lzw_read_ctx input;
+ struct lzw_read_ctx input; /**< Input reading context */
- uint32_t previous_code; /**< Code read from input previously. */
- uint32_t previous_code_first; /**< First value of previous code. */
+ uint16_t prev_code; /**< Code read from input previously. */
+ uint16_t prev_code_first; /**< First value of previous code. */
+ uint16_t prev_code_count; /**< Total values for previous code. */
- uint32_t initial_code_size; /**< Starting LZW code size. */
- uint32_t current_code_size; /**< Current LZW code size. */
- uint32_t current_code_size_max; /**< Max code value for current size. */
+ uint8_t initial_code_size; /**< Starting LZW code size. */
- uint32_t clear_code; /**< Special Clear code value */
- uint32_t eoi_code; /**< Special End of Information code value */
+ uint8_t code_size; /**< Current LZW code size. */
+ uint16_t code_max; /**< Max code value for current code size. */
- uint32_t current_entry; /**< Next position in table to fill. */
+ uint16_t clear_code; /**< Special Clear code value */
+ uint16_t eoi_code; /**< Special End of Information code value */
- /** Output value stack. */
- uint8_t stack_base[1 << LZW_CODE_MAX];
+ uint16_t table_size; /**< Next position in table to fill. */
- /** LZW decode dictionary. Generated during decode. */
- struct lzw_dictionary_entry table[1 << LZW_CODE_MAX];
-};
+ uint16_t output_code; /**< Code that has been partially output. */
+ uint16_t output_left; /**< Number of values left for output_code. */
+
+ bool has_transparency; /**< Whether the image is opaque. */
+ uint8_t transparency_idx; /**< Index representing transparency. */
+ const uint32_t *restrict colour_map; /**< Index to colour mapping. */
+
+ /** LZW code table. Generated during decode. */
+ struct lzw_table_entry table[LZW_TABLE_ENTRY_MAX];
+ /** Output value stack. */
+ uint8_t stack_base[LZW_TABLE_ENTRY_MAX];
+};
/* Exported function, documented in lzw.h */
lzw_result lzw_context_create(struct lzw_ctx **ctx)
@@ -97,24 +108,22 @@ lzw_result lzw_context_create(struct lzw_ctx **ctx)
return LZW_OK;
}
-
/* Exported function, documented in lzw.h */
void lzw_context_destroy(struct lzw_ctx *ctx)
{
free(ctx);
}
-
/**
* Advance the context to the next sub-block in the input data.
*
* \param[in] ctx LZW reading context, updated on success.
* \return LZW_OK or LZW_OK_EOD on success, appropriate error otherwise.
*/
-static lzw_result lzw__block_advance(struct lzw_read_ctx *ctx)
+static lzw_result lzw__block_advance(struct lzw_read_ctx *restrict ctx)
{
- uint32_t block_size;
- uint32_t next_block_pos = ctx->data_sb_next;
+ size_t block_size;
+ size_t next_block_pos = ctx->data_sb_next;
const uint8_t *data_next = ctx->data + next_block_pos;
if (next_block_pos >= ctx->data_len) {
@@ -141,7 +150,6 @@ static lzw_result lzw__block_advance(struct lzw_read_ctx *ctx)
return LZW_OK;
}
-
/**
* Get the next LZW code of given size from the raw input data.
*
@@ -152,31 +160,27 @@ static lzw_result lzw__block_advance(struct lzw_read_ctx *ctx)
* \param[out] code_out Returns an LZW code on success.
* \return LZW_OK or LZW_OK_EOD on success, appropriate error otherwise.
*/
-static inline lzw_result lzw__next_code(
- struct lzw_read_ctx *ctx,
- uint8_t code_size,
- uint32_t *code_out)
+static inline lzw_result lzw__read_code(
+ struct lzw_read_ctx *restrict ctx,
+ uint16_t code_size,
+ uint16_t *restrict code_out)
{
uint32_t code = 0;
- uint8_t current_bit = ctx->sb_bit & 0x7;
- uint8_t byte_advance = (current_bit + code_size) >> 3;
-
- assert(byte_advance <= 2);
+ uint32_t current_bit = ctx->sb_bit & 0x7;
- if (ctx->sb_bit + code_size < ctx->sb_bit_count) {
- /* Fast path: code fully inside this sub-block */
+ if (ctx->sb_bit + 24 <= ctx->sb_bit_count) {
+ /* Fast path: read three bytes from this sub-block */
const uint8_t *data = ctx->sb_data + (ctx->sb_bit >> 3);
- switch (byte_advance) {
- case 2: code |= data[2] << 16;
- case 1: code |= data[1] << 8;
- case 0: code |= data[0] << 0;
- }
+ code |= *data++ << 0;
+ code |= *data++ << 8;
+ code |= *data << 16;
ctx->sb_bit += code_size;
} else {
/* Slow path: code spans sub-blocks */
+ uint8_t byte_advance = (current_bit + code_size) >> 3;
uint8_t byte = 0;
- uint8_t bits_remaining_0 = (code_size < (8 - current_bit)) ?
- code_size : (8 - current_bit);
+ uint8_t bits_remaining_0 = (code_size < (8u - current_bit)) ?
+ code_size : (8u - current_bit);
uint8_t bits_remaining_1 = code_size - bits_remaining_0;
uint8_t bits_used[3] = {
[0] = bits_remaining_0,
@@ -184,6 +188,8 @@ static inline lzw_result lzw__next_code(
[2] = bits_remaining_1 - 8,
};
+ assert(byte_advance <= 2);
+
while (true) {
const uint8_t *data = ctx->sb_data;
lzw_result res;
@@ -213,163 +219,395 @@ static inline lzw_result lzw__next_code(
return LZW_OK;
}
-
/**
- * Clear LZW code dictionary.
+ * Handle clear code.
*
- * \param[in] ctx LZW reading context, updated.
- * \param[out] stack_pos_out Returns current stack position.
+ * \param[in] ctx LZW reading context, updated.
+ * \param[out] code_out Returns next code after a clear code.
* \return LZW_OK or error code.
*/
-static lzw_result lzw__clear_codes(
+static inline lzw_result lzw__handle_clear(
struct lzw_ctx *ctx,
- const uint8_t ** const stack_pos_out)
+ uint16_t *code_out)
{
- uint32_t code;
- uint8_t *stack_pos;
+ uint16_t code;
- /* Reset dictionary building context */
- ctx->current_code_size = ctx->initial_code_size + 1;
- ctx->current_code_size_max = (1 << ctx->current_code_size) - 1;;
- ctx->current_entry = (1 << ctx->initial_code_size) + 2;
+ /* Reset table building context */
+ ctx->code_size = ctx->initial_code_size;
+ ctx->code_max = (1 << ctx->initial_code_size) - 1;
+ ctx->table_size = ctx->eoi_code + 1;
/* There might be a sequence of clear codes, so process them all */
do {
- lzw_result res = lzw__next_code(&ctx->input,
- ctx->current_code_size, &code);
+ lzw_result res = lzw__read_code(&ctx->input,
+ ctx->code_size, &code);
if (res != LZW_OK) {
return res;
}
} while (code == ctx->clear_code);
- /* The initial code must be from the initial dictionary. */
+ /* The initial code must be from the initial table. */
if (code > ctx->clear_code) {
return LZW_BAD_ICODE;
}
- /* Record this initial code as "previous" code, needed during decode. */
- ctx->previous_code = code;
- ctx->previous_code_first = code;
-
- /* Reset the stack, and add first non-clear code added as first item. */
- stack_pos = ctx->stack_base;
- *stack_pos++ = code;
-
- *stack_pos_out = stack_pos;
+ *code_out = code;
return LZW_OK;
}
-
/* Exported function, documented in lzw.h */
lzw_result lzw_decode_init(
struct lzw_ctx *ctx,
- const uint8_t *compressed_data,
- uint32_t compressed_data_len,
- uint32_t compressed_data_pos,
- uint8_t code_size,
- const uint8_t ** const stack_base_out,
- const uint8_t ** const stack_pos_out)
+ uint8_t minimum_code_size,
+ const uint8_t *input_data,
+ size_t input_length,
+ size_t input_pos)
{
- struct lzw_dictionary_entry *table = ctx->table;
+ struct lzw_table_entry *table = ctx->table;
+ lzw_result res;
+ uint16_t code;
+
+ if (minimum_code_size >= LZW_CODE_MAX) {
+ return LZW_BAD_ICODE;
+ }
/* Initialise the input reading context */
- ctx->input.data = compressed_data;
- ctx->input.data_len = compressed_data_len;
- ctx->input.data_sb_next = compressed_data_pos;
+ ctx->input.data = input_data;
+ ctx->input.data_len = input_length;
+ ctx->input.data_sb_next = input_pos;
ctx->input.sb_bit = 0;
ctx->input.sb_bit_count = 0;
- /* Initialise the dictionary building context */
- ctx->initial_code_size = code_size;
+ /* Initialise the table building context */
+ ctx->initial_code_size = minimum_code_size + 1;
+
+ ctx->clear_code = (1 << minimum_code_size) + 0;
+ ctx->eoi_code = (1 << minimum_code_size) + 1;
- ctx->clear_code = (1 << code_size) + 0;
- ctx->eoi_code = (1 << code_size) + 1;
+ ctx->output_left = 0;
- /* Initialise the standard dictionary entries */
- for (uint32_t i = 0; i < ctx->clear_code; ++i) {
- table[i].first_value = i;
- table[i].last_value = i;
+ /* Initialise the standard table entries */
+ for (uint16_t i = 0; i < ctx->clear_code; i++) {
+ table[i].first = i;
+ table[i].value = i;
+ table[i].count = 1;
}
- *stack_base_out = ctx->stack_base;
- return lzw__clear_codes(ctx, stack_pos_out);
-}
+ res = lzw__handle_clear(ctx, &code);
+ if (res != LZW_OK) {
+ return res;
+ }
+
+ /* Store details of this code as "previous code" to the context. */
+ ctx->prev_code_first = ctx->table[code].first;
+ ctx->prev_code_count = ctx->table[code].count;
+ ctx->prev_code = code;
+
+ /* Add code to context for immediate output. */
+ ctx->output_code = code;
+ ctx->output_left = 1;
+ ctx->has_transparency = false;
+ ctx->transparency_idx = 0;
+ ctx->colour_map = NULL;
+
+ return LZW_OK;
+}
/* Exported function, documented in lzw.h */
-lzw_result lzw_decode(struct lzw_ctx *ctx,
- const uint8_t ** const stack_pos_out)
+lzw_result lzw_decode_init_map(
+ struct lzw_ctx *ctx,
+ uint8_t minimum_code_size,
+ uint32_t transparency_idx,
+ const uint32_t *colour_table,
+ const uint8_t *input_data,
+ size_t input_length,
+ size_t input_pos)
{
lzw_result res;
- uint32_t code_new;
- uint32_t code_out;
- uint8_t last_value;
- uint8_t *stack_pos = ctx->stack_base;
- uint32_t clear_code = ctx->clear_code;
- uint32_t current_entry = ctx->current_entry;
- struct lzw_dictionary_entry * const table = ctx->table;
- /* Get a new code from the input */
- res = lzw__next_code(&ctx->input, ctx->current_code_size, &code_new);
+ if (colour_table == NULL) {
+ return LZW_BAD_PARAM;
+ }
+
+ res = lzw_decode_init(ctx, minimum_code_size,
+ input_data, input_length, input_pos);
if (res != LZW_OK) {
return res;
}
- if (code_new == clear_code) {
- /* Got Clear code */
- return lzw__clear_codes(ctx, stack_pos_out);
+ ctx->has_transparency = (transparency_idx <= 0xFF);
+ ctx->transparency_idx = transparency_idx;
+ ctx->colour_map = colour_table;
+
+ return LZW_OK;
+}
+
+/**
+ * Create new table entry.
+ *
+ * \param[in] ctx LZW reading context, updated.
+ * \param[in] code Last value code for new table entry.
+ */
+static inline void lzw__table_add_entry(
+ struct lzw_ctx *ctx,
+ uint16_t code)
+{
+ struct lzw_table_entry *entry = &ctx->table[ctx->table_size];
+
+ entry->value = code;
+ entry->first = ctx->prev_code_first;
+ entry->count = ctx->prev_code_count + 1;
+ entry->extends = ctx->prev_code;
+
+ ctx->table_size++;
+}
+
+typedef uint32_t (*lzw_writer_fn)(
+ struct lzw_ctx *ctx,
+ void *restrict output_data,
+ uint32_t output_length,
+ uint32_t output_pos,
+ uint16_t code,
+ uint16_t left);
+
+/**
+ * Get the next LZW code and write its value(s) to output buffer.
+ *
+ * \param[in] ctx LZW reading context, updated.
+ * \param[in] write_fn Function for writing pixels to output.
+ * \param[in] output_data Array to write output values into.
+ * \param[in] output_length Size of output array.
+ * \param[in,out] output_written Number of values written. Updated on exit.
+ * \return LZW_OK on success, or appropriate error code otherwise.
+ */
+static inline lzw_result lzw__decode(
+ struct lzw_ctx *ctx,
+ lzw_writer_fn write_fn,
+ void *restrict output_data,
+ uint32_t output_length,
+ uint32_t *restrict output_written)
+{
+ lzw_result res;
+ uint16_t code;
+
+ /* Get a new code from the input */
+ res = lzw__read_code(&ctx->input, ctx->code_size, &code);
+ if (res != LZW_OK) {
+ return res;
+ }
- } else if (code_new == ctx->eoi_code) {
+ /* Handle the new code */
+ if (code == ctx->eoi_code) {
/* Got End of Information code */
return LZW_EOI_CODE;
- }
- if (code_new > current_entry) {
+ } else if (code > ctx->table_size) {
/* Code is invalid */
return LZW_BAD_CODE;
- } else if (code_new < current_entry) {
- /* Code is in table */
- code_out = code_new;
- last_value = table[code_new].first_value;
+
+ } else if (code == ctx->clear_code) {
+ res = lzw__handle_clear(ctx, &code);
+ if (res != LZW_OK) {
+ return res;
+ }
+
+ } else if (ctx->table_size < LZW_TABLE_ENTRY_MAX) {
+ uint16_t size = ctx->table_size;
+ lzw__table_add_entry(ctx, (code < size) ?
+ ctx->table[code].first :
+ ctx->prev_code_first);
+
+ /* Ensure code size is increased, if needed. */
+ if (size == ctx->code_max && ctx->code_size < LZW_CODE_MAX) {
+ ctx->code_size++;
+ ctx->code_max = (1 << ctx->code_size) - 1;
+ }
+ }
+
+ *output_written += write_fn(ctx,
+ output_data, output_length, *output_written,
+ code, ctx->table[code].count);
+
+ /* Store details of this code as "previous code" to the context. */
+ ctx->prev_code_first = ctx->table[code].first;
+ ctx->prev_code_count = ctx->table[code].count;
+ ctx->prev_code = code;
+
+ return LZW_OK;
+}
+
+/**
+ * Write values for this code to the output stack.
+ *
+ * If there isn't enough space in the output stack, this function will write
+ * the as many as it can into the output. If `ctx->output_left > 0` after
+ * this call, then there is more data for this code left to output. The code
+ * is stored to the context as `ctx->output_code`.
+ *
+ * \param[in] ctx LZW reading context, updated.
+ * \param[in] output_data Array to write output values into.
+ * \param[in] output_length length Size of output array.
+ * \param[in] output_used Current position in output array.
+ * \param[in] code LZW code to output values for.
+ * \param[in] left Number of values remaining to output for this code.
+ * \return Number of pixel values written.
+ */
+static inline uint32_t lzw__write_fn(struct lzw_ctx *ctx,
+ void *restrict output_data,
+ uint32_t output_length,
+ uint32_t output_used,
+ uint16_t code,
+ uint16_t left)
+{
+ uint8_t *restrict output_pos = (uint8_t *)output_data + output_used;
+ const struct lzw_table_entry * const table = ctx->table;
+ uint32_t space = output_length - output_used;
+ uint16_t count = left;
+
+ if (count > space) {
+ left = count - space;
+ count = space;
} else {
- /* Code not in table */
- *stack_pos++ = ctx->previous_code_first;
- code_out = ctx->previous_code;
- last_value = ctx->previous_code_first;
+ left = 0;
+ }
+
+ ctx->output_code = code;
+ ctx->output_left = left;
+
+ /* Skip over any values we don't have space for. */
+ for (unsigned i = left; i != 0; i--) {
+ const struct lzw_table_entry *entry = table + code;
+ code = entry->extends;
}
- /* Add to the dictionary, only if there's space */
- if (current_entry < (1 << LZW_CODE_MAX)) {
- struct lzw_dictionary_entry *entry = table + current_entry;
- entry->last_value = last_value;
- entry->first_value = ctx->previous_code_first;
- entry->previous_entry = ctx->previous_code;
- ctx->current_entry++;
+ output_pos += count;
+ for (unsigned i = count; i != 0; i--) {
+ const struct lzw_table_entry *entry = table + code;
+ *--output_pos = entry->value;
+ code = entry->extends;
+ }
+
+ return count;
+}
+
+/* Exported function, documented in lzw.h */
+lzw_result lzw_decode(struct lzw_ctx *ctx,
+ const uint8_t *restrict *const restrict output_data,
+ uint32_t *restrict output_written)
+{
+ const uint32_t output_length = sizeof(ctx->stack_base);
+
+ *output_written = 0;
+ *output_data = ctx->stack_base;
+
+ if (ctx->output_left != 0) {
+ *output_written += lzw__write_fn(ctx,
+ ctx->stack_base, output_length, *output_written,
+ ctx->output_code, ctx->output_left);
}
- /* Ensure code size is increased, if needed. */
- if (current_entry == ctx->current_code_size_max) {
- if (ctx->current_code_size < LZW_CODE_MAX) {
- ctx->current_code_size++;
- ctx->current_code_size_max =
- (1 << ctx->current_code_size) - 1;
+ while (*output_written != output_length) {
+ lzw_result res = lzw__decode(ctx, lzw__write_fn,
+ ctx->stack_base, output_length, output_written);
+ if (res != LZW_OK) {
+ return res;
}
}
- ctx->previous_code_first = table[code_new].first_value;
- ctx->previous_code = code_new;
+ return LZW_OK;
+}
+
+/**
+ * Write colour mapped values for this code to the output.
+ *
+ * If there isn't enough space in the output stack, this function will write
+ * the as many as it can into the output. If `ctx->output_left > 0` after
+ * this call, then there is more data for this code left to output. The code
+ * is stored to the context as `ctx->output_code`.
+ *
+ * \param[in] ctx LZW reading context, updated.
+ * \param[in] output_data Array to write output values into.
+ * \param[in] output_length Size of output array.
+ * \param[in] output_used Current position in output array.
+ * \param[in] code LZW code to output values for.
+ * \param[in] left Number of values remaining to output for code.
+ * \return Number of pixel values written.
+ */
+static inline uint32_t lzw__map_write_fn(struct lzw_ctx *ctx,
+ void *restrict output_data,
+ uint32_t output_length,
+ uint32_t output_used,
+ uint16_t code,
+ uint16_t left)
+{
+ uint32_t *restrict output_pos = (uint32_t *)output_data + output_used;
+ const struct lzw_table_entry * const table = ctx->table;
+ uint32_t space = output_length - output_used;
+ uint16_t count = left;
+
+ if (count > space) {
+ left = count - space;
+ count = space;
+ } else {
+ left = 0;
+ }
+
+ ctx->output_code = code;
+ ctx->output_left = left;
+
+ for (unsigned i = left; i != 0; i--) {
+ const struct lzw_table_entry *entry = table + code;
+ code = entry->extends;
+ }
- /* Put rest of data for this code on output stack.
- * Note, in the case of "code not in table", the last entry of the
- * current code has already been placed on the stack above. */
- while (code_out > clear_code) {
- struct lzw_dictionary_entry *entry = table + code_out;
- *stack_pos++ = entry->last_value;
- code_out = entry->previous_entry;
+ output_pos += count;
+ if (ctx->has_transparency) {
+ for (unsigned i = count; i != 0; i--) {
+ const struct lzw_table_entry *entry = table + code;
+ --output_pos;
+ if (entry->value != ctx->transparency_idx) {
+ *output_pos = ctx->colour_map[entry->value];
+ }
+ code = entry->extends;
+ }
+ } else {
+ for (unsigned i = count; i != 0; i--) {
+ const struct lzw_table_entry *entry = table + code;
+ *--output_pos = ctx->colour_map[entry->value];
+ code = entry->extends;
+ }
+ }
+
+ return count;
+}
+
+/* Exported function, documented in lzw.h */
+lzw_result lzw_decode_map(struct lzw_ctx *ctx,
+ uint32_t *restrict output_data,
+ uint32_t output_length,
+ uint32_t *restrict output_written)
+{
+ *output_written = 0;
+
+ if (ctx->colour_map == NULL) {
+ return LZW_NO_COLOUR;
+ }
+
+ if (ctx->output_left != 0) {
+ *output_written += lzw__map_write_fn(ctx,
+ output_data, output_length, *output_written,
+ ctx->output_code, ctx->output_left);
+ }
+
+ while (*output_written != output_length) {
+ lzw_result res = lzw__decode(ctx, lzw__map_write_fn,
+ output_data, output_length, output_written);
+ if (res != LZW_OK) {
+ return res;
+ }
}
- *stack_pos++ = table[code_out].last_value;
- *stack_pos_out = stack_pos;
return LZW_OK;
}