Merge branch 'tlsa/lzw'

author: Michael Drake <michael.drake@codethink.co.uk> 2017-04-05 11:38:06 +0100
committer: Michael Drake <michael.drake@codethink.co.uk> 2017-04-05 11:38:06 +0100
commit: 099b873abca6b868f159e16894f3640d40dca5aa (patch)
tree: 5a8d0c86954d5bc46deff890cf7551ab1f4a5989 /src
parent: 4a6136b81993da5c838df13eb3b1433deb87abf4 (diff)
parent: f9797e4ae82090808036b1e1ec4318d0f1bdc456 (diff)
download: libnsgif-099b873abca6b868f159e16894f3640d40dca5aa.tar.gz
libnsgif-099b873abca6b868f159e16894f3640d40dca5aa.tar.bz2
4 files changed, 521 insertions, 326 deletions
diff --git a/src/Makefile b/src/Makefile
index 0fa4dc6..cb5d31f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,4 +1,4 @@
 # Sources
-DIR_SOURCES := libnsgif.c
+DIR_SOURCES := libnsgif.c lzw.c
 
 include $(NSBUILD)/Makefile.subdir
diff --git a/src/libnsgif.c b/src/libnsgif.c
index 998f8d7..6bf9956 100644
--- a/src/libnsgif.c
+++ b/src/libnsgif.c
@@ -8,6 +8,7 @@
  */
 
 #include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
@@ -15,6 +16,8 @@
 #include "libnsgif.h"
 #include "utils/log.h"
 
+#include "lzw.h"
+
 /**
  *
  * \file
@@ -36,9 +39,6 @@
 /** Internal flag that a frame is invalid/unprocessed */
 #define GIF_INVALID_FRAME -1
 
-/** Maximum LZW bits available */
-#define GIF_MAX_LZW 12
-
 /** Transparent colour */
 #define GIF_TRANSPARENT_COLOUR 0x00
 
@@ -65,313 +65,6 @@
 /** standard GIF header size */
 #define GIF_STANDARD_HEADER_SIZE 13
 
-/** LZW parameters */
-struct lzw_s {
-    unsigned char buf[4];
-    const unsigned char *direct;
-    int table[2][(1 << GIF_MAX_LZW)];
-    unsigned char stack[(1 << GIF_MAX_LZW) * 2];
-    unsigned char *stack_pointer;
-
-    int code_size, set_code_size;
-
-    int max_code, max_code_size;
-
-    int clear_code, end_code;
-
-    int curbit, lastbit, last_byte;
-
-    int firstcode;
-    int oldcode;
-
-    bool zero_data_block;
-    bool get_done;
-};
-
-
-/* General LZW values. They are shared for all GIFs being decoded, and thus we
- * can't handle progressive decoding efficiently without having the data for
- * each image which would use an extra 10Kb or so per GIF.
- */
-static struct lzw_s lzw_params = {
-    .zero_data_block = false,
-};
-
-static struct lzw_s *lzw = &lzw_params;
-
-/**
- * get the next LZW code from the GIF
- *
- * reads codes from the input data stream coping with GIF data sub blocking
- *
- * \param[in]      compressed_data      LZW compressed data
- * \param[in]      compressed_data_len  Byte size of compressed_data
- * \param[in,out]  compressed_data_pos  Current position in compressed_data
- *                                      updated on exit.
- * \param[in]      code_size            Number of bits in the current LZW code
- * \return The next code to process or error return code
- */
-static int gif_next_code(
-                const uint8_t *compressed_data,
-                uint32_t compressed_data_len,
-                uint32_t *compressed_data_pos,
-                int code_size)
-{
-        int i, j, end, count, ret;
-        uint32_t pos = *compressed_data_pos;
-        const unsigned char *b;
-        static const int maskTbl[16] = {
-                0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f,
-                0x00ff, 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff
-        };
-
-        end = lzw->curbit + code_size;
-        if (end >= lzw->lastbit) {
-                if (lzw->get_done) {
-                        return GIF_END_OF_FRAME;
-                }
-                lzw->buf[0] = lzw->direct[lzw->last_byte - 2];
-                lzw->buf[1] = lzw->direct[lzw->last_byte - 1];
-
-                /* get the next block */
-                lzw->direct = compressed_data + pos;
-                if (pos >= compressed_data_len) {
-                        return GIF_INSUFFICIENT_FRAME_DATA;
-                }
-
-                count = lzw->direct[0];
-                lzw->zero_data_block = (count == 0);
-                if ((pos + count) >= compressed_data_len) {
-                        return GIF_INSUFFICIENT_FRAME_DATA;
-                }
-
-                if (count == 0) {
-                        lzw->get_done = true;
-                } else {
-                        if (pos + 3 >= compressed_data_len) {
-                                return GIF_INSUFFICIENT_FRAME_DATA;
-                        }
-                        lzw->direct -= 1;
-                        lzw->buf[2] = lzw->direct[2];
-                        lzw->buf[3] = lzw->direct[3];
-                }
-                pos += count + 1;
-
-                /* update our variables */
-                lzw->last_byte = 2 + count;
-                lzw->curbit = (lzw->curbit - lzw->lastbit) + 16;
-                lzw->lastbit = (2 + count) << 3;
-                end = lzw->curbit + code_size;
-        }
-
-        i = lzw->curbit >> 3;
-        if (i < 2) {
-                b = lzw->buf;
-        } else {
-                b = lzw->direct;
-        }
-
-        ret = b[i];
-        j = (end >> 3) - 1;
-        if (i <= j) {
-                ret |= (b[i + 1] << 8);
-                if (i < j) {
-                        ret |= (b[i + 2] << 16);
-                }
-        }
-        ret = (ret >> (lzw->curbit % 8)) & maskTbl[code_size];
-        lzw->curbit += code_size;
-
-        *compressed_data_pos = pos;
-        return ret;
-}
-
-
-/**
- * Clear LZW code dictionary
- *
- * \param[in]      compressed_data      LZW compressed data
- * \param[in]      compressed_data_len  Byte size of compressed_data
- * \param[in,out]  compressed_data_pos  Current position in compressed_data
- *                                      updated on exit.
- * \return GIF_OK or error code.
- */
-static gif_result gif_clear_codes_LZW(
-                const uint8_t *compressed_data,
-                uint32_t compressed_data_len,
-                uint32_t *compressed_data_pos)
-{
-        int i;
-        int code;
-
-        if (lzw->clear_code >= (1 << GIF_MAX_LZW)) {
-                lzw->stack_pointer = lzw->stack;
-                return GIF_FRAME_DATA_ERROR;
-        }
-
-        /* initialise our table */
-        memset(lzw->table, 0x00, (1 << GIF_MAX_LZW) * 8);
-        for (i = 0; i < lzw->clear_code; ++i) {
-                lzw->table[1][i] = i;
-        }
-
-        /* reset LZW parameters */
-        lzw->code_size = lzw->set_code_size + 1;
-        lzw->max_code_size = lzw->clear_code << 1;
-        lzw->max_code = lzw->clear_code + 2;
-        lzw->stack_pointer = lzw->stack;
-
-        /* process repeated clear codes */
-        do {
-                code = gif_next_code(compressed_data, compressed_data_len,
-                                compressed_data_pos, lzw->code_size);
-                if (code < 0) {
-                        return code;
-                }
-        } while (code == lzw->clear_code);
-        lzw->firstcode = lzw->oldcode = code;
-
-        *lzw->stack_pointer++ = lzw->firstcode;
-
-        return GIF_OK;
-}
-
-
-/**
- * Initialise LZW
- *
- * This initialises a LZW context ready to commence decompression.
- *
- * \param[in]      compressed_data      LZW compressed data
- * \param[in]      compressed_data_len  Byte size of compressed_data
- * \param[in,out]  compressed_data_pos  Current position in compressed_data
- *                                      updated on exit.
- * \param[in]      initial_code_size    Size of codes used on clearing of code dictionary
- */
-static gif_result gif_initialise_LZW(
-                const uint8_t *compressed_data,
-                uint32_t compressed_data_len,
-                uint32_t *compressed_data_pos,
-                int initial_code_size)
-{
-        lzw->set_code_size = initial_code_size;
-        lzw->code_size = lzw->set_code_size + 1;
-        lzw->clear_code = (1 << lzw->set_code_size);
-        lzw->end_code = lzw->clear_code + 1;
-        lzw->max_code_size = lzw->clear_code << 1;
-        lzw->max_code = lzw->clear_code + 2;
-        lzw->curbit = lzw->lastbit = 0;
-        lzw->last_byte = 2;
-        lzw->get_done = false;
-        lzw->direct = lzw->buf;
-
-        return gif_clear_codes_LZW(compressed_data,
-                                compressed_data_len,
-                                compressed_data_pos);
-}
-
-/**
- * fill the LZW stack with decompressed data
- *
- * \param[in]      compressed_data      LZW compressed data
- * \param[in]      compressed_data_len  Byte size of compressed_data
- * \param[in,out]  compressed_data_pos  Current position in compressed_data
- *                                      updated on exit.
- * \return true on sucessful decode of the next LZW code else false.
- */
-static gif_result gif_next_LZW(
-                const uint8_t *compressed_data,
-                uint32_t compressed_data_len,
-                uint32_t *compressed_data_pos)
-{
-        int code, incode;
-        int block_size;
-        int new_code;
-
-        code = gif_next_code(compressed_data, compressed_data_len,
-                                compressed_data_pos, lzw->code_size);
-        if (code < 0) {
-                return code;
-        }
-
-        if (code == lzw->clear_code) {
-                return gif_clear_codes_LZW(compressed_data,
-                                compressed_data_len,
-                                compressed_data_pos);
-        }
-
-        if (code == lzw->end_code) {
-                /* skip to the end of our data so multi-image GIFs work */
-                if (lzw->zero_data_block) {
-                        return GIF_FRAME_DATA_ERROR;
-                }
-                block_size = 0;
-                while (block_size != 1 &&
-                                *compressed_data_pos < compressed_data_len) {
-                        block_size = compressed_data[*compressed_data_pos] + 1;
-                        *compressed_data_pos += block_size;
-                }
-                return GIF_FRAME_DATA_ERROR;
-        }
-
-        incode = code;
-        if (code >= lzw->max_code) {
-                if (lzw->stack_pointer >= lzw->stack + ((1 << GIF_MAX_LZW) * 2)) {
-                        return GIF_FRAME_DATA_ERROR;
-                }
-                *lzw->stack_pointer++ = lzw->firstcode;
-                code = lzw->oldcode;
-        }
-
-        /* The following loop is the most important in the GIF decoding cycle
-         * as every single pixel passes through it.
-         *
-         * Note: our stack is always big enough to hold a complete decompressed
-         * chunk.
-         */
-        while (code >= lzw->clear_code) {
-                if (lzw->stack_pointer >= lzw->stack + ((1 << GIF_MAX_LZW) * 2) ||
-                    code >= (1 << GIF_MAX_LZW)) {
-                        return GIF_FRAME_DATA_ERROR;
-                }
-                *lzw->stack_pointer++ = lzw->table[1][code];
-                new_code = lzw->table[0][code];
-                if (new_code < lzw->clear_code) {
-                        code = new_code;
-                        break;
-                }
-
-                if (lzw->stack_pointer >= lzw->stack + ((1 << GIF_MAX_LZW) * 2) ||
-                    new_code >= (1 << GIF_MAX_LZW)) {
-                        return GIF_FRAME_DATA_ERROR;
-                }
-                *lzw->stack_pointer++ = lzw->table[1][new_code];
-                code = lzw->table[0][new_code];
-                if (code == new_code) {
-                        return GIF_FRAME_DATA_ERROR;
-                }
-        }
-
-        if (lzw->stack_pointer >= lzw->stack + ((1 << GIF_MAX_LZW) * 2)) {
-                return GIF_FRAME_DATA_ERROR;
-        }
-        *lzw->stack_pointer++ = lzw->firstcode = lzw->table[1][code];
-
-        code = lzw->max_code;
-        if (code < (1 << GIF_MAX_LZW)) {
-                lzw->table[0][code] = lzw->oldcode;
-                lzw->table[1][code] = lzw->firstcode;
-                ++lzw->max_code;
-                if ((lzw->max_code >= lzw->max_code_size) &&
-                    (lzw->max_code_size < (1 << GIF_MAX_LZW))) {
-                        lzw->max_code_size = lzw->max_code_size << 1;
-                        ++lzw->code_size;
-                }
-        }
-        lzw->oldcode = incode;
-        return GIF_OK;
-}
-
 
 /**
  * Updates the sprite memory size
@@ -725,7 +418,7 @@ static gif_result gif_initialise_frame(gif_animation *gif)
         if (gif_bytes < 1) {
                 return GIF_INSUFFICIENT_FRAME_DATA;
         }
-        if (gif_data[0] > GIF_MAX_LZW) {
+        if (gif_data[0] > LZW_CODE_MAX) {
                 return GIF_DATA_ERROR;
         }
 
@@ -863,7 +556,19 @@ static unsigned int gif_interlaced_line(int height, int y) {
 }
 
 
-
+static gif_result gif_error_from_lzw(lzw_result l_res)
+{
+        static const gif_result g_res[] = {
+                [LZW_OK]        = GIF_OK,
+                [LZW_OK_EOD]    = GIF_END_OF_FRAME,
+                [LZW_NO_MEM]    = GIF_INSUFFICIENT_MEMORY,
+                [LZW_NO_DATA]   = GIF_INSUFFICIENT_FRAME_DATA,
+                [LZW_EOI_CODE]  = GIF_FRAME_DATA_ERROR,
+                [LZW_BAD_ICODE] = GIF_FRAME_DATA_ERROR,
+                [LZW_BAD_CODE]  = GIF_FRAME_DATA_ERROR,
+        };
+        return g_res[l_res];
+}
 
 
 /**
@@ -878,7 +583,6 @@ gif_internal_decode_frame(gif_animation *gif,
                           unsigned int frame,
                           bool clear_image)
 {
-        gif_result res;
         unsigned int index = 0;
         unsigned char *gif_data, *gif_end;
         int gif_bytes;
@@ -1038,6 +742,10 @@ gif_internal_decode_frame(gif_animation *gif,
 
         /* If we are clearing the image we just clear, if not decode */
         if (!clear_image) {
+                lzw_result res;
+                const uint8_t *stack_base;
+                const uint8_t *stack_pos;
+
                 /* Ensure we have enough data for a 1-byte LZW code size +
                  * 1-byte gif trailer
                  */
@@ -1114,10 +822,11 @@ gif_internal_decode_frame(gif_animation *gif,
                 gif->buffer_position = (gif_data - gif->gif_data) + 1;
 
                 /* Initialise the LZW decoding */
-                res = gif_initialise_LZW(gif->gif_data, gif->buffer_size,
-                                &gif->buffer_position, gif_data[0]);
-                if (res != GIF_OK) {
-                        return res;
+                res = lzw_decode_init(gif->lzw_ctx, gif->gif_data,
+                                gif->buffer_size, gif->buffer_position,
+                                gif_data[0], &stack_base, &stack_pos);
+                if (res != LZW_OK) {
+                        return gif_error_from_lzw(res);
                 }
 
                 /* Decompress the data */
@@ -1135,14 +844,14 @@ gif_internal_decode_frame(gif_animation *gif,
                          */
                         x = width;
                         while (x > 0) {
-                                burst_bytes = (lzw->stack_pointer - lzw->stack);
+                                burst_bytes = (stack_pos - stack_base);
                                 if (burst_bytes > 0) {
                                         if (burst_bytes > x) {
                                                 burst_bytes = x;
                                         }
                                         x -= burst_bytes;
                                         while (burst_bytes-- > 0) {
-                                                colour = *--lzw->stack_pointer;
+                                                colour = *--stack_pos;
                                                 if (((gif->frames[frame].transparency) &&
                                                      (colour != gif->frames[frame].transparency_index)) ||
                                                     (!gif->frames[frame].transparency)) {
@@ -1151,15 +860,13 @@ gif_internal_decode_frame(gif_animation *gif,
                                                 frame_scanline++;
                                         }
                                 } else {
-                                        res = gif_next_LZW(gif->gif_data,
-                                                        gif->buffer_size,
-                                                        &gif->buffer_position);
-                                        if (res != GIF_OK) {
+                                        res = lzw_decode(gif->lzw_ctx, &stack_pos);
+                                        if (res != LZW_OK) {
                                                 /* Unexpected end of frame, try to recover */
-                                                if (res == GIF_END_OF_FRAME) {
+                                                if (res == LZW_OK_EOD) {
                                                         return_value = GIF_OK;
                                                 } else {
-                                                        return_value = res;
+                                                        return_value = gif_error_from_lzw(res);
                                                 }
                                                 goto gif_decode_frame_exit;
                                         }
@@ -1230,6 +937,14 @@ gif_result gif_initialise(gif_animation *gif, size_t size, unsigned char *data)
         gif->buffer_size = size;
         gif->gif_data = data;
 
+        if (gif->lzw_ctx == NULL) {
+                lzw_result res = lzw_context_create(
+                                (struct lzw_ctx **)&gif->lzw_ctx);
+                if (res != LZW_OK) {
+                        return gif_error_from_lzw(res);
+                }
+        }
+
         /* Check for sufficient data to be a GIF (6-byte header + 7-byte
          * logical screen descriptor)
          */
@@ -1448,4 +1163,7 @@ void gif_finalise(gif_animation *gif)
         gif->local_colour_table = NULL;
         free(gif->global_colour_table);
         gif->global_colour_table = NULL;
+
+        lzw_context_destroy(gif->lzw_ctx);
+        gif->lzw_ctx = NULL;
 }
diff --git a/src/lzw.c b/src/lzw.c
new file mode 100644
index 0000000..6b7156e
--- /dev/null
+++ b/src/lzw.c
@@ -0,0 +1,372 @@
+/*
+ * This file is part of NetSurf's LibNSGIF, http://www.netsurf-browser.org/
+ * Licensed under the MIT License,
+ *                http://www.opensource.org/licenses/mit-license.php
+ *
+ * Copyright 2017 Michael Drake <michael.drake@codethink.co.uk>
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "lzw.h"
+
+/**
+ * \file
+ * \brief LZW decompression (implementation)
+ *
+ * Decoder for GIF LZW data.
+ */
+
+
+/**
+ * Context for reading LZW data.
+ *
+ * LZW data is split over multiple sub-blocks.  Each sub-block has a
+ * byte at the start, which says the sub-block size, and then the data.
+ * Zero-size sub-blocks have no data, and the biggest sub-block size is
+ * 255, which means there are 255 bytes of data following the sub-block
+ * size entry.
+ *
+ * Note that an individual LZW code can be split over up to three sub-blocks.
+ */
+struct lzw_read_ctx {
+	const uint8_t *data;    /**< Pointer to start of input data */
+	uint32_t data_len;      /**< Input data length */
+	uint32_t data_sb_next;  /**< Offset to sub-block size */
+
+	const uint8_t *sb_data; /**< Pointer to current sub-block in data */
+	uint32_t sb_bit;        /**< Current bit offset in sub-block */
+	uint32_t sb_bit_count;  /**< Bit count in sub-block */
+};
+
+/**
+ * LZW dictionary entry.
+ *
+ * Records in the dictionary are composed of 1 or more entries.
+ * Entries point to previous entries which can be followed to compose
+ * the complete record.  To compose the record in reverse order, take
+ * the `last_value` from each entry, and move to the previous entry.
+ * If the previous_entry's index is < the current clear_code, then it
+ * is the last entry in the record.
+ */
+struct lzw_dictionary_entry {
+	uint8_t last_value;      /**< Last value for record ending at entry. */
+	uint8_t first_value;     /**< First value for entry's record. */
+	uint16_t previous_entry; /**< Offset in dictionary to previous entry. */
+};
+
+/**
+ * LZW decompression context.
+ */
+struct lzw_ctx {
+	/** Input reading context */
+	struct lzw_read_ctx input;
+
+	uint32_t previous_code;       /**< Code read from input previously. */
+	uint32_t previous_code_first; /**< First value of previous code. */
+
+	uint32_t initial_code_size;     /**< Starting LZW code size. */
+	uint32_t current_code_size;     /**< Current LZW code size. */
+	uint32_t current_code_size_max; /**< Max code value for current size. */
+
+	uint32_t clear_code; /**< Special Clear code value */
+	uint32_t eoi_code;   /**< Special End of Information code value */
+
+	uint32_t current_entry; /**< Next position in table to fill. */
+
+	/** Output value stack. */
+	uint8_t stack_base[1 << LZW_CODE_MAX];
+
+	/** LZW decode dictionary. Generated during decode. */
+	struct lzw_dictionary_entry table[1 << LZW_CODE_MAX];
+};
+
+
+/* Exported function, documented in lzw.h */
+lzw_result lzw_context_create(struct lzw_ctx **ctx)
+{
+	struct lzw_ctx *c = malloc(sizeof(*c));
+	if (c == NULL) {
+		return LZW_NO_MEM;
+	}
+
+	*ctx = c;
+	return LZW_OK;
+}
+
+
+/* Exported function, documented in lzw.h */
+void lzw_context_destroy(struct lzw_ctx *ctx)
+{
+	free(ctx);
+}
+
+
+/**
+ * Advance the context to the next sub-block in the input data.
+ *
+ * \param[in] ctx  LZW reading context, updated on success.
+ * \return LZW_OK or LZW_OK_EOD on success, appropriate error otherwise.
+ */
+static lzw_result lzw__block_advance(struct lzw_read_ctx *ctx)
+{
+	uint32_t block_size;
+	uint32_t next_block_pos = ctx->data_sb_next;
+	const uint8_t *data_next = ctx->data + next_block_pos;
+
+	if (next_block_pos >= ctx->data_len) {
+		return LZW_NO_DATA;
+	}
+
+	block_size = *data_next;
+
+	if ((next_block_pos + block_size) >= ctx->data_len) {
+		return LZW_NO_DATA;
+	}
+
+	ctx->sb_bit = 0;
+	ctx->sb_bit_count = block_size * 8;
+
+	if (block_size == 0) {
+		ctx->data_sb_next += 1;
+		return LZW_OK_EOD;
+	}
+
+	ctx->sb_data = data_next + 1;
+	ctx->data_sb_next += block_size + 1;
+
+	return LZW_OK;
+}
+
+
+/**
+ * Get the next LZW code of given size from the raw input data.
+ *
+ * Reads codes from the input data stream coping with GIF data sub-blocks.
+ *
+ * \param[in]  ctx        LZW reading context, updated.
+ * \param[in]  code_size  Size in bits of the LZW code to get from data.
+ * \param[out] code_out   Returns an LZW code on success.
+ * \return LZW_OK on success, else the result of the sub-block advance.
+ */
+static inline lzw_result lzw__next_code(
+		struct lzw_read_ctx *ctx,
+		uint8_t code_size,
+		uint32_t *code_out)
+{
+	uint32_t code = 0;
+	uint8_t current_bit = ctx->sb_bit & 0x7;
+	uint8_t byte_advance = (current_bit + code_size) >> 3;
+
+	if (ctx->sb_bit + code_size < ctx->sb_bit_count) {
+		/* Fast path: code fully inside this sub-block */
+		const uint8_t *data = ctx->sb_data + (ctx->sb_bit >> 3);
+		switch (byte_advance) {
+			case 2: code |= data[2] << 16; /* Fall through */
+			case 1: code |= data[1] <<  8; /* Fall through */
+			case 0: code |= data[0] <<  0;
+		}
+		ctx->sb_bit += code_size;
+	} else {
+		/* Slow path: code may span sub-blocks; read byte by byte */
+		uint8_t byte = 0;
+		uint8_t bits_remaining_0 = (code_size < (8 - current_bit)) ?
+				code_size : (8 - current_bit);
+		uint8_t bits_remaining_1 = code_size - bits_remaining_0;
+		uint8_t bits_used[3] = {
+			[0] = bits_remaining_0,
+			[1] = bits_remaining_1 < 8 ? bits_remaining_1 : 8,
+			[2] = bits_remaining_1 - 8, /* Only read if byte_advance is 2 */
+		};
+
+		while (true) {
+			const uint8_t *data = ctx->sb_data;
+			lzw_result res;
+
+			/* Get any data from end of this sub-block */
+			while (byte <= byte_advance &&
+					ctx->sb_bit < ctx->sb_bit_count) {
+				code |= data[ctx->sb_bit >> 3] << (byte << 3);
+				ctx->sb_bit += bits_used[byte];
+				byte++;
+			}
+
+			/* Check if we have all we need */
+			if (byte > byte_advance) {
+				break;
+			}
+
+			/* Move to next sub-block */
+			res = lzw__block_advance(ctx);
+			if (res != LZW_OK) {
+				return res;
+			}
+		}
+	}
+
+	/* Drop the bits below the code's start, then mask to code_size bits */
+	*code_out = (code >> current_bit) & ((1 << code_size) - 1);
+	return LZW_OK;
+}
+
+
+/**
+ * Clear LZW code dictionary.
+ *
+ * \param[in]  ctx            LZW decompression context, updated.
+ * \param[out] stack_pos_out  Returns current stack position.
+ * \return LZW_OK on success, or appropriate error code otherwise.
+ */
+static lzw_result lzw__clear_codes(
+		struct lzw_ctx *ctx,
+		const uint8_t ** const stack_pos_out)
+{
+	uint32_t code;
+	uint8_t *stack_pos;
+
+	/* Reset dictionary building context */
+	ctx->current_code_size = ctx->initial_code_size + 1;
+	ctx->current_code_size_max = (1 << ctx->current_code_size) - 1;
+	ctx->current_entry = (1 << ctx->initial_code_size) + 2;
+
+	/* There might be a sequence of clear codes, so process them all */
+	do {
+		lzw_result res = lzw__next_code(&ctx->input,
+				ctx->current_code_size, &code);
+		if (res != LZW_OK) {
+			return res;
+		}
+	} while (code == ctx->clear_code);
+
+	/* The initial code must be from the initial dictionary. */
+	if (code > ctx->clear_code) {
+		return LZW_BAD_ICODE;
+	}
+
+	/* Record this initial code as "previous" code, needed during decode. */
+	ctx->previous_code = code;
+	ctx->previous_code_first = code;
+
+	/* Reset the stack, and add the first non-clear code as its first item. */
+	stack_pos = ctx->stack_base;
+	*stack_pos++ = code;
+
+	*stack_pos_out = stack_pos;
+	return LZW_OK;
+}
+
+
+/* Exported function, documented in lzw.h */
+lzw_result lzw_decode_init(
+		struct lzw_ctx *ctx,
+		const uint8_t *compressed_data,
+		uint32_t compressed_data_len,
+		uint32_t compressed_data_pos,
+		uint8_t code_size,
+		const uint8_t ** const stack_base_out,
+		const uint8_t ** const stack_pos_out)
+{
+	/* Codes start at code_size + 1 bits, which must fit in LZW_CODE_MAX */
+	if (code_size >= LZW_CODE_MAX) {
+		return LZW_BAD_ICODE;
+	}
+
+	/* Initialise the input reading context */
+	ctx->input.data = compressed_data;
+	ctx->input.data_len = compressed_data_len;
+	ctx->input.data_sb_next = compressed_data_pos;
+	ctx->input.sb_bit = 0;
+	ctx->input.sb_bit_count = 0;
+
+	/* Initialise the dictionary building context */
+	ctx->initial_code_size = code_size;
+	ctx->clear_code = (1 << code_size) + 0;
+	ctx->eoi_code   = (1 << code_size) + 1;
+
+	/* Initialise the standard dictionary entries */
+	for (uint32_t i = 0; i < ctx->clear_code; ++i) {
+		ctx->table[i].first_value = i;
+		ctx->table[i].last_value  = i;
+	}
+	*stack_base_out = ctx->stack_base;
+	return lzw__clear_codes(ctx, stack_pos_out);
+}
+
+
+/* Exported function, documented in lzw.h */
+lzw_result lzw_decode(struct lzw_ctx *ctx,
+		const uint8_t ** const stack_pos_out)
+{
+	lzw_result res;
+	uint32_t code_new;
+	uint32_t code_out;
+	uint8_t last_value;
+	uint8_t *stack_pos = ctx->stack_base;
+	uint32_t clear_code = ctx->clear_code;
+	uint32_t current_entry = ctx->current_entry;
+	struct lzw_dictionary_entry * const table = ctx->table;
+
+	/* Get a new code from the input */
+	res = lzw__next_code(&ctx->input, ctx->current_code_size, &code_new);
+	if (res != LZW_OK) {
+		return res;
+	}
+
+	if (code_new == clear_code) {
+		/* Got Clear code */
+		return lzw__clear_codes(ctx, stack_pos_out);
+
+	} else if (code_new == ctx->eoi_code) {
+		/* Got End of Information code */
+		return LZW_EOI_CODE;
+	}
+
+	if (code_new > current_entry) {
+		/* Code is invalid */
+		return LZW_BAD_CODE;
+	} else if (code_new < current_entry) {
+		/* Code is in table */
+		code_out = code_new;
+		last_value = table[code_new].first_value;
+	} else {
+		/* Code not in table */
+		*stack_pos++ = ctx->previous_code_first;
+		code_out = ctx->previous_code;
+		last_value = ctx->previous_code_first;
+	}
+
+	/* Add to the dictionary, only if there's space */
+	if (current_entry < (1 << LZW_CODE_MAX)) {
+		struct lzw_dictionary_entry *entry = table + current_entry;
+		entry->last_value     = last_value;
+		entry->first_value    = ctx->previous_code_first;
+		entry->previous_entry = ctx->previous_code;
+		ctx->current_entry++;
+	}
+
+	/* Ensure code size is increased, if needed. */
+	if (current_entry == ctx->current_code_size_max) {
+		if (ctx->current_code_size < LZW_CODE_MAX) {
+			ctx->current_code_size++;
+			ctx->current_code_size_max =
+					(1 << ctx->current_code_size) - 1;
+		}
+	}
+
+	ctx->previous_code_first = table[code_new].first_value;
+	ctx->previous_code = code_new;
+
+	/* Put rest of data for this code on output stack.
+	 * Note, in the case of "code not in table", the last entry of the
+	 * current code has already been placed on the stack above. */
+	while (code_out > clear_code) {
+		struct lzw_dictionary_entry *entry = table + code_out;
+		*stack_pos++ = entry->last_value;
+		code_out = entry->previous_entry;
+	}
+	*stack_pos++ = table[code_out].last_value;
+
+	*stack_pos_out = stack_pos;
+	return LZW_OK;
+}
diff --git a/src/lzw.h b/src/lzw.h
new file mode 100644
index 0000000..385b425
--- /dev/null
+++ b/src/lzw.h
@@ -0,0 +1,105 @@
+/*
+ * This file is part of NetSurf's LibNSGIF, http://www.netsurf-browser.org/
+ * Licensed under the MIT License,
+ *                http://www.opensource.org/licenses/mit-license.php
+ *
+ * Copyright 2017 Michael Drake <michael.drake@codethink.co.uk>
+ */
+
+#ifndef LZW_H_
+#define LZW_H_
+
+/**
+ * \file
+ * \brief LZW decompression (interface)
+ *
+ * Decoder for GIF LZW data.
+ */
+
+
+/** Maximum LZW code size in bits */
+#define LZW_CODE_MAX 12
+
+
+/* Declare lzw internal context structure */
+struct lzw_ctx;
+
+
+/** LZW decoding response codes */
+typedef enum lzw_result {
+	LZW_OK,        /**< Success */
+	LZW_OK_EOD,    /**< Success; reached zero-length sub-block */
+	LZW_NO_MEM,    /**< Error: Out of memory */
+	LZW_NO_DATA,   /**< Error: Out of data */
+	LZW_EOI_CODE,  /**< Error: End of Information code */
+	LZW_BAD_ICODE, /**< Error: Bad initial LZW code */
+	LZW_BAD_CODE,  /**< Error: Bad LZW code */
+} lzw_result;
+
+
+/**
+ * Create an LZW decompression context.
+ *
+ * \param[out] ctx  Returns an LZW decompression context.  Caller owned,
+ *                  free with lzw_context_destroy().
+ * \return LZW_OK on success, or appropriate error code otherwise.
+ */
+lzw_result lzw_context_create(
+		struct lzw_ctx **ctx);
+
+/**
+ * Destroy an LZW decompression context.
+ *
+ * \param[in] ctx  The LZW decompression context to destroy.
+ */
+void lzw_context_destroy(
+		struct lzw_ctx *ctx);
+
+/**
+ * Initialise an LZW decompression context for decoding.
+ *
+ * Caller owns neither `stack_base_out` nor `stack_pos_out`.
+ *
+ * \param[in]  ctx                  The LZW decompression context to initialise.
+ * \param[in]  compressed_data      The compressed data.
+ * \param[in]  compressed_data_len  Byte length of compressed data.
+ * \param[in]  compressed_data_pos  Start position in data.  Must be position
+ *                                  of a size byte at sub-block start.
+ * \param[in]  code_size            The initial LZW code size to use.
+ * \param[out] stack_base_out       Returns base of decompressed data stack.
+ * \param[out] stack_pos_out        Returns current stack position.
+ *                                  There are `stack_pos_out - stack_base_out`
+ *                                  current stack entries.
+ * \return LZW_OK on success, or appropriate error code otherwise.
+ */
+lzw_result lzw_decode_init(
+		struct lzw_ctx *ctx,
+		const uint8_t *compressed_data,
+		uint32_t compressed_data_len,
+		uint32_t compressed_data_pos,
+		uint8_t code_size,
+		const uint8_t ** const stack_base_out,
+		const uint8_t ** const stack_pos_out);
+
+/**
+ * Fill the LZW stack with decompressed data
+ *
+ * Ensure anything on the stack is used before calling this, as anything
+ * on the stack before this call will be trampled.
+ *
+ * Caller does not own `stack_pos_out`.
+ *
+ * \param[in]  ctx            LZW decompression context, updated.
+ * \param[out] stack_pos_out  Returns current stack position.
+ *                            Use with `stack_base_out` value from previous
+ *                            lzw_decode_init() call.
+ *                            There are `stack_pos_out - stack_base_out`
+ *                            current stack entries.
+ * \return LZW_OK on success, or appropriate error code otherwise.
+ */
+lzw_result lzw_decode(
+		struct lzw_ctx *ctx,
+		const uint8_t ** const stack_pos_out);
+
+
+#endif
author	Michael Drake <michael.drake@codethink.co.uk>	2017-04-05 11:38:06 +0100
committer	Michael Drake <michael.drake@codethink.co.uk>	2017-04-05 11:38:06 +0100
commit	099b873abca6b868f159e16894f3640d40dca5aa (patch)
tree	5a8d0c86954d5bc46deff890cf7551ab1f4a5989 /src
parent	4a6136b81993da5c838df13eb3b1433deb87abf4 (diff)
parent	f9797e4ae82090808036b1e1ec4318d0f1bdc456 (diff)
download	libnsgif-099b873abca6b868f159e16894f3640d40dca5aa.tar.gz libnsgif-099b873abca6b868f159e16894f3640d40dca5aa.tar.bz2