/*
 * This file is part of libdom.
 * Licensed under the MIT License,
 *                http://www.opensource.org/licenses/mit-license.php
 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
 * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
 */

#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <string.h>

#include <parserutils/charset/utf8.h>

#include "core/string.h"
#include "core/document.h"
#include "utils/utils.h"

/**
 * A DOM string
 *
 * Strings are reference counted so destruction is performed correctly.
 *
 * A string is either backed by its own buffer (::intern is NULL) or by an
 * interned lwc_string (::intern is non-NULL). In the interned case ::ptr
 * holds the pointer obtained from lwc_string_data() and is not released
 * through ::alloc when the string is destroyed.
 */
struct dom_string {
	uint8_t *ptr;			/**< Pointer to string data */

	size_t len;			/**< Byte length of string */

	lwc_string *intern;		/**< The lwc_string of this string,
					 *   or NULL if not interned */

	dom_alloc alloc;		/**< Memory (de)allocation function */
	void *pw;			/**< Client-specific data passed to
					 *   every ::alloc call */

	uint32_t refcnt;		/**< Reference count */
};

/*
 * Shared, statically allocated empty string.
 *
 * Functions in this file substitute it for NULL string arguments and
 * dom_string_create() hands it out for zero-length input. It has no
 * allocator and no interned string, so dom_string_unref() never attempts
 * to release it.
 */
static struct dom_string empty_string = { 
	.ptr = NULL,
	.len = 0,
	.intern = NULL,
	.alloc = NULL,
	.pw = NULL,
	.refcnt = 1
};


/**
 * Take an additional reference on a DOM string
 *
 * \param str  The string to reference; NULL is accepted and ignored
 */
void dom_string_ref(struct dom_string *str)
{
	if (str == NULL)
		return;

	str->refcnt += 1;
}

/**
 * Drop a reference on a DOM string
 *
 * \param str  The string to release the reference from; NULL is ignored
 *
 * When the last reference is dropped the string is destroyed: an interned
 * string gives up its lwc_string reference and frees only the dom_string
 * object, while a non-interned string with an allocator frees both its
 * data buffer and the object. A string with neither is left untouched.
 */
void dom_string_unref(struct dom_string *str)
{
	if (str == NULL)
		return;

	str->refcnt -= 1;
	if (str->refcnt != 0)
		return;

	if (str->intern != NULL) {
		/* The character data is the lwc_string's, not ours */
		lwc_string_unref(str->intern);
		str->alloc(str, 0, str->pw);
		return;
	}

	if (str->alloc != NULL) {
		str->alloc(str->ptr, 0, str->pw);
		str->alloc(str, 0, str->pw);
	}
}

/**
 * Build a DOM string from a run of bytes
 *
 * \param alloc  Memory (de)allocation function
 * \param pw     Pointer to client-specific private data
 * \param ptr    Pointer to string of characters
 * \param len    Length, in bytes, of string of characters
 * \param str    Pointer to location to receive result
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 *
 * The result already carries one reference for the caller.
 *
 * The bytes at ::ptr are copied; the caller keeps ownership of the input.
 * A NULL ::ptr or a zero ::len yields the shared empty string.
 */
dom_exception dom_string_create(dom_alloc alloc, void *pw,
		const uint8_t *ptr, size_t len, struct dom_string **str)
{
	struct dom_string *created;
	uint8_t *data;

	/* Nothing to copy: hand out the shared empty string */
	if (len == 0 || ptr == NULL) {
		*str = &empty_string;
		dom_string_ref(*str);
		return DOM_NO_ERR;
	}

	created = alloc(NULL, sizeof(struct dom_string), pw);
	if (created == NULL)
		return DOM_NO_MEM_ERR;

	data = alloc(NULL, len, pw);
	if (data == NULL) {
		/* Don't leak the half-built object */
		alloc(created, 0, pw);
		return DOM_NO_MEM_ERR;
	}
	memcpy(data, ptr, len);

	created->ptr = data;
	created->len = len;
	created->intern = NULL;
	created->alloc = alloc;
	created->pw = pw;
	created->refcnt = 1;

	*str = created;
	return DOM_NO_ERR;
}

/**
 * Obtain a dom_string usable with the given allocator.
 *
 * If the source string was already created with the same allocator and
 * private data, no copy is made: the source simply gains a reference and
 * is returned. Otherwise a new string is created with the new allocator.
 *
 * \param alloc  The new allocator for this string
 * \param pw     The new pw for this string
 * \param str    The source dom_string
 * \param ret    The cloned dom_string
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 */
dom_exception dom_string_clone(dom_alloc alloc, void *pw, 
		struct dom_string *str, struct dom_string **ret)
{
	if (alloc != str->alloc || pw != str->pw) {
		/* Different allocation details: a real copy is required */
		if (str->intern != NULL)
			return _dom_string_create_from_lwcstring(alloc, pw,
					str->intern, ret);

		return dom_string_create(alloc, pw, str->ptr, str->len, ret);
	}

	dom_string_ref(str);
	*ret = str;

	return DOM_NO_ERR;
}

/**
 * Create a dom_string from a lwc_string
 * 
 * \param alloc  Memory (de)allocation function
 * \param pw     Pointer to client-specific private data
 * \param str    The lwc_string, may be NULL
 * \param ret    Pointer to location to receive the new dom_string
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 *
 * A NULL ::str is not an error: ::ret is set to NULL and DOM_NO_ERR is
 * returned.
 *
 * The new string shares the character data of ::str and holds its own
 * reference on it; the caller's reference on ::str is not consumed.
 */
dom_exception _dom_string_create_from_lwcstring(dom_alloc alloc, void *pw,
		lwc_string *str, struct dom_string **ret)
{
	dom_string *r;

	if (str == NULL) {
		*ret = NULL;
		return DOM_NO_ERR;
	}

	r = alloc(NULL, sizeof(struct dom_string), pw);
	if (r == NULL)
		return DOM_NO_MEM_ERR;

	r->intern = str;
	r->ptr = (uint8_t *)lwc_string_data(str);
	r->len = lwc_string_length(str);

	r->alloc = alloc;
	r->pw = pw;

	r->refcnt = 1;

	/* Ref the lwc_string */
	lwc_string_ref(str);

	*ret = r;
	return DOM_NO_ERR;
}

/**
 * Ensure a dom_string is backed by an interned lwc_string
 *
 * \param str     The dom_string to be interned
 * \param lwcstr  Pointer to location to receive the lwc_string
 * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
 *
 * If the string is not yet interned, its data is interned, its private
 * buffer (if any) is released and ::str is switched over to the interned
 * data. In every successful case ::lwcstr receives a new reference on the
 * string's lwc_string.
 */
dom_exception _dom_string_intern(struct dom_string *str, 
		struct lwc_string_s **lwcstr)
{
	lwc_string *interned;
	lwc_error status;

	if (str->intern == NULL) {
		status = lwc_intern_string((const char *)str->ptr, str->len,
				&interned);
		if (status != lwc_error_ok)
			return _dom_exception_from_lwc_error(status);

		str->intern = interned;

		/* The private copy of the data is no longer needed */
		if (str->ptr != NULL)
			str->alloc(str->ptr, 0, str->pw);

		str->ptr = (uint8_t *) lwc_string_data(interned);
	}

	*lwcstr = lwc_string_ref(str->intern);

	return DOM_NO_ERR;
}

/**
 * Retrieve the lwc_string backing a dom_string, if any
 *
 * \param str     The dom_string object
 * \param lwcstr  Pointer to location to receive the lwc_string; set to
 *                NULL when the string is not interned
 * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
 *
 * A non-NULL result carries a new reference taken with lwc_string_ref().
 */
dom_exception dom_string_get_intern(struct dom_string *str, 
		struct lwc_string_s **lwcstr)
{
	struct lwc_string_s *interned = str->intern;

	if (interned != NULL)
		lwc_string_ref(interned);

	*lwcstr = interned;

	return DOM_NO_ERR;
}

/**
 * Case sensitively compare two DOM strings
 *
 * \param s1  The first string to compare
 * \param s2  The second string to compare
 * \return 0 if strings match, non-0 otherwise
 *
 * NULL and "" will match.
 *
 * The non-zero value only signals a mismatch: it is -1 when two interned
 * strings differ and 1 when the byte lengths differ, so it is not a
 * reliable ordering.
 */
int dom_string_cmp(struct dom_string *s1, struct dom_string *s2)
{
	bool ret = false;

	if (s1 == NULL)
		s1 = &empty_string;

	if (s2 == NULL)
		s2 = &empty_string;

	/* Two interned strings can be compared without touching the data */
	if (s1->intern != NULL && s2->intern != NULL) {
		lwc_string_isequal(s1->intern, s2->intern, &ret);
		if (ret == true) {
			return 0;
		} else {
			return -1;
		}
	}

	if (s1->len != s2->len)
		return 1;

	/* Empty strings may carry a NULL data pointer, which must not be
	 * handed to memcmp() even with a zero length. */
	if (s1->len == 0)
		return 0;

	return memcmp(s1->ptr, s2->ptr, s1->len);
}

/**
 * Case insensitively compare two DOM strings
 *
 * \param s1  The first string to compare
 * \param s2  The second string to compare
 * \return 0 if strings match, non-0 otherwise
 *
 * NULL and "" will match.
 *
 * Case folding is delegated to tolower() and is therefore only applied to
 * code points up to 0xFF; larger code points are compared unchanged.
 * Bytes that cannot be decoded as UTF-8 are compared one byte at a time
 * and only ever match the same raw byte in the other string.
 */
int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
{
	const uint8_t *d1;
	const uint8_t *d2;
	size_t l1, l2;
	bool ret = false;

	if (s1 == NULL)
		s1 = &empty_string;
	if (s2 == NULL)
		s2 = &empty_string;

	if (s1->intern != NULL && s2->intern != NULL) {
		lwc_string_caseless_isequal(s1->intern, s2->intern, &ret);
		if (ret == true) {
			return 0;
		} else {
			return -1;
		}
	}

	d1 = s1->ptr;
	d2 = s2->ptr;
	l1 = s1->len;
	l2 = s2->len;

	while (l1 > 0 && l2 > 0) {
		uint32_t c1, c2;
		size_t cl1, cl2;
		parserutils_error err;

		err = parserutils_charset_utf8_to_ucs4(d1, l1, &c1, &cl1); 
		if (err != PARSERUTILS_OK || cl1 == 0 || cl1 > l1) {
			/* Undecodable input: consume one byte and map it
			 * above the Unicode range so that it cannot be
			 * mistaken for a real code point. */
			c1 = 0x110000u + *d1;
			cl1 = 1;
		}

		err = parserutils_charset_utf8_to_ucs4(d2, l2, &c2, &cl2);
		if (err != PARSERUTILS_OK || cl2 == 0 || cl2 > l2) {
			c2 = 0x110000u + *d2;
			cl2 = 1;
		}

		/** \todo improved lower-casing algorithm */
		/* tolower() is only defined for values representable as
		 * unsigned char, so leave everything else untouched. */
		if (c1 <= 0xFF)
			c1 = (uint32_t) tolower((int) c1);
		if (c2 <= 0xFF)
			c2 = (uint32_t) tolower((int) c2);

		if (c1 != c2) {
			return (c1 < c2) ? -1 : 1;
		}

		d1 += cl1;
		d2 += cl2;

		l1 -= cl1;
		l2 -= cl2;
	}

	/* One string is a prefix of the other unless both are exhausted */
	if (l1 == l2)
		return 0;

	return (l1 < l2) ? -1 : 1;
}

/**
 * Find the first occurrence of a character in a dom string
 * 
 * \param str  The string to search in; NULL is treated as the empty string
 * \param chr  UCS4 value to look for
 * \return Zero-based character index of the first match, or
 *         (uint32_t) -1 if the character is absent or the string could
 *         not be decoded up to that point
 */
uint32_t dom_string_index(struct dom_string *str, uint32_t chr)
{
	const uint8_t *cursor;
	size_t remaining, width;
	uint32_t ucs4, pos;

	if (str == NULL)
		str = &empty_string;

	cursor = str->ptr;
	remaining = str->len;

	/* Walk forwards one character at a time, counting as we go */
	for (pos = 0; remaining > 0; pos++) {
		if (parserutils_charset_utf8_to_ucs4(cursor, remaining,
				&ucs4, &width) != PARSERUTILS_OK)
			break;

		if (ucs4 == chr)
			return pos;

		cursor += width;
		remaining -= width;
	}

	return (uint32_t) -1;
}

/**
 * Get the index of the last occurrence of a character in a dom string 
 * 
 * \param str  The string to search in; NULL is treated as the empty string
 * \param chr  UCS4 value to look for
 * \return Zero-based character index of found character, or
 *         (uint32_t) -1 if none found or the string could not be decoded
 *
 * The index uses the same numbering as dom_string_index(): a match on the
 * final character of an N character string yields N - 1.
 */
uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
{
	const uint8_t *s;
	size_t clen, slen;
	uint32_t c, coff, index;
	parserutils_error err;

	if (str == NULL)
		str = &empty_string;

	s = str->ptr;
	slen = str->len;

	/* One past the index of the last character */
	index = dom_string_length(str);

	while (slen > 0) {
		/* Find where the character ending at slen begins ... */
		err = parserutils_charset_utf8_prev(s, slen, &coff);
		if (err == PARSERUTILS_OK) {
			/* ... and decode it from the bytes it occupies */
			err = parserutils_charset_utf8_to_ucs4(s + coff, 
					slen - coff, &c, &clen);
		}

		if (err != PARSERUTILS_OK) {
			return (uint32_t) -1;
		}

		/* More characters found than the string claims to have, or
		 * no backwards progress: the data is inconsistent. */
		if (index == 0 || coff >= slen) {
			return (uint32_t) -1;
		}

		index--;

		if (c == chr) {
			return index;
		}

		/* Continue with everything before this character */
		slen = coff;
	}

	return (uint32_t) -1;
}

/**
 * Count the characters in a dom string
 *
 * \param str  The string to measure; NULL is treated as the empty string
 * \return The length of the string, in characters, or 0 if the length
 *         could not be determined
 */
uint32_t dom_string_length(struct dom_string *str)
{
	size_t chars;

	if (str == NULL)
		str = &empty_string;

	if (parserutils_charset_utf8_length(str->ptr, str->len, &chars) !=
			PARSERUTILS_OK)
		return 0;

	return chars;
}

/**
 * Get the UCS4 character at position index
 *
 * \param str    The string to read from; NULL is treated as the empty
 *               string
 * \param index  The zero-based position of the character
 * \param ch     Pointer to location to receive the UCS4 character
 * \return DOM_NO_ERR on success,
 *         DOM_DOMSTRING_SIZE_ERR if no character can be produced for
 *         ::index, either because it lies beyond the end of the string
 *         or because the data up to and including that position is not
 *         decodable UTF-8.
 *
 * ::ch is only written on success.
 */
dom_exception dom_string_at(struct dom_string *str, uint32_t index, 
		uint32_t *ch)
{
	const uint8_t *s;
	size_t clen, slen;
	uint32_t c, i;
	parserutils_error err;

	if (str == NULL)
		str = &empty_string;

	s = str->ptr;
	slen = str->len;

	/* Step over the characters that precede the requested one */
	for (i = 0; i < index && slen > 0; i++) {
		err = parserutils_charset_utf8_char_byte_length(s, &clen);

		/* The byte length is derived from the lead byte alone, so
		 * a truncated trailing sequence can claim more bytes than
		 * remain; refuse to step past the end of the data. */
		if (err != PARSERUTILS_OK || clen == 0 || clen > slen) {
			return DOM_DOMSTRING_SIZE_ERR;
		}

		s += clen;
		slen -= clen;
	}

	/* Ran out of data before reaching the requested position */
	if (slen == 0) {
		return DOM_DOMSTRING_SIZE_ERR;
	}

	err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
	if (err != PARSERUTILS_OK) {
		return DOM_DOMSTRING_SIZE_ERR;
	}

	*ch = c;

	return DOM_NO_ERR;
}

/** 
 * Concatenate two dom strings 
 * 
 * \param s1      The first string
 * \param s2      The second string
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 *
 * The returned string will be allocated using the allocation details
 * stored in ::s1, or those of ::s2 if ::s1 has none. If the result is
 * zero bytes long the shared empty string is returned instead.
 * 
 * The returned string will have its reference count increased. The client
 * should dereference it once it has finished with it.
 */
dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
		struct dom_string **result)
{
	struct dom_string *concat;
	dom_alloc alloc;
	void *pw;
	size_t total;

	assert(s1 != NULL);
	assert(s2 != NULL);

	if (s1->alloc != NULL) {
		alloc = s1->alloc;
		pw = s1->pw;
	} else if (s2->alloc != NULL) {
		alloc = s2->alloc;
		pw = s2->pw;
	} else {
		/* s1 == s2 == empty_string */
		alloc = NULL;
		pw = NULL;
	}

	total = s1->len + s2->len;

	/* Nothing to allocate: either there is no allocator to use, or the
	 * request would be for zero bytes, which the allocator interface
	 * uses to mean "free". Hand out a counted reference on the shared
	 * empty string, as promised by the contract above. */
	if (alloc == NULL || total == 0) {
		empty_string.refcnt++;
		*result = &empty_string;
		return DOM_NO_ERR;
	}

	concat = alloc(NULL, sizeof(struct dom_string), pw);

	if (concat == NULL) {
		return DOM_NO_MEM_ERR;
	}

	concat->ptr = alloc(NULL, total, pw);
	if (concat->ptr == NULL) {
		alloc(concat, 0, pw);

		return DOM_NO_MEM_ERR;
	}

	/* An empty operand may have a NULL data pointer, which must not be
	 * passed to memcpy() even with a zero length. */
	if (s1->len > 0) {
		memcpy(concat->ptr, s1->ptr, s1->len);
	}

	if (s2->len > 0) {
		memcpy(concat->ptr + s1->len, s2->ptr, s2->len);
	}

	concat->len = total;

	concat->alloc = alloc;
	concat->pw = pw;
	concat->intern = NULL;

	concat->refcnt = 1;

	*result = concat;

	return DOM_NO_ERR;
}

/**
 * Copy a range of characters out of a dom string
 *
 * \param str     The string to extract from
 * \param i1      The character index of the start of the substring
 * \param i2      The character index of the end of the substring
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion or
 *         if the character indices cannot be converted to byte offsets
 *
 * The new string is created with the allocation details stored in ::str.
 *
 * The result carries one reference for the caller, who should release it
 * once finished with it.
 */
dom_exception dom_string_substr(struct dom_string *str, 
		uint32_t i1, uint32_t i2, struct dom_string **result)
{
	const uint8_t *data = str->ptr;
	size_t nbytes = str->len;
	uint32_t start = 0;
	uint32_t end;
	uint32_t todo;

	/* Advance i1 characters to find the starting byte offset */
	for (todo = i1; todo > 0; todo--) {
		if (parserutils_charset_utf8_next(data, nbytes, start,
				&start) != PARSERUTILS_OK)
			return DOM_NO_MEM_ERR;
	}

	/* Advance a further (i2 - i1) characters to find the end offset */
	end = start;
	for (todo = i2 - i1; todo > 0; todo--) {
		if (parserutils_charset_utf8_next(data, nbytes, end,
				&end) != PARSERUTILS_OK)
			return DOM_NO_MEM_ERR;
	}

	/* Copy the bytes in [start, end) into a fresh string */
	return dom_string_create(str->alloc, str->pw, data + start,
			end - start, result);
}

/**
 * Insert data into a dom string at the given location
 *
 * \param target  Pointer to string to insert into
 * \param source  Pointer to string to insert
 * \param offset  Character offset of location to insert at
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR          on success, 
 *         DOM_NO_MEM_ERR      on memory exhaustion,
 *         DOM_INDEX_SIZE_ERR  if ::offset > len(::target).
 *
 * The returned string will be allocated using the allocation details
 * stored in ::target, or those of ::source if ::target has none (as is
 * the case for the shared empty string). If both strings are zero bytes
 * long the result is a new reference on ::target itself.
 *
 * The returned string will have its reference count increased. The client
 * should dereference it once it has finished with it. 
 */
dom_exception dom_string_insert(struct dom_string *target,
		struct dom_string *source, uint32_t offset,
		struct dom_string **result)
{
	struct dom_string *res;
	struct dom_string *owner;
	const uint8_t *t, *s;
	uint32_t tlen, slen, clen;
	uint32_t ins = 0;
	size_t total;
	parserutils_error err;

	t = target->ptr;
	tlen = target->len;
	s = source->ptr;
	slen = source->len;

	clen = dom_string_length(target);

	if (offset > clen)
		return DOM_INDEX_SIZE_ERR;

	/* Calculate the byte index of the insertion point */
	if (offset == clen) {
		/* Optimisation for append */
		offset = 0;
		ins = tlen;
	} else {
		while (offset > 0) {
			err = parserutils_charset_utf8_next(t, tlen, 
					ins, &ins);

			if (err != PARSERUTILS_OK) {
				return DOM_NO_MEM_ERR;
			}

			offset--;
		}
	}

	total = (size_t) tlen + slen;

	/* Empty into empty: the result equals the target, so share it
	 * rather than asking the allocator for zero bytes. */
	if (total == 0) {
		target->refcnt++;
		*result = target;
		return DOM_NO_ERR;
	}

	/* The target may have no allocator of its own; borrow the
	 * source's in that case. */
	owner = (target->alloc != NULL) ? target : source;
	if (owner->alloc == NULL) {
		return DOM_NO_MEM_ERR;
	}

	/* Allocate result string */
	res = owner->alloc(NULL, sizeof(struct dom_string), owner->pw);
	if (res == NULL) {
		return DOM_NO_MEM_ERR;
	}

	/* Allocate data buffer for result contents */
	res->ptr = owner->alloc(NULL, total, owner->pw);
	if (res->ptr == NULL) {
		owner->alloc(res, 0, owner->pw);
		return DOM_NO_MEM_ERR;
	}

	/* Copy initial portion of target, if any, into result */
	if (ins > 0) {
		memcpy(res->ptr, t, ins);
	}

	/* Copy inserted data, if any, into result. The guard keeps a NULL
	 * data pointer from an empty source away from memcpy(). */
	if (slen > 0) {
		memcpy(res->ptr + ins, s, slen);
	}

	/* Copy remainder of target, if any, into result */
	if (tlen > ins) {
		memcpy(res->ptr + ins + slen, t + ins, tlen - ins);
	}

	res->len = total;

	res->alloc = owner->alloc;
	res->pw = owner->pw;
	res->intern = NULL;
	
	res->refcnt = 1;

	*result = res;

	return DOM_NO_ERR;
}

/** 
 * Replace a section of a dom string
 *
 * \param target  Pointer to string of which to replace a section
 * \param source  Pointer to replacement string; NULL is treated as the
 *                empty string, i.e. the section is deleted
 * \param i1      Character index of start of region to replace
 * \param i2      Character index of end of region to replace
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion or
 *         if the character indices cannot be converted to byte offsets
 *         within ::target.
 *
 * The returned string will be allocated using the allocation details
 * stored in ::target, or those of ::source if ::target has none (as is
 * the case for the shared empty string). If the result is zero bytes
 * long the shared empty string is returned instead.
 *
 * The returned string will have its reference count increased. The client
 * should dereference it once it has finished with it. 
 */
dom_exception dom_string_replace(struct dom_string *target,
		struct dom_string *source, uint32_t i1, uint32_t i2,
		struct dom_string **result)
{
	struct dom_string *res;
	struct dom_string *owner;
	const uint8_t *t, *s;
	uint32_t tlen, slen;
	uint32_t b1, b2;
	size_t total;
	parserutils_error err;

	if (source == NULL)
		source = &empty_string;

	t = target->ptr;
	tlen = target->len;
	s = source->ptr;
	slen = source->len;

	/* Initialise the byte index of the start to 0 */
	b1 = 0;
	/* Make the end a character offset from the start */
	i2 -= i1;

	/* Calculate the byte index of the start. The indices describe a
	 * region of the target, so it is the target that must be walked. */
	while (i1 > 0) {
		err = parserutils_charset_utf8_next(t, tlen, b1, &b1);

		if (err != PARSERUTILS_OK) {
			return DOM_NO_MEM_ERR;
		}

		i1--;
	}

	/* Initialise the byte index of the end to that of the start */
	b2 = b1;

	/* Calculate the byte index of the end */
	while (i2 > 0) {
		err = parserutils_charset_utf8_next(t, tlen, b2, &b2);

		if (err != PARSERUTILS_OK) {
			return DOM_NO_MEM_ERR;
		}

		i2--;
	}

	total = (size_t) tlen + slen - (b2 - b1);

	/* Everything was replaced by nothing: return the shared empty
	 * string rather than asking the allocator for zero bytes. */
	if (total == 0) {
		empty_string.refcnt++;
		*result = &empty_string;
		return DOM_NO_ERR;
	}

	/* The target may have no allocator of its own; borrow the
	 * source's in that case. */
	owner = (target->alloc != NULL) ? target : source;
	if (owner->alloc == NULL) {
		return DOM_NO_MEM_ERR;
	}

	/* Allocate result string */
	res = owner->alloc(NULL, sizeof(struct dom_string), owner->pw);

	if (res == NULL) {
		return DOM_NO_MEM_ERR;
	}

	/* Allocate data buffer for result contents */
	res->ptr = owner->alloc(NULL, total, owner->pw);
	if (res->ptr == NULL) {
		owner->alloc(res, 0, owner->pw);
		return DOM_NO_MEM_ERR;
	}

	/* Copy initial portion of target, if any, into result */
	if (b1 > 0) {
		memcpy(res->ptr, t, b1);
	}

	/* Copy replacement data into result */
	if (slen > 0) {
		memcpy(res->ptr + b1, s, slen);
	}

	/* Copy remainder of target, if any, into result */
	if (tlen > b2) {
		memcpy(res->ptr + b1 + slen, t + b2, tlen - b2);
	}

	res->len = total;

	res->alloc = owner->alloc;
	res->pw = owner->pw;
	res->intern = NULL;

	res->refcnt = 1;

	*result = res;

	return DOM_NO_ERR;
}

/**
 * Make a new dom string with the same content as an existing one
 *
 * \param str     The string to duplicate
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 *
 * The duplicate is created with the allocation details stored in ::str.
 * An interned source produces a duplicate backed by the same lwc_string;
 * otherwise the bytes are copied.
 *
 * The result carries one reference for the caller, who should release it
 * once finished with it.
 */
dom_exception dom_string_dup(struct dom_string *str, 
		struct dom_string **result)
{
	if (str->intern == NULL)
		return dom_string_create(str->alloc, str->pw, str->ptr,
				str->len, result);

	return _dom_string_create_from_lwcstring(str->alloc, str->pw,
			str->intern, result);
}

/**
 * Compute a hash over the bytes of a dom string
 *
 * \param str  The string to calculate a hash of
 * \return The hash value associated with the string
 *
 * Starting from 0x01000193, each byte is folded in by multiplying the
 * running value by 0x01000193 and then XORing in the byte.
 */
uint32_t dom_string_hash(struct dom_string *str)
{
	const uint8_t *bytes = str->ptr;
	const size_t nbytes = str->len;
	uint32_t hash = 0x01000193;
	size_t i;

	for (i = 0; i < nbytes; i++) {
		hash = (hash * 0x01000193) ^ bytes[i];
	}

	return hash;
}

/**
 * Convert a lwc_error to a dom_exception
 * 
 * \param err  The input lwc_error
 * \return the dom_exception
 *
 * lwc_error_ok and lwc_error_initialised both map to DOM_NO_ERR. A value
 * that matches no known lwc_error trips an assertion in debug builds and
 * is reported as DOM_NO_MEM_ERR otherwise, so that an unrecognised
 * failure is never mistaken for success.
 */
dom_exception _dom_exception_from_lwc_error(lwc_error err)
{
	switch (err) {
		case lwc_error_ok:
		case lwc_error_initialised:
			return DOM_NO_ERR;
		case lwc_error_oom:
			return DOM_NO_MEM_ERR;
		case lwc_error_range:
			return DOM_INDEX_SIZE_ERR;
	}

	/* A bare string literal is always true; negate it so that the
	 * assertion actually fires and prints the message. */
	assert(!"Unknow lwc_error, can't convert to dom_exception");

	/* Only reachable with assertions disabled: fail closed */
	return DOM_NO_MEM_ERR;
}

/**
 * Expose the raw byte data of a dom_string.
 *
 * \param str	The dom_string object
 * \return		Pointer to the string's data, viewed as char
 *
 * @note: This accessor exists purely for convenient read access to the
 * underlying bytes; callers must not modify the data it points to.
 */
char *_dom_string_data(struct dom_string *str)
{
	uint8_t *raw = str->ptr;

	return (char *) raw;
}

/**
 * Get the length, in bytes, of a dom_string
 *
 * \param str	The dom_string object
 * \return		The byte length stored in the string
 */
size_t _dom_string_length(struct dom_string *str)
{
	const size_t nbytes = str->len;

	return nbytes;
}