summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--	src/charset/detect.c	4
-rw-r--r--	src/tokeniser/tokeniser.c	47
-rw-r--r--	src/utils/string.c	27
-rw-r--r--	src/utils/string.h	4
4 files changed, 5 insertions, 77 deletions
diff --git a/src/charset/detect.c b/src/charset/detect.c
index f3f2e4f..755d9fd 100644
--- a/src/charset/detect.c
+++ b/src/charset/detect.c
@@ -676,14 +676,14 @@ void hubbub_charset_fix_charset(uint16_t *charset)
} else if (*charset == parserutils_charset_mibenum_from_name(
"KS_C_5601-1987", SLEN("KS_C_5601-1987")) ||
*charset == parserutils_charset_mibenum_from_name(
- "EUC-KR", SLEN("EUR-KR"))) {
+ "EUC-KR", SLEN("EUC-KR"))) {
tmp = parserutils_charset_mibenum_from_name(
"Windows-949", SLEN("Windows-949"));
/* TIS-620 -> Windows-874 */
} else if (*charset == parserutils_charset_mibenum_from_name(
"TIS-620", SLEN("TIS-620"))) {
tmp = parserutils_charset_mibenum_from_name(
- "Windows-847", SLEN("Windows-847"));
+ "Windows-874", SLEN("Windows-874"));
/* x-x-big5 -> Big5 */
} else if (*charset == parserutils_charset_mibenum_from_name(
"x-x-big5", SLEN("x-x-big5"))) {
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 0bf72ef..885a6c8 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -607,13 +607,6 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
} \
} while (0)
-#define FINISH(str) \
- /* no-op */
-
-
-
-
-
/* this should always be called with an empty "chars" buffer */
hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
@@ -1076,18 +1069,13 @@ hubbub_error hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
tokeniser->context.pending += len;
if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
- FINISH(ctag->name);
-
tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
} else if (c == '>') {
- FINISH(ctag->name);
-
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
} else if (c == '\0') {
COLLECT(ctag->name, u_fffd, sizeof(u_fffd));
} else if (c == '/') {
- FINISH(ctag->name);
tokeniser->state = STATE_SELF_CLOSING_START_TAG;
} else if ('A' <= c && c <= 'Z') {
uint8_t lc = (c + 0x20);
@@ -1188,18 +1176,13 @@ hubbub_error hubbub_tokeniser_handle_attribute_name(hubbub_tokeniser *tokeniser)
tokeniser->context.pending += len;
if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
- FINISH(ctag->attributes[ctag->n_attributes - 1].name);
tokeniser->state = STATE_AFTER_ATTRIBUTE_NAME;
} else if (c == '=') {
- FINISH(ctag->attributes[ctag->n_attributes - 1].name);
tokeniser->state = STATE_BEFORE_ATTRIBUTE_VALUE;
} else if (c == '>') {
- FINISH(ctag->attributes[ctag->n_attributes - 1].name);
-
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
} else if (c == '/') {
- FINISH(ctag->attributes[ctag->n_attributes - 1].name);
tokeniser->state = STATE_SELF_CLOSING_START_TAG;
} else if (c == '\0') {
COLLECT(ctag->attributes[ctag->n_attributes - 1].name,
@@ -1347,8 +1330,6 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_dq(hubbub_tokeniser *tokeni
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF) {
- FINISH(ctag->attributes[ctag->n_attributes - 1].value);
-
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
}
@@ -1357,7 +1338,6 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_dq(hubbub_tokeniser *tokeni
if (c == '"') {
tokeniser->context.pending += len;
- FINISH(ctag->attributes[ctag->n_attributes - 1].value);
tokeniser->state = STATE_AFTER_ATTRIBUTE_VALUE_Q;
} else if (c == '&') {
tokeniser->context.prev_state = tokeniser->state;
@@ -1404,8 +1384,6 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_sq(hubbub_tokeniser *tokeni
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF) {
- FINISH(ctag->attributes[ctag->n_attributes - 1].value);
-
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
}
@@ -1414,9 +1392,7 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_sq(hubbub_tokeniser *tokeni
if (c == '\'') {
tokeniser->context.pending += len;
- FINISH(ctag->attributes[ctag->n_attributes - 1].value);
- tokeniser->state =
- STATE_AFTER_ATTRIBUTE_VALUE_Q;
+ tokeniser->state = STATE_AFTER_ATTRIBUTE_VALUE_Q;
} else if (c == '&') {
tokeniser->context.prev_state = tokeniser->state;
tokeniser->state = STATE_CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE;
@@ -1463,8 +1439,6 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(hubbub_tokeniser *tokeni
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF) {
- FINISH(ctag->attributes[ctag->n_attributes - 1].value);
-
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
}
@@ -1473,7 +1447,6 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(hubbub_tokeniser *tokeni
if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
tokeniser->context.pending += len;
- FINISH(ctag->attributes[ctag->n_attributes - 1].value);
tokeniser->state = STATE_BEFORE_ATTRIBUTE_NAME;
} else if (c == '&') {
tokeniser->context.prev_state = tokeniser->state;
@@ -1481,8 +1454,6 @@ hubbub_error hubbub_tokeniser_handle_attribute_value_uq(hubbub_tokeniser *tokeni
/* Don't eat the '&'; it'll be handled by entity consumption */
} else if (c == '>') {
tokeniser->context.pending += len;
- FINISH(ctag->attributes[ctag->n_attributes - 1].value);
-
tokeniser->state = STATE_DATA;
return emit_current_tag(tokeniser);
} else if (c == '\0') {
@@ -1957,8 +1928,6 @@ hubbub_error hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF) {
- FINISH(cdoc->name);
-
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
}
@@ -1967,10 +1936,8 @@ hubbub_error hubbub_tokeniser_handle_doctype_name(hubbub_tokeniser *tokeniser)
tokeniser->context.pending += len;
if (c == '\t' || c == '\n' || c == '\f' || c == ' ' || c == '\r') {
- FINISH(cdoc->name);
tokeniser->state = STATE_AFTER_DOCTYPE_NAME;
} else if (c == '>') {
- FINISH(cdoc->name);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, false);
} else if (c == '\0') {
@@ -2106,7 +2073,6 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_dq(hubbub_tokeniser *tokenis
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF) {
- FINISH(cdoc->public_id);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
}
@@ -2115,10 +2081,8 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_dq(hubbub_tokeniser *tokenis
tokeniser->context.pending += len;
if (c == '"') {
- FINISH(cdoc->public_id);
tokeniser->state = STATE_AFTER_DOCTYPE_PUBLIC;
} else if (c == '>') {
- FINISH(cdoc->public_id);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else if (c == '\0') {
@@ -2156,7 +2120,6 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_sq(hubbub_tokeniser *tokenis
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF) {
- FINISH(cdoc->public_id);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
}
@@ -2165,10 +2128,8 @@ hubbub_error hubbub_tokeniser_handle_doctype_public_sq(hubbub_tokeniser *tokenis
tokeniser->context.pending += len;
if (c == '\'') {
- FINISH(cdoc->public_id);
tokeniser->state = STATE_AFTER_DOCTYPE_PUBLIC;
} else if (c == '>') {
- FINISH(cdoc->public_id);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else if (c == '\0') {
@@ -2331,7 +2292,6 @@ hubbub_error hubbub_tokeniser_handle_doctype_system_dq(hubbub_tokeniser *tokenis
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF) {
- FINISH(cdoc->system_id);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
}
@@ -2340,10 +2300,8 @@ hubbub_error hubbub_tokeniser_handle_doctype_system_dq(hubbub_tokeniser *tokenis
tokeniser->context.pending += len;
if (c == '"') {
- FINISH(cdoc->system_id);
tokeniser->state = STATE_AFTER_DOCTYPE_SYSTEM;
} else if (c == '>') {
- FINISH(cdoc->system_id);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else if (c == '\0') {
@@ -2382,7 +2340,6 @@ hubbub_error hubbub_tokeniser_handle_doctype_system_sq(hubbub_tokeniser *tokenis
if (cptr == PARSERUTILS_INPUTSTREAM_OOD) {
return HUBBUB_OOD;
} else if (cptr == PARSERUTILS_INPUTSTREAM_EOF) {
- FINISH(cdoc->system_id);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
}
@@ -2391,10 +2348,8 @@ hubbub_error hubbub_tokeniser_handle_doctype_system_sq(hubbub_tokeniser *tokenis
tokeniser->context.pending += len;
if (c == '\'') {
- FINISH(cdoc->system_id);
tokeniser->state = STATE_AFTER_DOCTYPE_SYSTEM;
} else if (c == '>') {
- FINISH(cdoc->system_id);
tokeniser->state = STATE_DATA;
return emit_current_doctype(tokeniser, true);
} else if (c == '\0') {
diff --git a/src/utils/string.c b/src/utils/string.c
index 5a2a0bc..0a5d44d 100644
--- a/src/utils/string.c
+++ b/src/utils/string.c
@@ -12,33 +12,6 @@
/**
- * Check if one string starts with another.
- *
- * \param a String to compare
- * \param a_len Length of first string
- * \param b String to compare
- * \param b_len Length of second string
- */
-bool hubbub_string_starts(const uint8_t *a, size_t a_len,
- const uint8_t *b, size_t b_len)
-{
- uint8_t z1, z2;
-
- if (a_len < b_len)
- return false;
-
- for (const uint8_t *s1 = a, *s2 = b; b_len > 0; s1++, s2++, b_len--)
- {
- z1 = *s1;
- z2 = *s2;
- if (z1 != z2) return false;
- if (!z1) return true;
- }
-
- return true;
-}
-
-/**
* Check that one string is exactly equal to another
*
* \param a String to compare
diff --git a/src/utils/string.h b/src/utils/string.h
index cb65493..2487428 100644
--- a/src/utils/string.h
+++ b/src/utils/string.h
@@ -8,11 +8,11 @@
#ifndef hubbub_string_h_
#define hubbub_string_h_
-bool hubbub_string_starts(const uint8_t *a, size_t a_len,
- const uint8_t *b, size_t b_len);
+/** Match two strings case-sensitively */
bool hubbub_string_match(const uint8_t *a, size_t a_len,
const uint8_t *b, size_t b_len);
+/** Match two strings case-insensitively */
bool hubbub_string_match_ci(const uint8_t *a, size_t a_len,
const uint8_t *b, size_t b_len);