From a8596a80aeb70acb05aba29f654df24210f50c19 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Sun, 10 May 2020 23:22:29 +0100 Subject: move free text search general interface to content. needs additional cleanup to call content through handler table to perform searches. --- content/Makefile | 13 +- content/handlers/html/Makefile | 1 - content/handlers/html/html.c | 4 +- content/handlers/html/interaction.c | 25 +- content/handlers/html/private.h | 2 +- content/handlers/html/redraw.c | 25 +- content/handlers/html/search.c | 662 --------------------------------- content/handlers/html/search.h | 81 ---- content/handlers/text/textplain.c | 56 +-- content/textsearch.c | 718 ++++++++++++++++++++++++++++++++++++ content/textsearch.h | 80 ++++ 11 files changed, 868 insertions(+), 799 deletions(-) delete mode 100644 content/handlers/html/search.c delete mode 100644 content/handlers/html/search.h create mode 100644 content/textsearch.c create mode 100644 content/textsearch.h (limited to 'content') diff --git a/content/Makefile b/content/Makefile index b4353ba95..abc5a2463 100644 --- a/content/Makefile +++ b/content/Makefile @@ -1,7 +1,16 @@ # Content sources -S_CONTENT := content.c content_factory.c dirlist.c fetch.c hlcache.c \ - llcache.c mimesniff.c urldb.c no_backing_store.c +S_CONTENT := \ + content.c \ + content_factory.c \ + dirlist.c \ + fetch.c \ + hlcache.c \ + llcache.c \ + mimesniff.c \ + textsearch.c \ + urldb.c \ + no_backing_store.c # Make filesystem backing store available ifeq ($(NETSURF_FS_BACKING_STORE),YES) diff --git a/content/handlers/html/Makefile b/content/handlers/html/Makefile index 677786c80..233fb1327 100644 --- a/content/handlers/html/Makefile +++ b/content/handlers/html/Makefile @@ -20,5 +20,4 @@ S_HTML := box_construct.c \ redraw.c \ redraw_border.c \ script.c \ - search.c \ table.c diff --git a/content/handlers/html/html.c b/content/handlers/html/html.c index af06431bf..a3d426461 100644 --- a/content/handlers/html/html.c +++ 
b/content/handlers/html/html.c @@ -49,6 +49,7 @@ #include "netsurf/misc.h" #include "content/hlcache.h" #include "content/content_factory.h" +#include "content/textsearch.h" #include "desktop/selection.h" #include "desktop/scrollbar.h" #include "desktop/textarea.h" @@ -69,7 +70,6 @@ #include "html/form_internal.h" #include "html/imagemap.h" #include "html/layout.h" -#include "html/search.h" #define CHUNK 4096 @@ -1327,7 +1327,7 @@ static nserror html_close(struct content *c) selection_clear(&htmlc->sel, false); if (htmlc->search != NULL) { - search_destroy_context(htmlc->search); + content_textsearch_destroy(htmlc->search); } /* clear the html content reference to the browser window */ diff --git a/content/handlers/html/interaction.c b/content/handlers/html/interaction.c index 3f401bd11..8ae5144c0 100644 --- a/content/handlers/html/interaction.c +++ b/content/handlers/html/interaction.c @@ -41,6 +41,7 @@ #include "netsurf/layout.h" #include "netsurf/keypress.h" #include "content/hlcache.h" +#include "content/textsearch.h" #include "desktop/frames.h" #include "desktop/scrollbar.h" #include "desktop/selection.h" @@ -55,7 +56,6 @@ #include "html/form_internal.h" #include "html/private.h" #include "html/imagemap.h" -#include "html/search.h" #include "html/interaction.h" /** @@ -1602,17 +1602,18 @@ bool html_keypress(struct content *c, uint32_t key) * Handle search. * * \param c content of type HTML - * \param context front end private data + * \param fe_ctx front end private data * \param flags search flags * \param string search string */ void html_search(struct content *c, - void *context, + void *fe_ctx, search_flags_t flags, const char *string) { html_content *html = (html_content *)c; + nserror res; assert(c != NULL); @@ -1621,7 +1622,7 @@ html_search(struct content *c, (strcmp(string, html->search_string) == 0) && (html->search != NULL)) { /* Continue prev. 
search */ - search_step(html->search, flags, string); + content_textsearch_step(html->search, flags, string); } else if (string != NULL) { /* New search */ @@ -1631,16 +1632,16 @@ html_search(struct content *c, return; if (html->search != NULL) { - search_destroy_context(html->search); + content_textsearch_destroy(html->search); html->search = NULL; } - html->search = search_create_context(c, CONTENT_HTML, context); - - if (html->search == NULL) + res = content_textsearch_create(c, fe_ctx, &html->search); + if (res != NSERROR_OK) { return; + } - search_step(html->search, flags, string); + content_textsearch_step(html->search, flags, string); } else { /* Clear search */ @@ -1653,9 +1654,9 @@ html_search(struct content *c, /** - * Terminate a search. + * Terminate a text search. * - * \param c content of type HTML + * \param c content of type HTML */ void html_search_clear(struct content *c) { @@ -1667,7 +1668,7 @@ void html_search_clear(struct content *c) html->search_string = NULL; if (html->search != NULL) { - search_destroy_context(html->search); + content_textsearch_destroy(html->search); } html->search = NULL; } diff --git a/content/handlers/html/private.h b/content/handlers/html/private.h index dff0b7875..1367c624c 100644 --- a/content/handlers/html/private.h +++ b/content/handlers/html/private.h @@ -211,7 +211,7 @@ typedef struct html_content { struct form_control *visible_select_menu; /** Context for free text search, or NULL if none */ - struct search_context *search; + struct textsearch_context *search; /** Search string or NULL */ char *search_string; diff --git a/content/handlers/html/redraw.c b/content/handlers/html/redraw.c index 936c1b333..f9fb6b4fd 100644 --- a/content/handlers/html/redraw.c +++ b/content/handlers/html/redraw.c @@ -47,6 +47,7 @@ #include "netsurf/layout.h" #include "content/content.h" #include "content/content_protected.h" +#include "content/textsearch.h" #include "css/utils.h" #include "desktop/selection.h" #include 
"desktop/print.h" @@ -61,7 +62,6 @@ #include "html/form_internal.h" #include "html/private.h" #include "html/layout.h" -#include "html/search.h" bool html_redraw_debug = false; @@ -167,7 +167,7 @@ text_redraw(const char *utf8_text, bool excluded, struct content *c, const struct selection *sel, - struct search_context *search, + struct textsearch_context *search, const struct redraw_context *ctx) { bool highlighted = false; @@ -184,18 +184,23 @@ text_redraw(const char *utf8_text, unsigned end_idx; /* first try the browser window's current selection */ - if (selection_defined(sel) && selection_highlighted(sel, - offset, offset + len, - &start_idx, &end_idx)) { + if (selection_defined(sel) && + selection_highlighted(sel, + offset, + offset + len, + &start_idx, + &end_idx)) { highlighted = true; } /* what about the current search operation, if any? */ - if (!highlighted && (search != NULL) && - search_term_highlighted(c, - offset, offset + len, - &start_idx, &end_idx, - search)) { + if (!highlighted && + (search != NULL) && + content_textsearch_ishighlighted(search, + offset, + offset + len, + &start_idx, + &end_idx)) { highlighted = true; } diff --git a/content/handlers/html/search.c b/content/handlers/html/search.c deleted file mode 100644 index 8ccb6920c..000000000 --- a/content/handlers/html/search.c +++ /dev/null @@ -1,662 +0,0 @@ -/* - * Copyright 2004 John M Bell - * Copyright 2005 Adrian Lees - * Copyright 2009 Mark Benjamin - * - * This file is part of NetSurf, http://www.netsurf-browser.org/ - * - * NetSurf is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * NetSurf is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -/** - * \file - * Free text search (core) - */ - -#include -#include -#include - -#include "utils/config.h" -#include "utils/log.h" -#include "utils/messages.h" -#include "utils/utils.h" -#include "content/content.h" -#include "content/hlcache.h" -#include "desktop/selection.h" -#include "netsurf/search.h" -#include "netsurf/misc.h" -#include "desktop/gui_internal.h" - -#include "text/textplain.h" -#include "html/box.h" -#include "html/box_inspect.h" -#include "html/html.h" -#include "html/private.h" -#include "html/search.h" - -#ifndef NOF_ELEMENTS -#define NOF_ELEMENTS(array) (sizeof(array)/sizeof(*(array))) -#endif - - -struct list_entry { - unsigned start_idx; /* start position of match */ - unsigned end_idx; /* end of match */ - - struct box *start_box; /* used only for html contents */ - struct box *end_box; - - struct selection *sel; - - struct list_entry *prev; - struct list_entry *next; -}; - -struct search_context { - void *gui_p; - struct content *c; - struct list_entry *found; - struct list_entry *current; /* first for select all */ - char *string; - bool prev_case_sens; - bool newsearch; - bool is_html; -}; - - -/* Exported function documented in search.h */ -struct search_context * -search_create_context(struct content *c, content_type type, void *gui_data) -{ - struct search_context *context; - struct list_entry *search_head; - - if (type != CONTENT_HTML && type != CONTENT_TEXTPLAIN) { - return NULL; - } - - context = malloc(sizeof(struct search_context)); - if (context == NULL) { - return NULL; - } - - search_head = malloc(sizeof(struct list_entry)); - if (search_head == NULL) { - free(context); - return NULL; - } - - search_head->start_idx = 0; - search_head->end_idx = 0; - search_head->start_box = NULL; - search_head->end_box = NULL; - search_head->sel = NULL; - search_head->prev = NULL; - search_head->next = NULL; - - 
context->found = search_head; - context->current = NULL; - context->string = NULL; - context->prev_case_sens = false; - context->newsearch = true; - context->c = c; - context->is_html = (type == CONTENT_HTML) ? true : false; - context->gui_p = gui_data; - - return context; -} - - -/** - * Release the memory used by the list of matches, - * deleting selection objects too - */ - -static void free_matches(struct search_context *context) -{ - struct list_entry *a; - struct list_entry *b; - - a = context->found->next; - - /* empty the list before clearing and deleting the - * selections because the the clearing updates the - * screen immediately, causing nested accesses to the list */ - - context->found->prev = NULL; - context->found->next = NULL; - - for (; a; a = b) { - b = a->next; - if (a->sel) { - selection_clear(a->sel, true); - selection_destroy(a->sel); - } - free(a); - } -} - - -/** - * Find the first occurrence of 'match' in 'string' and return its index - * - * \param string the string to be searched (unterminated) - * \param s_len length of the string to be searched - * \param pattern the pattern for which we are searching (unterminated) - * \param p_len length of pattern - * \param case_sens true iff case sensitive match required - * \param m_len accepts length of match in bytes - * \return pointer to first match, NULL if none - */ - -static const char *find_pattern(const char *string, int s_len, - const char *pattern, int p_len, bool case_sens, - unsigned int *m_len) -{ - struct { const char *ss, *s, *p; bool first; } context[16]; - const char *ep = pattern + p_len; - const char *es = string + s_len; - const char *p = pattern - 1; /* a virtual '*' before the pattern */ - const char *ss = string; - const char *s = string; - bool first = true; - int top = 0; - - while (p < ep) { - bool matches; - if (p < pattern || *p == '*') { - char ch; - - /* skip any further asterisks; one is the same as many - */ - do p++; while (p < ep && *p == '*'); - - /* if we're at 
the end of the pattern, yes, it matches - */ - if (p >= ep) break; - - /* anything matches a # so continue matching from - here, and stack a context that will try to match - the wildcard against the next character */ - - ch = *p; - if (ch != '#') { - /* scan forwards until we find a match for - this char */ - if (!case_sens) ch = toupper(ch); - while (s < es) { - if (case_sens) { - if (*s == ch) break; - } else if (toupper(*s) == ch) - break; - s++; - } - } - - if (s < es) { - /* remember where we are in case the match - fails; we may then resume */ - if (top < (int)NOF_ELEMENTS(context)) { - context[top].ss = ss; - context[top].s = s + 1; - context[top].p = p - 1; - /* ptr to last asterisk */ - context[top].first = first; - top++; - } - - if (first) { - ss = s; - /* remember first non-'*' char */ - first = false; - } - - matches = true; - } else { - matches = false; - } - - } else if (s < es) { - char ch = *p; - if (ch == '#') - matches = true; - else { - if (case_sens) - matches = (*s == ch); - else - matches = (toupper(*s) == toupper(ch)); - } - if (matches && first) { - ss = s; /* remember first non-'*' char */ - first = false; - } - } else { - matches = false; - } - - if (matches) { - p++; s++; - } else { - /* doesn't match, - * resume with stacked context if we have one */ - if (--top < 0) - return NULL; /* no match, give up */ - - ss = context[top].ss; - s = context[top].s; - p = context[top].p; - first = context[top].first; - } - } - - /* end of pattern reached */ - *m_len = max(s - ss, 1); - return ss; -} - - -/** - * Add a new entry to the list of matches - * - * \param start_idx Offset of match start within textual representation - * \param end_idx Offset of match end - * \param context The search context to add the entry to. - * \return Pointer to added entry, NULL iff failed. 
- */ -static struct list_entry * -add_entry(unsigned start_idx, unsigned end_idx, struct search_context *context) -{ - struct list_entry *entry; - - /* found string in box => add to list */ - entry = calloc(1, sizeof(*entry)); - if (!entry) { - return NULL; - } - - entry->start_idx = start_idx; - entry->end_idx = end_idx; - entry->sel = NULL; - - entry->next = 0; - entry->prev = context->found->prev; - - if (context->found->prev == NULL) { - context->found->next = entry; - } else { - context->found->prev->next = entry; - } - - context->found->prev = entry; - - return entry; -} - - -/** - * Finds all occurrences of a given string in the html box tree - * - * \param pattern the string pattern to search for - * \param p_len pattern length - * \param cur pointer to the current box - * \param case_sens whether to perform a case sensitive search - * \param context The search context to add the entry to. - * \return true on success, false on memory allocation failure - */ -static bool find_occurrences_html(const char *pattern, int p_len, - struct box *cur, bool case_sens, - struct search_context *context) -{ - struct box *a; - - /* ignore this box, if there's no visible text */ - if (!cur->object && cur->text) { - const char *text = cur->text; - unsigned length = cur->length; - - while (length > 0) { - struct list_entry *entry; - unsigned match_length; - unsigned match_offset; - const char *new_text; - const char *pos = find_pattern(text, length, - pattern, p_len, case_sens, - &match_length); - if (!pos) - break; - - /* found string in box => add to list */ - match_offset = pos - cur->text; - - entry = add_entry(cur->byte_offset + match_offset, - cur->byte_offset + - match_offset + - match_length, context); - if (!entry) - return false; - - entry->start_box = cur; - entry->end_box = cur; - - new_text = pos + match_length; - length -= (new_text - text); - text = new_text; - } - } - - /* and recurse */ - for (a = cur->children; a; a = a->next) { - if 
(!find_occurrences_html(pattern, p_len, a, case_sens, - context)) - return false; - } - - return true; -} - - -/** - * Finds all occurrences of a given string in a textplain content - * - * \param pattern the string pattern to search for - * \param p_len pattern length - * \param c the content to be searched - * \param case_sens whether to perform a case sensitive search - * \param context The search context to add the entry to. - * \return true on success, false on memory allocation failure - */ - -static bool find_occurrences_text(const char *pattern, int p_len, - struct content *c, bool case_sens, - struct search_context *context) -{ - int nlines = textplain_line_count(c); - int line; - - for(line = 0; line < nlines; line++) { - size_t offset, length; - const char *text = textplain_get_line(c, line, - &offset, &length); - if (text) { - while (length > 0) { - struct list_entry *entry; - unsigned match_length; - size_t start_idx; - const char *new_text; - const char *pos = find_pattern(text, length, - pattern, p_len, case_sens, - &match_length); - if (!pos) - break; - - /* found string in line => add to list */ - start_idx = offset + (pos - text); - entry = add_entry(start_idx, start_idx + - match_length, context); - if (!entry) - return false; - - new_text = pos + match_length; - offset += (new_text - text); - length -= (new_text - text); - text = new_text; - } - } - } - - return true; -} - - -/** - * Specifies whether all matches or just the current match should - * be highlighted in the search text. 
- */ -static void search_show_all(bool all, struct search_context *context) -{ - struct list_entry *a; - - for (a = context->found->next; a; a = a->next) { - bool add = true; - if (!all && a != context->current) { - add = false; - if (a->sel) { - selection_clear(a->sel, true); - selection_destroy(a->sel); - a->sel = NULL; - } - } - if (add && !a->sel) { - - if (context->is_html == true) { - html_content *html = (html_content *)context->c; - a->sel = selection_create(context->c, true); - if (!a->sel) - continue; - - selection_init(a->sel, html->layout, - &html->len_ctx); - } else { - a->sel = selection_create(context->c, false); - if (!a->sel) - continue; - - selection_init(a->sel, NULL, NULL); - } - - selection_set_start(a->sel, a->start_idx); - selection_set_end(a->sel, a->end_idx); - } - } -} - - -/** - * Search for a string in the box tree - * - * \param string the string to search for - * \param string_len length of search string - * \param context The search context to add the entry to. - * \param flags flags to control the search. - */ -static void -search_text(const char *string, - int string_len, - struct search_context *context, - search_flags_t flags) -{ - struct rect bounds; - struct box *box = NULL; - union content_msg_data msg_data; - bool case_sensitive, forwards, showall; - - case_sensitive = ((flags & SEARCH_FLAG_CASE_SENSITIVE) != 0) ? - true : false; - forwards = ((flags & SEARCH_FLAG_FORWARDS) != 0) ? true : false; - showall = ((flags & SEARCH_FLAG_SHOWALL) != 0) ? 
true : false; - - if (context->c == NULL) - return; - - if (context->is_html == true) { - html_content *html = (html_content *)context->c; - - box = html->layout; - - if (!box) - return; - } - - - /* check if we need to start a new search or continue an old one */ - if ((context->newsearch) || - (context->prev_case_sens != case_sensitive)) { - bool res; - - if (context->string != NULL) - free(context->string); - - context->current = NULL; - free_matches(context); - - context->string = malloc(string_len + 1); - if (context->string != NULL) { - memcpy(context->string, string, string_len); - context->string[string_len] = '\0'; - } - - guit->search->hourglass(true, context->gui_p); - - if (context->is_html == true) { - res = find_occurrences_html(string, string_len, - box, case_sensitive, context); - } else { - res = find_occurrences_text(string, string_len, - context->c, case_sensitive, context); - } - - if (!res) { - free_matches(context); - guit->search->hourglass(false, context->gui_p); - return; - } - guit->search->hourglass(false, context->gui_p); - - context->prev_case_sens = case_sensitive; - - /* new search, beginning at the top of the page */ - context->current = context->found->next; - context->newsearch = false; - - } else if (context->current != NULL) { - /* continued search in the direction specified */ - if (forwards) { - if (context->current->next) - context->current = context->current->next; - } else { - if (context->current->prev) - context->current = context->current->prev; - } - } - - guit->search->status((context->current != NULL), context->gui_p); - - search_show_all(showall, context); - - guit->search->back_state((context->current != NULL) && - (context->current->prev != NULL), - context->gui_p); - guit->search->forward_state((context->current != NULL) && - (context->current->next != NULL), - context->gui_p); - - if (context->current == NULL) - return; - - if (context->is_html == true) { - /* get box position and jump to it */ - 
box_coords(context->current->start_box, &bounds.x0, &bounds.y0); - /* \todo: move x0 in by correct idx */ - box_coords(context->current->end_box, &bounds.x1, &bounds.y1); - /* \todo: move x1 in by correct idx */ - bounds.x1 += context->current->end_box->width; - bounds.y1 += context->current->end_box->height; - } else { - textplain_coords_from_range(context->c, - context->current->start_idx, - context->current->end_idx, &bounds); - } - - msg_data.scroll.area = true; - msg_data.scroll.x0 = bounds.x0; - msg_data.scroll.y0 = bounds.y0; - msg_data.scroll.x1 = bounds.x1; - msg_data.scroll.y1 = bounds.y1; - content_broadcast(context->c, CONTENT_MSG_SCROLL, &msg_data); -} - - -/* Exported function documented in search.h */ -void -search_step(struct search_context *context, - search_flags_t flags, - const char *string) -{ - int string_len; - int i = 0; - - assert(context != NULL); - - guit->search->add_recent(string, context->gui_p); - - string_len = strlen(string); - for (i = 0; i < string_len; i++) - if (string[i] != '#' && string[i] != '*') - break; - if (i >= string_len) { - union content_msg_data msg_data; - free_matches(context); - - guit->search->status(true, context->gui_p); - guit->search->back_state(false, context->gui_p); - guit->search->forward_state(false, context->gui_p); - - msg_data.scroll.area = false; - msg_data.scroll.x0 = 0; - msg_data.scroll.y0 = 0; - content_broadcast(context->c, CONTENT_MSG_SCROLL, &msg_data); - return; - } - search_text(string, string_len, context, flags); -} - - -/* Exported function documented in search.h */ -bool search_term_highlighted(struct content *c, - unsigned start_offset, unsigned end_offset, - unsigned *start_idx, unsigned *end_idx, - struct search_context *context) -{ - if (c == context->c) { - struct list_entry *a; - for (a = context->found->next; a; a = a->next) - if (a->sel && selection_defined(a->sel) && - selection_highlighted(a->sel, - start_offset, end_offset, - start_idx, end_idx)) - return true; - } - - return 
false; -} - - - - -/* Exported function documented in search.h */ -void search_destroy_context(struct search_context *context) -{ - assert(context != NULL); - - if (context->string != NULL) { - guit->search->add_recent(context->string, context->gui_p); - free(context->string); - } - - guit->search->forward_state(true, context->gui_p); - guit->search->back_state(true, context->gui_p); - - free_matches(context); - free(context); -} diff --git a/content/handlers/html/search.h b/content/handlers/html/search.h deleted file mode 100644 index dfb1afc64..000000000 --- a/content/handlers/html/search.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2009 Mark Benjamin - * - * This file is part of NetSurf, http://www.netsurf-browser.org/ - * - * NetSurf is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * NetSurf is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -/** - * \file - * Interface to HTML searching. - */ - -#ifndef NETSURF_HTML_SEARCH_H -#define NETSURF_HTML_SEARCH_H - -#include -#include - -#include "desktop/search.h" - -struct search_context; - -/** - * create a search_context - * - * \param c The content the search_context is connected to - * \param type The content type of c - * \param context A context pointer passed to the provider routines. - * \return A new search context or NULL on error. 
- */ -struct search_context *search_create_context(struct content *c, - content_type type, void *context); - -/** - * Ends the search process, invalidating all state - * freeing the list of found boxes - */ -void search_destroy_context(struct search_context *context); - -/** - * Begins/continues the search process - * - * \note that this may be called many times for a single search. - * - * \param context The search context in use. - * \param flags The flags forward/back etc - * \param string The string to match - */ -void search_step(struct search_context *context, search_flags_t flags, - const char * string); - - -/** - * Determines whether any portion of the given text box should be - * selected because it matches the current search string. - * - * \param c The content to hilight within. - * \param start_offset byte offset within text of string to be checked - * \param end_offset byte offset within text - * \param start_idx byte offset within string of highlight start - * \param end_idx byte offset of highlight end - * \param context The search context to hilight entries from. 
- * \return true iff part of the box should be highlighted - */ -bool search_term_highlighted(struct content *c, - unsigned start_offset, unsigned end_offset, - unsigned *start_idx, unsigned *end_idx, - struct search_context *context); - -#endif diff --git a/content/handlers/text/textplain.c b/content/handlers/text/textplain.c index e9b553410..e8c38314e 100644 --- a/content/handlers/text/textplain.c +++ b/content/handlers/text/textplain.c @@ -41,10 +41,10 @@ #include "content/content_protected.h" #include "content/content_factory.h" #include "content/hlcache.h" +#include "content/textsearch.h" #include "desktop/selection.h" #include "desktop/gui_internal.h" -#include "html/search.h" #include "text/textplain.h" struct textplain_line { @@ -71,7 +71,7 @@ typedef struct textplain_content { struct selection sel; /** Selection state */ /** Context for free text search, or NULL if none */ - struct search_context *search; + struct textsearch_context *search; /** Current search string, or NULL if none */ char *search_string; } textplain_content; @@ -753,7 +753,7 @@ static void textplain_search_clear(struct content *c) text->search_string = NULL; if (text->search != NULL) { - search_destroy_context(text->search); + content_textsearch_destroy(text->search); } text->search = NULL; } @@ -771,14 +771,16 @@ static void textplain_search(struct content *c, void *gui_data, search_flags_t flags, const char *string) { textplain_content *text = (textplain_content *) c; + nserror res; assert(c != NULL); - if (string != NULL && text->search_string != NULL && + if (string != NULL && + text->search_string != NULL && strcmp(string, text->search_string) == 0 && text->search != NULL) { /* Continue prev. 
search */ - search_step(text->search, flags, string); + content_textsearch_step(text->search, flags, string); } else if (string != NULL) { /* New search */ @@ -788,17 +790,16 @@ static void textplain_search(struct content *c, void *gui_data, return; if (text->search != NULL) { - search_destroy_context(text->search); + content_textsearch_destroy(text->search); text->search = NULL; } - text->search = search_create_context(c, CONTENT_TEXTPLAIN, - gui_data); - - if (text->search == NULL) + res = content_textsearch_create(c, gui_data, &text->search); + if (res != NSERROR_OK) { return; + } - search_step(text->search, flags, string); + content_textsearch_step(text->search, flags, string); } else { /* Clear search */ @@ -839,7 +840,6 @@ text_draw(const char *utf8_text, float scale, textplain_content *text, const struct selection *sel, - struct search_context *search, const struct redraw_context *ctx) { bool highlighted = false; @@ -868,13 +868,12 @@ text_draw(const char *utf8_text, /* what about the current search operation, if any? 
*/ if (!highlighted && - (search != NULL) && - search_term_highlighted((struct content *)text, - offset, - offset + len, - &start_idx, - &end_idx, - search)) { + (text->search != NULL) && + content_textsearch_ishighlighted(text->search, + offset, + offset + len, + &start_idx, + &end_idx)) { highlighted = true; } @@ -1125,7 +1124,6 @@ textplain_redraw(struct content *c, data->scale, text, &text->sel, - text->search, ctx)) { return false; } @@ -1165,15 +1163,17 @@ textplain_redraw(struct content *c, highlighted = true; } - if (!highlighted && (text->search != NULL)) { + if (!highlighted && + (text->search != NULL)) { unsigned start_idx, end_idx; - if (search_term_highlighted(c, - tab_ofst, - tab_ofst + 1, - &start_idx, - &end_idx, - text->search)) + if (content_textsearch_ishighlighted( + text->search, + tab_ofst, + tab_ofst + 1, + &start_idx, + &end_idx)) { highlighted = true; + } } if (highlighted) { @@ -1228,7 +1228,7 @@ static nserror textplain_close(struct content *c) textplain_content *text = (textplain_content *) c; if (text->search != NULL) { - search_destroy_context(text->search); + content_textsearch_destroy(text->search); } text->bw = NULL; diff --git a/content/textsearch.c b/content/textsearch.c new file mode 100644 index 000000000..c5359482d --- /dev/null +++ b/content/textsearch.c @@ -0,0 +1,718 @@ +/* + * Copyright 2004 John M Bell + * Copyright 2005 Adrian Lees + * Copyright 2009 Mark Benjamin + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * \file + * Free text search + */ + +#include +#include +#include + +#include "utils/errors.h" +#include "utils/utils.h" +#include "content/content.h" +#include "desktop/selection.h" +#include "netsurf/search.h" +#include "netsurf/content_type.h" +#include "desktop/gui_internal.h" + +#include "text/textplain.h" +#include "html/box.h" +#include "html/box_inspect.h" +#include "html/private.h" + +#include "content/textsearch.h" + + +struct list_entry { + unsigned start_idx; /* start position of match */ + unsigned end_idx; /* end of match */ + + struct box *start_box; /* used only for html contents */ + struct box *end_box; + + struct selection *sel; + + struct list_entry *prev; + struct list_entry *next; +}; + +/** + * The context for a free text search + */ +struct textsearch_context { + void *gui_p; + struct content *c; + struct list_entry *found; + struct list_entry *current; /* first for select all */ + char *string; + bool prev_case_sens; + bool newsearch; + bool is_html; +}; + + + + +/** + * Release the memory used by the list of matches, + * deleting selection objects too + */ +static void free_matches(struct textsearch_context *textsearch) +{ + struct list_entry *cur; + struct list_entry *nxt; + + cur = textsearch->found->next; + + /* + * empty the list before clearing and deleting the selections + * because the clearing may update the toolkit immediately, + * causing nested accesses to the list + */ + + textsearch->found->prev = NULL; + textsearch->found->next = NULL; + + for (; cur; cur = nxt) { + nxt = cur->next; + if (cur->sel) { + selection_clear(cur->sel, true); + selection_destroy(cur->sel); + } + free(cur); + } +} + + +/** + * Find the first occurrence of 'match' in 'string' and return its index + * + * \param string the string to be searched (unterminated) + * \param s_len length of the string to be searched + * 
\param pattern the pattern for which we are searching (unterminated) + * \param p_len length of pattern + * \param case_sens true iff case sensitive match required + * \param m_len accepts length of match in bytes + * \return pointer to first match, NULL if none + */ +static const char * +find_pattern(const char *string, + int s_len, + const char *pattern, + int p_len, + bool case_sens, + unsigned int *m_len) +{ + struct { const char *ss, *s, *p; bool first; } context[16]; + const char *ep = pattern + p_len; + const char *es = string + s_len; + const char *p = pattern - 1; /* a virtual '*' before the pattern */ + const char *ss = string; + const char *s = string; + bool first = true; + int top = 0; + + while (p < ep) { + bool matches; + if (p < pattern || *p == '*') { + char ch; + + /* skip any further asterisks; one is the same as many + */ + do p++; while (p < ep && *p == '*'); + + /* if we're at the end of the pattern, yes, it matches + */ + if (p >= ep) break; + + /* anything matches a # so continue matching from + here, and stack a context that will try to match + the wildcard against the next character */ + + ch = *p; + if (ch != '#') { + /* scan forwards until we find a match for + this char */ + if (!case_sens) ch = toupper(ch); + while (s < es) { + if (case_sens) { + if (*s == ch) break; + } else if (toupper(*s) == ch) + break; + s++; + } + } + + if (s < es) { + /* remember where we are in case the match + fails; we may then resume */ + if (top < (int)NOF_ELEMENTS(context)) { + context[top].ss = ss; + context[top].s = s + 1; + context[top].p = p - 1; + /* ptr to last asterisk */ + context[top].first = first; + top++; + } + + if (first) { + ss = s; + /* remember first non-'*' char */ + first = false; + } + + matches = true; + } else { + matches = false; + } + + } else if (s < es) { + char ch = *p; + if (ch == '#') + matches = true; + else { + if (case_sens) + matches = (*s == ch); + else + matches = (toupper(*s) == toupper(ch)); + } + if (matches && first) 
{ + ss = s; /* remember first non-'*' char */ + first = false; + } + } else { + matches = false; + } + + if (matches) { + p++; s++; + } else { + /* doesn't match, + * resume with stacked context if we have one */ + if (--top < 0) + return NULL; /* no match, give up */ + + ss = context[top].ss; + s = context[top].s; + p = context[top].p; + first = context[top].first; + } + } + + /* end of pattern reached */ + *m_len = max(s - ss, 1); + return ss; +} + + +/** + * Add a new entry to the list of matches + * + * \param start_idx Offset of match start within textual representation + * \param end_idx Offset of match end + * \param context The search context to add the entry to. + * \return Pointer to added entry, NULL iff failed. + */ +static struct list_entry * +add_entry(unsigned start_idx, + unsigned end_idx, + struct textsearch_context *context) +{ + struct list_entry *entry; + + /* found string in box => add to list */ + entry = calloc(1, sizeof(*entry)); + if (!entry) { + return NULL; + } + + entry->start_idx = start_idx; + entry->end_idx = end_idx; + entry->sel = NULL; + + entry->next = NULL; + entry->prev = context->found->prev; + + if (context->found->prev == NULL) { + context->found->next = entry; + } else { + context->found->prev->next = entry; + } + + context->found->prev = entry; + + return entry; +} + + +/** + * Finds all occurrences of a given string in an html box + * + * \param pattern the string pattern to search for + * \param p_len pattern length + * \param cur pointer to the current box + * \param case_sens whether to perform a case sensitive search + * \param context The search context to add the entry to. 
+ * \return true on success, false on memory allocation failure + */ +static bool +find_occurrences_html_box(const char *pattern, + int p_len, + struct box *cur, + bool case_sens, + struct textsearch_context *context) +{ + struct box *a; + + /* ignore this box, if there's no visible text */ + if (!cur->object && cur->text) { + const char *text = cur->text; + unsigned length = cur->length; + + while (length > 0) { + struct list_entry *entry; + unsigned match_length; + unsigned match_offset; + const char *new_text; + const char *pos; + + pos = find_pattern(text, + length, + pattern, + p_len, + case_sens, + &match_length); + if (!pos) + break; + + /* found string in box => add to list */ + match_offset = pos - cur->text; + + entry = add_entry(cur->byte_offset + match_offset, + cur->byte_offset + match_offset + match_length, + context); + if (!entry) + return false; + + entry->start_box = cur; + entry->end_box = cur; + + new_text = pos + match_length; + length -= (new_text - text); + text = new_text; + } + } + + /* and recurse */ + for (a = cur->children; a; a = a->next) { + if (!find_occurrences_html_box(pattern, + p_len, + a, + case_sens, + context)) + return false; + } + + return true; +} + +/** + * Finds all occurrences of a given string in the html box tree + * + * \param pattern the string pattern to search for + * \param p_len pattern length + * \param c The content to search + * \param csens whether to perform a case sensitive search + * \param context The search context to add the entry to. 
+ * \return true on success, false on memory allocation failure + */ +static bool +find_occurrences_html(const char *pattern, + int p_len, + struct content *c, + bool csens, + struct textsearch_context *context) +{ + html_content *html = (html_content *)c; + + if (html->layout == NULL) { + return false; + } + + return find_occurrences_html_box(pattern, + p_len, + html->layout, + csens, + context); +} + +/** + * Finds all occurrences of a given string in a textplain content + * + * \param pattern the string pattern to search for + * \param p_len pattern length + * \param c the content to be searched + * \param case_sens whether to perform a case sensitive search + * \param context The search context to add the entry to. + * \return true on success, false on memory allocation failure + */ +static bool +find_occurrences_text(const char *pattern, + int p_len, + struct content *c, + bool case_sens, + struct textsearch_context *context) +{ + int nlines = textplain_line_count(c); + int line; + + for(line = 0; line < nlines; line++) { + size_t offset, length; + const char *text; + + text = textplain_get_line(c, line, &offset, &length); + if (text) { + while (length > 0) { + struct list_entry *entry; + unsigned match_length; + size_t start_idx; + const char *new_text; + const char *pos; + + pos = find_pattern(text, length, + pattern, p_len, + case_sens, + &match_length); + if (!pos) + break; + + /* found string in line => add to list */ + start_idx = offset + (pos - text); + entry = add_entry(start_idx, + start_idx + match_length, + context); + if (!entry) + return false; + + new_text = pos + match_length; + offset += (new_text - text); + length -= (new_text - text); + text = new_text; + } + } + } + + return true; +} + + +/** + * Specifies whether all matches or just the current match should + * be highlighted in the search text. 
+ */ +static void search_show_all(bool all, struct textsearch_context *context) +{ + struct list_entry *a; + + for (a = context->found->next; a; a = a->next) { + bool add = true; + if (!all && a != context->current) { + add = false; + if (a->sel) { + selection_clear(a->sel, true); + selection_destroy(a->sel); + a->sel = NULL; + } + } + if (add && !a->sel) { + + if (context->is_html == true) { + html_content *html = (html_content *)context->c; + a->sel = selection_create(context->c, true); + if (!a->sel) + continue; + + selection_init(a->sel, html->layout, + &html->len_ctx); + } else { + a->sel = selection_create(context->c, false); + if (!a->sel) + continue; + + selection_init(a->sel, NULL, NULL); + } + + selection_set_start(a->sel, a->start_idx); + selection_set_end(a->sel, a->end_idx); + } + } +} + + +/** + * Search for a string in the box tree + * + * \param string the string to search for + * \param string_len length of search string + * \param context The search context to add the entry to. + * \param flags flags to control the search. + */ +static void +search_text(const char *string, + int string_len, + struct textsearch_context *context, + search_flags_t flags) +{ + struct rect bounds; + union content_msg_data msg_data; + bool case_sensitive, forwards, showall; + + case_sensitive = ((flags & SEARCH_FLAG_CASE_SENSITIVE) != 0) ? + true : false; + forwards = ((flags & SEARCH_FLAG_FORWARDS) != 0) ? true : false; + showall = ((flags & SEARCH_FLAG_SHOWALL) != 0) ? 
true : false; + + if (context->c == NULL) + return; + + /* check if we need to start a new search or continue an old one */ + if ((context->newsearch) || + (context->prev_case_sens != case_sensitive)) { + bool res; + + if (context->string != NULL) + free(context->string); + + context->current = NULL; + free_matches(context); + + context->string = malloc(string_len + 1); + if (context->string != NULL) { + memcpy(context->string, string, string_len); + context->string[string_len] = '\0'; + } + + guit->search->hourglass(true, context->gui_p); + + if (context->is_html == true) { + res = find_occurrences_html(string, string_len, + context->c, case_sensitive, context); + } else { + res = find_occurrences_text(string, string_len, + context->c, case_sensitive, context); + } + + guit->search->hourglass(false, context->gui_p); + + if (!res) { + free_matches(context); + return; + } + + context->prev_case_sens = case_sensitive; + + /* new search, beginning at the top of the page */ + context->current = context->found->next; + context->newsearch = false; + + } else if (context->current != NULL) { + /* continued search in the direction specified */ + if (forwards) { + if (context->current->next) + context->current = context->current->next; + } else { + if (context->current->prev) + context->current = context->current->prev; + } + } + + guit->search->status((context->current != NULL), context->gui_p); + + search_show_all(showall, context); + + guit->search->back_state((context->current != NULL) && + (context->current->prev != NULL), + context->gui_p); + guit->search->forward_state((context->current != NULL) && + (context->current->next != NULL), + context->gui_p); + + if (context->current == NULL) + return; + + if (context->is_html == true) { + /* get box position and jump to it */ + box_coords(context->current->start_box, &bounds.x0, &bounds.y0); + /* \todo: move x0 in by correct idx */ + box_coords(context->current->end_box, &bounds.x1, &bounds.y1); + /* \todo: move x1 in by 
correct idx */ + bounds.x1 += context->current->end_box->width; + bounds.y1 += context->current->end_box->height; + } else { + textplain_coords_from_range(context->c, + context->current->start_idx, + context->current->end_idx, &bounds); + } + + msg_data.scroll.area = true; + msg_data.scroll.x0 = bounds.x0; + msg_data.scroll.y0 = bounds.y0; + msg_data.scroll.x1 = bounds.x1; + msg_data.scroll.y1 = bounds.y1; + content_broadcast(context->c, CONTENT_MSG_SCROLL, &msg_data); +} + + +/* Exported function documented in content/textsearch.h */ +nserror +content_textsearch_step(struct textsearch_context *textsearch, + search_flags_t flags, + const char *string) +{ + int string_len; + int i = 0; + + assert(textsearch != NULL); + + guit->search->add_recent(string, textsearch->gui_p); + + string_len = strlen(string); + for (i = 0; i < string_len; i++) { + if (string[i] != '#' && string[i] != '*') + break; + } + + if (i < string_len) { + search_text(string, string_len, textsearch, flags); + } else { + union content_msg_data msg_data; + free_matches(textsearch); + + guit->search->status(true, textsearch->gui_p); + guit->search->back_state(false, textsearch->gui_p); + guit->search->forward_state(false, textsearch->gui_p); + + msg_data.scroll.area = false; + msg_data.scroll.x0 = 0; + msg_data.scroll.y0 = 0; + content_broadcast(textsearch->c, CONTENT_MSG_SCROLL, &msg_data); + } + + return NSERROR_OK; +} + + +/* Exported function documented in content/textsearch.h */ +bool +content_textsearch_ishighlighted(struct textsearch_context *textsearch, + unsigned start_offset, + unsigned end_offset, + unsigned *start_idx, + unsigned *end_idx) +{ + struct list_entry *cur; + + for (cur = textsearch->found->next; cur != NULL; cur = cur->next) { + if (cur->sel && + selection_defined(cur->sel) && + selection_highlighted(cur->sel, + start_offset, + end_offset, + start_idx, + end_idx)) { + return true; + } + } + + return false; +} + + +/* Exported function documented in content/textsearch.h */ 
+nserror +content_textsearch_create(struct content *c, + void *gui_data, + struct textsearch_context **textsearch_out) +{ + struct textsearch_context *context; + struct list_entry *search_head; + content_type type; + + type = c->handler->type(); + + if (type != CONTENT_HTML && type != CONTENT_TEXTPLAIN) { + return NSERROR_NOT_IMPLEMENTED; + } + + context = malloc(sizeof(struct textsearch_context)); + if (context == NULL) { + return NSERROR_NOMEM; + } + + search_head = malloc(sizeof(struct list_entry)); + if (search_head == NULL) { + free(context); + return NSERROR_NOMEM; + } + + search_head->start_idx = 0; + search_head->end_idx = 0; + search_head->start_box = NULL; + search_head->end_box = NULL; + search_head->sel = NULL; + search_head->prev = NULL; + search_head->next = NULL; + + context->found = search_head; + context->current = NULL; + context->string = NULL; + context->prev_case_sens = false; + context->newsearch = true; + context->c = c; + context->is_html = (type == CONTENT_HTML) ? 
true : false; + context->gui_p = gui_data; + + *textsearch_out = context; + + return NSERROR_OK; +} + + +/* Exported function documented in content/textsearch.h */ +nserror content_textsearch_destroy(struct textsearch_context *textsearch) +{ + assert(textsearch != NULL); + + if (textsearch->string != NULL) { + guit->search->add_recent(textsearch->string, textsearch->gui_p); + free(textsearch->string); + } + + guit->search->forward_state(true, textsearch->gui_p); + guit->search->back_state(true, textsearch->gui_p); + + free_matches(textsearch); + free(textsearch); + + return NSERROR_OK; +} diff --git a/content/textsearch.h b/content/textsearch.h new file mode 100644 index 000000000..f94bcdb42 --- /dev/null +++ b/content/textsearch.h @@ -0,0 +1,80 @@ +/* + * Copyright 2009 Mark Benjamin + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * \file + * Interface to free text searching of content. + */ + +#ifndef NETSURF_CONTENT_SEARCH_H +#define NETSURF_CONTENT_SEARCH_H + +#include +#include + +#include "desktop/search.h" + +struct textsearch_context; +struct content; + +/** + * create a search_context + * + * \param c The content the search_context is connected to + * \param context A context pointer passed to the provider routines. 
+ * \param textsearch_out A pointer to receive the new text search context + * \return NSERROR_OK on success and \a textsearch_out updated else error code + */ +nserror content_textsearch_create(struct content *c, void *context, struct textsearch_context **textsearch_out); + +/** + * Begins/continues the search process + * + * \note that this may be called many times for a single search. + * + * \param textsearch The search context in use. + * \param flags The flags forward/back etc + * \param string The string to match + */ +nserror content_textsearch_step(struct textsearch_context *textsearch, search_flags_t flags, const char *string); + +/** + * Ends the search process, invalidating all state and freeing the list of + * found boxes. + */ +nserror content_textsearch_destroy(struct textsearch_context *textsearch); + +/** + * Determines whether any portion of the given text box should be + * selected because it matches the current search string. + * + * \param textsearch The search context to highlight entries from. + * \param c The content to highlight within. + * \param start_offset byte offset within text of string to be checked + * \param end_offset byte offset within text + * \param start_idx byte offset within string of highlight start + * \param end_idx byte offset of highlight end + * \return true iff part of the box should be highlighted + */ +bool content_textsearch_ishighlighted(struct textsearch_context *textsearch, + unsigned start_offset, + unsigned end_offset, + unsigned *start_idx, + unsigned *end_idx); + +#endif -- cgit v1.2.3