/* * Copyright 2006 James Bursa * Copyright 2006 Adrian Lees * * This file is part of NetSurf, http://www.netsurf-browser.org/ * * NetSurf is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * NetSurf is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** \file * Content for text/plain (implementation). */ #include #include #include #define LIBICONV_PLUG #include #include "content/content.h" #include "css/css.h" #include "desktop/gui.h" #include "desktop/plotters.h" #include "desktop/selection.h" #include "render/box.h" #include "render/font.h" #include "render/textplain.h" #include "utils/log.h" #include "utils/messages.h" #include "utils/talloc.h" #include "utils/utils.h" #include "utils/utf8.h" #define CHUNK 20480 #define MARGIN 4 #define TAB_WIDTH 8 /* must be power of 2 currently */ static struct css_style textplain_style; static int textplain_tab_width = 256; /* try for a sensible default */ static int textplain_coord_from_offset(const char *text, size_t offset, size_t length); /** * Create a CONTENT_TEXTPLAIN. 
*/
/*
 * Resets the shared text/plain style to the base style with a monospace
 * family, allocates the initial CHUNK-byte UTF-8 buffer and opens an
 * iconv descriptor converting to UTF-8.  The source charset defaults to
 * iso-8859-1 and is overridden by the first "charset" entry in params;
 * when iconv_open() rejects it with EINVAL, iso-8859-1 is used for the
 * conversion instead (the stored encoding name is left as requested).
 *
 * \param  c	   content structure being created
 * \param  params  NULL-terminated list, scanned as name/value pairs
 * \return true on success, false on failure (a CONTENT_MSG_ERROR
 *	   message has been broadcast)
 */
bool textplain_create(struct content *c, const char *params[])
{
	const char *charset = "iso-8859-1";
	union content_msg_data msg;
	iconv_t converter;
	char *buffer;
	unsigned int p = 0;

	textplain_style = css_base_style;
	textplain_style.font_family = CSS_FONT_FAMILY_MONOSPACE;

	buffer = talloc_array(c, char, CHUNK);
	if (buffer == NULL)
		goto no_memory;

	/* step through the pairs; only the first charset entry counts */
	while (params[p] != NULL) {
		if (strcasecmp(params[p], "charset") == 0) {
			charset = talloc_strdup(c, params[p + 1]);
			if (charset == NULL)
				goto no_memory;
			break;
		}
		p += 2;
	}

	converter = iconv_open("utf-8", charset);
	if (converter == (iconv_t)(-1) && errno == EINVAL) {
		/* charset not known to iconv: retry with the default */
		LOG(("unsupported encoding \"%s\"", charset));
		converter = iconv_open("utf-8", "iso-8859-1");
	}
	if (converter == (iconv_t)(-1)) {
		char text[300];

		snprintf(text, sizeof text, "IconvFailed %s", strerror(errno));
		text[sizeof text - 1] = 0;
		msg.error = text;
		content_broadcast(c, CONTENT_MSG_ERROR, msg);
		return false;
	}

	/* conversion state */
	c->data.textplain.encoding = charset;
	c->data.textplain.iconv_cd = converter;
	c->data.textplain.converted = 0;

	/* output buffer: allocated but still empty */
	c->data.textplain.utf8_data = buffer;
	c->data.textplain.utf8_data_size = 0;
	c->data.textplain.utf8_data_allocated = CHUNK;

	/* no layout has been computed yet */
	c->data.textplain.physical_line = 0;
	c->data.textplain.physical_line_count = 0;
	c->data.textplain.formatted_width = 0;

	return true;

no_memory:
	msg.error = messages_get("NoMemory");
	content_broadcast(c, CONTENT_MSG_ERROR, msg);
	return false;
}


/**
 * Process data for CONTENT_TEXTPLAIN.
*/ bool textplain_process_data(struct content *c, char *data, unsigned int size) { iconv_t iconv_cd = c->data.textplain.iconv_cd; size_t count; union content_msg_data msg_data; do { char *inbuf = c->source_data + c->data.textplain.converted; size_t inbytesleft = c->source_size - c->data.textplain.converted; char *outbuf = c->data.textplain.utf8_data + c->data.textplain.utf8_data_size; size_t outbytesleft = c->data.textplain.utf8_data_allocated - c->data.textplain.utf8_data_size; count = iconv(iconv_cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); c->data.textplain.converted = inbuf - c->source_data; c->data.textplain.utf8_data_size = c->data.textplain. utf8_data_allocated - outbytesleft; if (count == (size_t)(-1) && errno == E2BIG) { size_t allocated = CHUNK + c->data.textplain.utf8_data_allocated; char *utf8_data = talloc_realloc(c, c->data.textplain.utf8_data, char, allocated); if (!utf8_data) goto no_memory; c->data.textplain.utf8_data = utf8_data; c->data.textplain.utf8_data_allocated = allocated; } else if (count == (size_t)(-1) && errno != EINVAL) { char buf[300]; snprintf(buf, sizeof buf, "IconvFailed %s", strerror(errno)); buf[sizeof buf - 1] = 0; msg_data.error = buf; content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } gui_multitask(); } while (!(c->data.textplain.converted == c->source_size || (count == (size_t)(-1) && errno == EINVAL))); return true; no_memory: msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /** * Convert a CONTENT_TEXTPLAIN for display. */ bool textplain_convert(struct content *c, int width, int height) { iconv_close(c->data.textplain.iconv_cd); c->data.textplain.iconv_cd = 0; textplain_reformat(c, width, height); c->status = CONTENT_STATUS_DONE; content_set_status(c, messages_get("Done")); return true; } /** * Reformat a CONTENT_TEXTPLAIN to a new width. 
*/
/*
 * Splits the converted UTF-8 text into physical lines for the given
 * width.  A line ends at a CR or LF (a CR/LF or LF/CR pair counts as one
 * terminator); otherwise it is wrapped after the last space seen on the
 * line or, if there is none, once the column limit is reached.  Columns
 * are counted per byte, with tabs advancing to the next TAB_WIDTH stop.
 *
 * The result is stored in c->data.textplain.physical_line[], followed by
 * one extra entry whose start is the data size, and c->width / c->height
 * are updated.  If the font cannot be measured the previous layout is
 * left untouched; if memory runs out physical_line_count is left at 0.
 *
 * \param  c	   content of type CONTENT_TEXTPLAIN
 * \param  width   available width
 * \param  height  unused
 */
void textplain_reformat(struct content *c, int width, int height)
{
	char *utf8_data = c->data.textplain.utf8_data;
	size_t utf8_data_size = c->data.textplain.utf8_data_size;
	unsigned long line_count = 0;
	struct textplain_line *line = c->data.textplain.physical_line;
	struct textplain_line *line1;
	size_t i, space, col;
	size_t columns;
	int character_width;
	size_t line_start;

	/* compute available columns (assuming monospaced font) - use 8
	 * characters for better accuracy */
	if (!nsfont.font_width(&textplain_style, "ABCDEFGH", 8,
			&character_width))
		return;
	/* a zero (or negative) width would divide by zero below */
	if (character_width <= 0)
		return;
	columns = (width - MARGIN - MARGIN) * 8 / character_width;
	textplain_tab_width = (TAB_WIDTH * character_width) / 8;

	c->data.textplain.formatted_width = width;

	c->data.textplain.physical_line_count = 0;

	if (!line) {
		c->data.textplain.physical_line = line =
				talloc_array(c, struct textplain_line,
				1024 + 3);
		if (!line)
			goto no_memory;
	}

	line[line_count++].start = line_start = 0;
	space = 0;	/* 0 doubles as "no space seen on this line" */
	for (i = 0, col = 0; i != utf8_data_size; i++) {
		bool term = (utf8_data[i] == '\n' || utf8_data[i] == '\r');
		size_t next_col = col + 1;

		if (utf8_data[i] == '\t')
			next_col = (next_col + TAB_WIDTH - 1) &
					~(TAB_WIDTH - 1);

		if (term || next_col >= columns) {
			/* grow the array in steps of 1024 entries, always
			 * keeping 3 spare beyond the current block */
			if (line_count % 1024 == 0) {
				line1 = talloc_realloc(c, line,
						struct textplain_line,
						line_count + 1024 + 3);
				if (!line1)
					goto no_memory;
				c->data.textplain.physical_line =
						line = line1;
			}
			if (term) {
				/* the terminator is not part of the line */
				line[line_count-1].length = i - line_start;

				/* skip second char of CR/LF or LF/CR pair */
				if (i + 1 < utf8_data_size &&
						utf8_data[i+1] != utf8_data[i] &&
						(utf8_data[i+1] == '\n' ||
						utf8_data[i+1] == '\r'))
					i++;
			} else {
				/* break at last space in line if there is
				 * one; otherwise break after the current
				 * byte.  Either way the byte at i stays on
				 * this line, so no byte falls between two
				 * lines. */
				if (space)
					i = space;
				line[line_count-1].length =
						(i + 1) - line_start;
			}
			line[line_count++].start = line_start = i + 1;
			col = 0;
			space = 0;
		} else {
			/* advance to the tab stop for a tab, by one
			 * column otherwise */
			col = next_col;
			if (utf8_data[i] == ' ')
				space = i;
		}
	}
	line[line_count-1].length = i - line[line_count-1].start;
	/* entry past the last line marks the end of the data */
	line[line_count].start = utf8_data_size;

	c->data.textplain.physical_line_count = line_count;
	c->width = width;
	c->height = line_count *
			css_len2px(&textplain_style.font_size.value.length,
			&textplain_style) * 1.2 + MARGIN + MARGIN;

	return;

no_memory:
	LOG(("out of memory (line_count %lu)", line_count));
	return;
}


/**
 * Destroy a CONTENT_TEXTPLAIN and free all resources it owns.
 *
 * Only the iconv descriptor is released here, and only if it is still
 * open (textplain_convert() closes it and zeroes the handle).
 *
 * \param  c  content of type CONTENT_TEXTPLAIN
 */
void textplain_destroy(struct content *c)
{
	if (c->data.textplain.iconv_cd)
		iconv_close(c->data.textplain.iconv_cd);
}


/**
 * Draw a CONTENT_TEXTPLAIN using the current set of plotters (plot).
 *
 * \param  c		     content of type CONTENT_TEXTPLAIN
 * \param  x		     coordinate for top-left of redraw
 * \param  y		     coordinate for top-left of redraw
 * \param  width	     available width
 * \param  height	     available height
 * \param  clip_x0	     clip rectangle
 * \param  clip_y0	     clip rectangle
 * \param  clip_x1	     clip rectangle
 * \param  clip_y1	     clip rectangle
 * \param  scale	     scale for redraw
 * \param  background_colour the background colour
 * \return true if successful, false otherwise
 *
 * x, y, clip_[xy][01] are in target coordinates.
*/
/*
 * The plot area is first cleared to white.  Each physical line in the
 * range derived from the clip rectangle is then drawn as a sequence of
 * runs separated by tabs: a run is passed to text_redraw(), after which
 * the pen moves to the next multiple of the scaled tab width.  A tab that
 * lies inside the selection or the current search match is painted as a
 * filled rectangle (black if the background is light, white otherwise).
 * The width and height arguments are not referenced.
 *
 * NOTE(review): the first and last line to draw are computed from
 * clip_y0 / clip_y1 alone, without reference to y - confirm this matches
 * the coordinate convention used by callers.
 */
bool textplain_redraw(struct content *c, int x, int y,
		int width, int height,
		int clip_x0, int clip_y0, int clip_x1, int clip_y1,
		float scale, unsigned long background_colour)
{
	struct browser_window *bw = current_redraw_browser;
	struct textplain_line *lines = c->data.textplain.physical_line;
	char *data = c->data.textplain.utf8_data;
	unsigned long total = c->data.textplain.physical_line_count;
	float line_height = css_len2px(&textplain_style.font_size.value.length,
			&textplain_style) * 1.2;
	float scaled_line_height = line_height * scale;
	long first = clip_y0 / scaled_line_height - 1;
	long limit = clip_y1 / scaled_line_height + 1;
	struct rect clip;
	colour highlight_bg;
	long row;

	clip.x0 = clip_x0;
	clip.y0 = clip_y0;
	clip.x1 = clip_x1;
	clip.y1 = clip_y1;

	/* keep both ends inside [0, total] and the range non-negative */
	if (first < 0)
		first = 0;
	if (limit < 0)
		limit = 0;
	if (total < (unsigned long) first)
		first = total;
	if (total < (unsigned long) limit)
		limit = total;
	if (limit < first)
		limit = first;

	if (!plot.clg(0xffffff))
		return false;

	/* nothing has been laid out yet */
	if (lines == NULL)
		return true;

	/* choose a suitable background colour for any highlighted text */
	highlight_bg = ((background_colour & 0x808080) == 0x808080) ?
			0 : 0xffffff;

	x += MARGIN * scale;
	y += MARGIN * scale;

	for (row = first; row != limit; row++) {
		const char *text = data + lines[row].start;
		size_t length = lines[row].length;
		int tab_px = textplain_tab_width * scale;
		size_t pos = 0;
		int pen_x = x;

		/* guard the division used for tab alignment */
		if (tab_px == 0)
			tab_px = 1;

		/* an empty line skips this loop entirely */
		while (pos < length) {
			size_t run_end = pos;
			int run_width;
			int tab_stop_x;

			/* a run extends to the next tab or the line end */
			while (run_end < length && text[run_end] != '\t')
				run_end = utf8_next(text, length, run_end);

			if (!text_redraw(text + pos, run_end - pos,
					lines[row].start + pos, false,
					&textplain_style,
					pen_x, y + (row * scaled_line_height),
					&clip, line_height, scale,
					background_colour, false))
				return false;

			if (run_end >= length)
				break;

			/* locate end of string and align to next tab
			 * position */
			if (nsfont.font_width(&textplain_style, &text[pos],
					run_end - pos, &run_width))
				pen_x += (int)(run_width * scale);

			tab_stop_x = x + ((1 + (pen_x - x) / tab_px) * tab_px);

			/* if the tab character lies within the selection, if
			 * any, then we must draw it as a filled rectangle so
			 * that it's consistent with background of the
			 * selected text */
			if (bw) {
				unsigned tab_ofst = lines[row].start + run_end;
				struct selection *sel = bw->sel;
				unsigned start_idx, end_idx;
				bool lit = false;

				if (selection_defined(sel) &&
						selection_highlighted(sel,
						tab_ofst, tab_ofst + 1,
						&start_idx, &end_idx))
					lit = true;

				if (!lit &&
						search_current_window ==
						bw->window &&
						gui_search_term_highlighted(
						bw->window,
						tab_ofst, tab_ofst + 1,
						&start_idx, &end_idx))
					lit = true;

				if (lit) {
					int sy = y + (row * scaled_line_height);
					if (!plot.fill(pen_x, sy, tab_stop_x,
							sy + scaled_line_height,
							highlight_bg))
						return false;
				}
			}

			pos = run_end + 1;
			pen_x = tab_stop_x;
		}
	}

	return true;
}


/**
 * Return byte offset within UTF8 textplain content, given the co-ordinates
 * of a point within a textplain content. 'dir' specifies the direction in
 * which to search (-1 = above-left, +1 = below-right) if the co-ordinates
 * are not contained within a line.
* * \param c content of type CONTENT_TEXTPLAIN * \param x x ordinate of point * \param y y ordinate of point * \param dir direction of search if not within line * \return byte offset of character containing (or nearest to) point */ size_t textplain_offset_from_coords(struct content *c, int x, int y, int dir) { float line_height = css_len2px(&textplain_style.font_size.value.length, &textplain_style) * 1.2; struct textplain_line *line; const char *text; unsigned nlines; size_t length; int idx; assert(c->type == CONTENT_TEXTPLAIN); y = (int)((float)(y - MARGIN) / line_height); x -= MARGIN; nlines = c->data.textplain.physical_line_count; if (!nlines) return 0; if (y <= 0) y = 0; else if ((unsigned)y >= nlines) y = nlines - 1; line = &c->data.textplain.physical_line[y]; text = c->data.textplain.utf8_data + line->start; length = line->length; idx = 0; while (x > 0) { size_t next_offset = 0; int width = INT_MAX; while (next_offset < length && text[next_offset] != '\t') next_offset = utf8_next(text, length, next_offset); if (next_offset < length) nsfont.font_width(&textplain_style, text, next_offset, &width); if (x <= width) { int pixel_offset; size_t char_offset; nsfont.font_position_in_string(&textplain_style, text, next_offset, x, &char_offset, &pixel_offset); idx += char_offset; break; } x -= width; length -= next_offset; text += next_offset; idx += next_offset; /* check if it's within the tab */ width = textplain_tab_width - (width % textplain_tab_width); if (x <= width) break; x -= width; length--; text++; idx++; } return line->start + idx; } /** * Given a byte offset within the text, return the line number * of the line containing that offset (or -1 if offset invalid) * * \param c content of type CONTENT_TEXTPLAIN * \param offset byte offset within textual representation * \return line number, or -1 if offset invalid (larger than size) */ int textplain_find_line(struct content *c, unsigned offset) { struct textplain_line *line = c->data.textplain.physical_line; int 
nlines = c->data.textplain.physical_line_count; int lineno = 0; assert(c->type == CONTENT_TEXTPLAIN); if (offset > c->data.textplain.utf8_data_size) return -1; /* \todo - implement binary search here */ while (lineno < nlines && line[lineno].start < offset) lineno++; if (line[lineno].start > offset) lineno--; return lineno; } /** * Convert a character offset within a line of text into the * horizontal co-ordinate, taking into account the font being * used and any tabs in the text * * \param text line of text * \param offset char offset within text * \param length line length * \return x ordinate */ int textplain_coord_from_offset(const char *text, size_t offset, size_t length) { int x = 0; while (offset > 0) { size_t next_offset = 0; int tx; while (next_offset < offset && text[next_offset] != '\t') next_offset = utf8_next(text, length, next_offset); nsfont.font_width(&textplain_style, text, next_offset, &tx); x += tx; if (next_offset >= offset) break; /* align to next tab boundary */ next_offset++; x = (1 + (x / textplain_tab_width)) * textplain_tab_width; offset -= next_offset; text += next_offset; length -= next_offset; } return x; } /** * Given a range of byte offsets within a UTF8 textplain content, * return a box that fully encloses the text * * \param c content of type CONTENT_TEXTPLAIN * \param start byte offset of start of text range * \param end byte offset of end * \param r rectangle to be completed */ void textplain_coords_from_range(struct content *c, unsigned start, unsigned end, struct rect *r) { float line_height = css_len2px(&textplain_style.font_size.value.length, &textplain_style) * 1.2; char *utf8_data = c->data.textplain.utf8_data; struct textplain_line *line; unsigned lineno = 0; unsigned nlines; assert(c->type == CONTENT_TEXTPLAIN); assert(start <= end); assert(end <= c->data.textplain.utf8_data_size); nlines = c->data.textplain.physical_line_count; line = c->data.textplain.physical_line; /* find start */ lineno = textplain_find_line(c, 
start); r->y0 = (int)(MARGIN + lineno * line_height); if (lineno + 1 <= nlines || line[lineno + 1].start >= end) { /* \todo - it may actually be more efficient just to run forwards most of the time */ /* find end */ lineno = textplain_find_line(c, end); r->x0 = 0; r->x1 = c->data.textplain.formatted_width; } else { /* single line */ const char *text = utf8_data + line[lineno].start; r->x0 = textplain_coord_from_offset(text, start - line[lineno].start, line[lineno].length); r->x1 = textplain_coord_from_offset(text, end - line[lineno].start, line[lineno].length); } r->y1 = (int)(MARGIN + (lineno + 1) * line_height); } /** * Return a pointer to the requested line of text. * * \param c content of type CONTENT_TEXTPLAIN * \param lineno line number * \param poffset receives byte offset of line start within text * \param plen receives length of returned line * \return pointer to text, or NULL if invalid line number */ char *textplain_get_line(struct content *c, unsigned lineno, size_t *poffset, size_t *plen) { struct textplain_line *line; assert(c->type == CONTENT_TEXTPLAIN); if (lineno >= c->data.textplain.physical_line_count) return NULL; line = &c->data.textplain.physical_line[lineno]; *poffset = line->start; *plen = line->length; return c->data.textplain.utf8_data + line->start; } /** * Return a pointer to the raw UTF-8 data, as opposed to the reformatted * text to fit the window width. Thus only hard newlines are preserved * in the saved/copied text of a selection. * * \param c content of type CONTENT_TEXTPLAIN * \param start starting byte offset within UTF-8 text * \param end ending byte offset * \param plen receives validated length * \return pointer to text, or NULL if no text */ char *textplain_get_raw_data(struct content *c, unsigned start, unsigned end, size_t *plen) { size_t utf8_size = c->data.textplain.utf8_data_size; assert(c->type == CONTENT_TEXTPLAIN); /* any text at all? 
*/ if (!utf8_size) return NULL; /* clamp to valid offset range */ if (start >= utf8_size) start = utf8_size; if (end >= utf8_size) end = utf8_size; *plen = end - start; return c->data.textplain.utf8_data + start; }