summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John-Mark Bell <jmb@netsurf-browser.org> 2018-01-22 20:53:31 +0000
committer John-Mark Bell <jmb@netsurf-browser.org> 2018-01-22 22:54:18 +0000
commit 1a62581accb099cf83c08de9e9f1535d807bebfc (patch)
tree b9894a204c4b8d5ba1893ada3e40ad80b200c82e
parent b75b31437a21604a8f28ae6b5bd9e5cc182d289f (diff)
download librufl-1a62581accb099cf83c08de9e9f1535d807bebfc.tar.gz
librufl-1a62581accb099cf83c08de9e9f1535d807bebfc.tar.bz2
"Old" FontManager: improve Encoding file parser.
In a non-Unicode world, a (non-Base) encoding may define glyphs for up to 256 character codes. Ensure that at most 256 Encoding file entries are used (as, otherwise, the character code will overflow). In particular, if symbol fonts created for the Unicode Font Manager (which does not have a 256 character limit for an encoding) are installed on a non-Unicode-capable system, only the first 256 glyphs in the font are accessible although the Encoding file may have more than 256 entries. Note, however, that the first 32 character codes will never be used as they are considered control codes. Thus, at most 224 usable characters may be defined. A further wrinkle is that glyph names may map to multiple Unicode codepoints, thus consuming multiple slots in the unicode map (which itself has a fixed size of 256 entries). Thus, it is technically possible for the unicode map to further limit the number of usable characters in a font to fewer than 224. However, unless the font is particularly baroque, this isn't a problem in the real world, because there are only 12 glyph names which map to more than one Unicode codepoint (they map to 2, each, for a total of 24 unicode map entries, if they're all present). Thus, to run out of space in the unicode map, you'd need a font which defines at least 4 of those glyphs twice (and defines the others once, and also defines known glyphs for every other character code). Fixes #2577.
-rw-r--r-- src/rufl_init.c 67
-rw-r--r-- test/data/Encoding 7
2 files changed, 62 insertions, 12 deletions
diff --git a/src/rufl_init.c b/src/rufl_init.c
index 9c80b0a..8cd0358 100644
--- a/src/rufl_init.c
+++ b/src/rufl_init.c
@@ -1011,6 +1011,12 @@ rufl_code rufl_init_scan_font_in_encoding(const char *font_name,
rufl_code rufl_init_read_encoding(font_f font,
struct rufl_unicode_map *umap)
{
+ enum {
+ STATE_START,
+ STATE_COMMENT,
+ STATE_COLLECT,
+ } state = STATE_START;
+ bool emit = false;
unsigned int u = 0;
unsigned int i = 0;
int c;
@@ -1035,18 +1041,50 @@ rufl_code rufl_init_read_encoding(font_f font,
if (!fp)
return rufl_IO_ERROR;
- while (!feof(fp) && u != 256) {
+ while (!feof(fp) && i < 256 && u < 256) {
c = fgetc(fp);
- if (c == '%') {
- /* comment line */
- fgets(s, sizeof s, fp);
- } else if (c == '/') {
- /* character definition */
- if (i++ < 32)
- continue;
- n = fscanf(fp, "%100s", s);
- if (n != 1)
- break;
+
+ if (state == STATE_START) {
+ if (c == '/') {
+ n = 0;
+ state = STATE_COLLECT;
+ } else if (c <= 0x20) {
+ /* Consume C0 and space */
+ } else {
+ /* Comment, or garbage */
+ state = STATE_COMMENT;
+ }
+ } else if (state == STATE_COMMENT) {
+ /* Consume until the next C0 */
+ if (c < 0x20) {
+ state = STATE_START;
+ }
+ } else {
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c == '.') || (c == '_')) {
+ /* Printable: append */
+ s[n++] = c;
+ if (n >= sizeof(s)) {
+ /* Too long: garbage */
+ state = STATE_COMMENT;
+ }
+ } else if (c > 0x20) {
+ /* Garbage */
+ state = STATE_COMMENT;
+ } else {
+ /* C0 or space: done */
+ s[n] = '\0';
+ if (n != 0) {
+ emit = true;
+ }
+ state = STATE_START;
+ }
+ }
+
+ /* Ignore first 32 character codes (these are control chars) */
+ if (emit && i > 31 && i < 256 && u < 256) {
entry = bsearch(s, rufl_glyph_map,
rufl_glyph_map_size,
sizeof rufl_glyph_map[0],
@@ -1059,13 +1097,18 @@ rufl_code rufl_init_read_encoding(font_f font,
for (; strcmp(s, entry->glyph_name) == 0;
entry++) {
umap->map[u].u = entry->u;
- umap->map[u].c = i - 1;
+ umap->map[u].c = i;
u++;
if (u == 256)
break;
}
}
}
+
+ if (emit) {
+ i++;
+ emit = false;
+ }
}
if (fclose(fp) == EOF)
diff --git a/test/data/Encoding b/test/data/Encoding
new file mode 100644
index 0000000..e5b633e
--- /dev/null
+++ b/test/data/Encoding
@@ -0,0 +1,7 @@
+% Encoding file for font 'Allerta'
+/
+/ x
+/w
+ % xyz abc /y
+ /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /space /exclam /quotedbl /.notdef /dollar /percent /ampersand /quotesingle /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash /zero /one /two /three /four /five /six /seven /eight /nine /colon /semicolon /less /equal /greater /question /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft /backslash /bracketright /.notdef /.notdef /grave /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p /q /r /s /t /u /v /w /x /y /z /.notdef /bar /.notdef /.notdef /.notdef /Euro /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /quoteleft /quoteright /guilsinglleft /guilsinglright /quotedblleft /quotedblright /.notdef /endash /emdash /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /space /exclamdown /cent /sterling /currency /yen /.notdef /.notdef /dieresis /.notdef /.notdef /guillemotleft /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /acute /.notdef /.notdef /.notdef /cedilla /.notdef /.notdef /guillemotright /.notdef /.notdef /.notdef /questiondown /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /.notdef /Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave /Iacute /Icircumflex /Idieresis /Eth /Ntilde /Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply /.notdef /Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn /germandbls /agrave /aacute /acircumflex /atilde /adieresis /aring /.notdef /ccedilla /egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex /idieresis /eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis /divide /.notdef /ugrave /uacute /ucircumflex /udieresis 
/yacute /thorn /ydieresis /dotlessi /Lslash /lslash /Scaron /scaron /Ydieresis /Zcaron /zcaron /circumflex /caron /breve /dotaccent /ring /ogonek /tilde /hungarumlaut
+