summaryrefslogtreecommitdiff
path: root/aliases/makealiases.c
diff options
context:
space:
mode:
Diffstat (limited to 'aliases/makealiases.c')
-rw-r--r--aliases/makealiases.c243
1 files changed, 243 insertions, 0 deletions
diff --git a/aliases/makealiases.c b/aliases/makealiases.c
new file mode 100644
index 0000000..f24370b
--- /dev/null
+++ b/aliases/makealiases.c
@@ -0,0 +1,243 @@
+/**
+ * IANA charset data to Iconv Aliases file convertor
+ *
+ * Version history:
+ *
+ * 0.01 - Initial version
+ * 0.02 - Added "utf8" alias seen in the wild
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct extra {
+ const char *canon;
+ const char *aliases;
+} extras[] = {
+ { "ISO-8859-1", "8859_1 ISO8859-1" },
+ { "ISO-8859-2", "8859_2 ISO8859-2" },
+ { "ISO-8859-3", "8859_3 ISO8859-3" },
+ { "ISO-8859-4", "8859_4 ISO8859-4" },
+ { "ISO-8859-5", "8859_5 ISO8859-5" },
+ { "ISO-8859-7", "8859_7 ISO8859-7" },
+ { "ISO-8859-8", "8859_8 ISO8859-8" },
+ { "ISO-8859-9", "8859_9 ISO8859-9" },
+ { "ISO-8859-10", "8859_10 ISO8859-10" },
+ { "ISO-8859-13", "8859_13 ISO8859-13" },
+ { "ISO-8859-14", "8859_14 ISO8859-14" },
+ { "ISO-8859-15", "8859_15 ISO8859-15" },
+ { "Shift_JIS", "X-SJIS Shift-JIS" },
+ { "EUC-JP", "EUCJP" },
+ { "EUC-KR", "EUCKR" },
+ { "UTF-8", "UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8" },
+ { "ISO-10646-UCS-4", "UCS-4 UCS4" },
+ { "ISO-10646-UCS-2", "UCS-2 UCS2" },
+ { "GB2312", "EUC-CN EUCCN CN-GB" },
+ { "Big5", "BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE" },
+ { "macintosh", "MACROMAN MAC-ROMAN X-MAC-ROMAN" },
+ { "windows-1250", "CP1250 MS-EE" },
+ { "windows-1251", "CP1251 MS-CYRL" },
+ { "windows-1252", "CP1252 MS-ANSI" },
+ { "windows-1253", "CP1253 MS-GREEK" },
+ { "windows-1254", "CP1254 MS-TURK" },
+ { "windows-1256", "CP1256 MS-ARAB" },
+ { "windows-1257", "CP1257 WINBALTRIM" },
+};
+#define EXTRAS_SIZE (sizeof(extras) / sizeof(extras[0]))
+
+/*
+ * Make aliases file from IANA charset data.
+ * The canonical name of an encoding is that which follows the "Name:" tag
+ * in the input file. There is an exception, however, for those encodings
+ * which have an alias which is denoted as the "preferred MIME name". For
+ * these encodings, the preferred MIME name is taken as the canonical form.
+ */
+
+#define TOP argv[1]
+#define SETS argv[2]
+#define BOTTOM argv[3]
+#define ALIASES argv[4]
+
+int main(int argc, const char **argv)
+{
+ FILE *in, *out;
+ char buf[200], name[64];
+ short mibenum;
+ char *s, *n, *aliases, *temp;
+ int i;
+ int namelen;
+
+ in = fopen(TOP, "r");
+ if (!in)
+ return 1;
+
+ out = fopen(ALIASES, "w");
+ if (!out)
+ return 1;
+
+ while (fgets(buf, sizeof buf, in)) {
+ fputs(buf, out);
+ }
+
+ fclose(in);
+
+ in = fopen(SETS, "r");
+ if (!in) {
+ fclose(out);
+ return 1;
+ }
+
+ fgets(buf, sizeof buf, in);
+
+ while (1) {
+ /* find start of record */
+ if (strncmp(buf, "Name:", 5) != 0) {
+ while(fgets(buf, sizeof buf, in)) {
+ if (strncmp(buf, "Name:", 5) == 0)
+ break;
+ }
+ }
+ if(strncmp(buf, "Name:", 5) != 0)
+ break;
+
+ buf[strlen(buf) - 1] = '\0';
+
+ s = buf+5;
+ /* skip whitespace */
+ while (isspace(*s))
+ s++;
+ /* copy name to buffer */
+ n = name;
+ while (*s) {
+ if (isspace(*s))
+ break;
+ *n++ = *s++;
+ }
+ *n = '\0';
+
+ /* get mibenum */
+ while(fgets(buf, sizeof buf, in)) {
+ if (strncmp(buf, "Name:", 5) == 0)
+ break;
+ if (strncmp(buf, "MIBenum:", 8) == 0)
+ break;
+ }
+ if (strncmp(buf, "MIBenum:", 8) != 0)
+ continue;
+
+ buf[strlen(buf) - 1] = '\0';
+
+ s = buf+8;
+ while (isspace(*s))
+ s++;
+ mibenum = atoi(s);
+
+ aliases = malloc(1);
+ if (!aliases)
+ break;
+ *aliases = '\0';
+
+ /* parse aliases */
+ while(fgets(buf, sizeof buf, in)) {
+ if (strncmp(buf, "Name:", 5) == 0)
+ break;
+ if (strncmp(buf, "Alias:", 6) != 0)
+ continue;
+
+ buf[strlen(buf) - 1] = '\0';
+
+ s = buf + 6;
+ while (isspace(*s))
+ s++;
+
+ if (strncmp(s, "None", 4) == 0)
+ /* ignore this */
+ continue;
+
+ if (strstr(s, "preferred MIME name") != 0) {
+ temp = realloc(aliases,
+ strlen(aliases) + 1 +
+ strlen(name) + 1);
+ if (!temp)
+ goto end;
+ aliases = temp;
+ sprintf(aliases, "%s%s%s", aliases,
+ aliases[0] == '\0' ? "" : " ", name);
+ n = name;
+ while (*s) {
+ if (isspace(*s))
+ break;
+ *n++ = *s++;
+ }
+ *n = '\0';
+ }
+ else {
+ n = s;
+ while (*n) {
+ if (isspace(*n))
+ break;
+ n++;
+ }
+ temp = realloc(aliases,
+ strlen(aliases) + 1 + (n - s) + 1);
+ if (!temp)
+ goto end;
+ aliases = temp;
+ n = aliases + strlen(aliases);
+ if (aliases[0] != '\0')
+ *n++ = ' ';
+ while (*s) {
+ if (isspace(*s))
+ break;
+ *n++ = *s++;
+ }
+ *n = '\0';
+ }
+ }
+
+ fprintf(out, "%s\t", name);
+
+ /* Rounded up to tab stop */
+ namelen = (strlen(name) + 8) & ~(8 - 1);
+ while (namelen < 3 * 8) {
+ fputc('\t', out);
+ namelen += 8;
+ }
+
+ fprintf(out, "%d", mibenum);
+
+ if (aliases[0] != '\0')
+ fprintf(out, "\t\t%s", aliases);
+ for (i = 0; i != EXTRAS_SIZE; i++) {
+ if (strcmp(name, extras[i].canon) == 0) {
+ fprintf(out, "%s%s",
+ aliases[0] == '\0' ? "\t\t" : " ",
+ extras[i].aliases);
+ break;
+ }
+ }
+ fprintf(out, "\n");
+
+ free(aliases);
+ }
+
+end:
+ fclose(in);
+
+ in = fopen(BOTTOM, "r");
+ if (!in) {
+ fclose(out);
+ return 1;
+ }
+
+ while (fgets(buf, sizeof buf, in))
+ fputs(buf, out);
+
+ fclose(in);
+ fclose(out);
+
+ return 0;
+}
+