summaryrefslogtreecommitdiff
path: root/utils/poparser.c
diff options
context:
space:
mode:
authorWaldemar Brodkorb <wbx@uclibc-ng.org>2018-02-17 18:17:08 +0000
committerWaldemar Brodkorb <wbx@uclibc-ng.org>2018-02-17 18:17:08 +0000
commit748b5e3d2d70e47ab72ed44a371bd0db9a3a0c20 (patch)
treea6b903d0656b76e3d879adeb50ad6df63844c0d7 /utils/poparser.c
parent42ff2bba8eb24626919672f0bc129289cdca6fee (diff)
add gettext-tiny tools
For libintl stub users tiny gettext tools might be useful. Tested with x86_64 on Gentoo/uClibc-ng system.
Diffstat (limited to 'utils/poparser.c')
-rw-r--r--utils/poparser.c218
1 files changed, 218 insertions, 0 deletions
diff --git a/utils/poparser.c b/utils/poparser.c
new file mode 100644
index 000000000..236a1d995
--- /dev/null
+++ b/utils/poparser.c
@@ -0,0 +1,218 @@
+#include <ctype.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <iconv.h>
+#include "poparser.h"
+#include "StringEscape.h"
+
+#define streq(A, B) (!strcmp(A, B))
+#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1)))
+
+static unsigned fuzzymark = 0;
+static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) {
+ enum po_entry result_type;
+ char *x, *y;
+ size_t start = (size_t) lp;
+ while(isspace(*lp) && lp < end) lp++;
+ if(lp[0] == '#') {
+ char *s;
+ if((s = strstr(lp, ", fuzzy"))) {
+ if(fuzzymark != 0) fuzzymark++;
+ else fuzzymark=2;
+ }
+ inv:
+ *stringstart = 0;
+ return pe_invalid;
+ } else if((y = strstarts(lp, "msg"))) {
+ if((x = strstarts(y, "id")) && isspace(*x))
+ result_type = pe_msgid;
+ else if ((x = strstarts(y, "id_plural")) && isspace(*x))
+ result_type = pe_plural;
+ else if ((x = strstarts(y, "ctxt")) && isspace(*x))
+ result_type = pe_ctxt;
+ else if ((x = strstarts(y, "str")) && (isspace(*x) ||
+ (x[0] == '[' && (x[1]-'0') < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x))))
+ result_type = pe_msgstr;
+ else
+ goto inv;
+ while(isspace(*x) && x < end) x++;
+ if(*x != '"') abort();
+ conv:
+ *stringstart = ((size_t) x - start) + 1;
+ } else if(lp[0] == '"') {
+ if(!(*info->charset)) {
+ if((x = strstr(lp, "charset="))) {
+ // charset=xxx\\n
+ int len = strlen(x+=8) - 4;
+ assert(len <= 11);
+ if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) {
+ memcpy(info->charset, x, len);
+ info->charset[len] = 0;
+ }
+ }
+ }
+ if((x = strstr(lp, "nplurals=")))
+ if(*(x+9) - '0')
+ info->nplurals = *(x+9) - '0';
+ result_type = pe_str;
+ x = lp;
+ goto conv;
+ } else {
+ goto inv;
+ }
+ return result_type;
+}
+
+/* expects a pointer to the first char after a opening " in a string,
+ * converts the string into convbuf, and returns the length of that string */
+static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) {
+ size_t result = 0;
+ char* e = x + strlen(x);
+ assert(e > x && e < end && *e == 0);
+ e--;
+ while(isspace(*e)) e--;
+ if(*e != '"') abort();
+ *e = 0;
+ char *s;
+ if(*info->charset) {
+ iconv_t ret = iconv_open("UTF-8", info->charset);
+ if(ret != (iconv_t)-1) {
+ size_t a=end-x, b=a*4;
+ char mid[b], *midp=mid;
+ iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b);
+ if((s = strstr(mid, "charset=")))
+ memcpy(s+8, "UTF-8\\n\0", 8);
+ result = unescape(mid, convbuf, convbuflen);
+ // iconv doesnt recognize the encoding
+ } else result = unescape(x, convbuf, convbuflen);
+ } else result = unescape(x, convbuf, convbuflen);
+ return result;
+}
+
+
+void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata) {
+ p->buf = workbuf;
+ p->bufsize = bufsize;
+ p->cb = cb;
+ p->prev_type = pe_invalid;
+ p->prev_rtype = pe_invalid;
+ p->curr_len = 0;
+ p->cbdata = cbdata;
+ *(p->info.charset) = 0;
+ // nplurals = 2 by default
+ p->info.nplurals = 2;
+ fuzzymark = 0;
+}
+
+enum lineactions {
+ la_incr,
+ la_proc,
+ la_abort,
+ la_nop,
+ la_max,
+};
+
+/* return 0 on success */
+int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
+ char *convbuf = p->buf;
+ size_t convbuflen = p->bufsize;
+ size_t strstart;
+
+ static const enum lineactions action_tbl[pe_max][pe_max] = {
+ // pe_str will never be set as curr_type
+ [pe_str] = {
+ [pe_str] = la_abort,
+ [pe_msgid] = la_abort,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_abort,
+ [pe_msgstr] = la_abort,
+ [pe_invalid] = la_abort,
+ },
+ [pe_msgid] = {
+ [pe_str] = la_incr,
+ [pe_msgid] = la_abort,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_proc,
+ [pe_msgstr] = la_proc,
+ [pe_invalid] = la_proc,
+ },
+ [pe_ctxt] = {
+ [pe_str] = la_incr,
+ [pe_msgid] = la_proc,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_abort,
+ [pe_msgstr] = la_abort,
+ [pe_invalid] = la_proc,
+ },
+ [pe_plural] = {
+ [pe_str] = la_incr,
+ [pe_msgid] = la_abort,
+ [pe_ctxt] = la_abort,
+ [pe_plural] = la_abort,
+ [pe_msgstr] = la_proc,
+ [pe_invalid] = la_proc,
+ },
+ [pe_msgstr] = {
+ [pe_str] = la_incr,
+ [pe_msgid] = la_proc,
+ [pe_ctxt] = la_proc,
+ [pe_plural] = la_abort,
+ [pe_msgstr] = la_proc,
+ [pe_invalid] = la_proc,
+ },
+ [pe_invalid] = {
+ [pe_str] = la_nop,
+ [pe_msgid] = la_incr,
+ [pe_ctxt] = la_incr,
+ [pe_plural] = la_nop,
+ [pe_msgstr] = la_nop,
+ [pe_invalid] = la_nop,
+ },
+ };
+
+ enum po_entry type;
+
+ type = get_type_and_start(&p->info, line, line + buflen, &strstart);
+ if(p->prev_rtype != pe_invalid && action_tbl[p->prev_rtype][type] == la_abort)
+ abort();
+ if(type != pe_invalid && type != pe_str)
+ p->prev_rtype = type;
+ if(fuzzymark) {
+ if(type == pe_ctxt && fuzzymark == 1) fuzzymark--;
+ if(type == pe_msgid) fuzzymark--;
+ if(fuzzymark > 0) return 0;
+ }
+ switch(action_tbl[p->prev_type][type]) {
+ case la_incr:
+ assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || pe_ctxt);
+ p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
+ break;
+ case la_proc:
+ assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt);
+ p->info.text = convbuf;
+ p->info.textlen = p->curr_len;
+ p->info.type = p->prev_type;
+ p->cb(&p->info, p->cbdata);
+ if(type != pe_invalid)
+ p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen);
+ else
+ p->curr_len = 0;
+ break;
+ case la_nop:
+ break;
+ case la_abort:
+ default:
+ abort();
+ // todo : return error code
+ }
+ if(type != pe_str) {
+ p->prev_type = type;
+ }
+ return 0;
+}
+
+int poparser_finish(struct po_parser *p) {
+ char empty[4] = "";
+ return poparser_feed_line(p, empty, sizeof(empty));
+}