#include #include #include #include #include #include "poparser.h" #include "StringEscape.h" #define streq(A, B) (!strcmp(A, B)) #define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) static unsigned fuzzymark = 0; static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) { enum po_entry result_type; char *x, *y; size_t start = (size_t) lp; while(isspace(*lp) && lp < end) lp++; if(lp[0] == '#') { char *s; if((s = strstr(lp, ", fuzzy"))) { if(fuzzymark != 0) fuzzymark++; else fuzzymark=2; } inv: *stringstart = 0; return pe_invalid; } else if((y = strstarts(lp, "msg"))) { if((x = strstarts(y, "id")) && isspace(*x)) result_type = pe_msgid; else if ((x = strstarts(y, "id_plural")) && isspace(*x)) result_type = pe_plural; else if ((x = strstarts(y, "ctxt")) && isspace(*x)) result_type = pe_ctxt; else if ((x = strstarts(y, "str")) && (isspace(*x) || (x[0] == '[' && (x[1]-'0') < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x)))) result_type = pe_msgstr; else goto inv; while(isspace(*x) && x < end) x++; if(*x != '"') abort(); conv: *stringstart = ((size_t) x - start) + 1; } else if(lp[0] == '"') { if(!(*info->charset)) { if((x = strstr(lp, "charset="))) { // charset=xxx\\n int len = strlen(x+=8) - 4; assert(len <= 11); if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) { memcpy(info->charset, x, len); info->charset[len] = 0; } } } if((x = strstr(lp, "nplurals="))) if(*(x+9) - '0') info->nplurals = *(x+9) - '0'; result_type = pe_str; x = lp; goto conv; } else { goto inv; } return result_type; } /* expects a pointer to the first char after a opening " in a string, * converts the string into convbuf, and returns the length of that string */ static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) { size_t result = 0; char* e = x + strlen(x); assert(e > x && e < end && *e == 0); e--; while(isspace(*e)) e--; if(*e != '"') abort(); *e = 0; char *s; if(*info->charset) { iconv_t ret = iconv_open("UTF-8", info->charset); if(ret != (iconv_t)-1) { size_t a=end-x, b=a*4; char mid[b], *midp=mid; iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b); if((s = strstr(mid, "charset="))) memcpy(s+8, "UTF-8\\n\0", 8); result = unescape(mid, convbuf, convbuflen); // iconv doesnt recognize the encoding } else result = unescape(x, convbuf, convbuflen); } else result = unescape(x, convbuf, convbuflen); return result; } void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata) { p->buf = workbuf; p->bufsize = bufsize; p->cb = cb; p->prev_type = pe_invalid; p->prev_rtype = pe_invalid; p->curr_len = 0; p->cbdata = cbdata; *(p->info.charset) = 0; // nplurals = 2 by default p->info.nplurals = 2; fuzzymark = 0; } enum lineactions { la_incr, la_proc, la_abort, la_nop, la_max, }; /* return 0 on success */ int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { char *convbuf = p->buf; size_t convbuflen = p->bufsize; size_t strstart; static const enum lineactions action_tbl[pe_max][pe_max] = { // pe_str will never be set as curr_type [pe_str] = { [pe_str] = la_abort, [pe_msgid] = la_abort, [pe_ctxt] = la_abort, [pe_plural] = la_abort, [pe_msgstr] = la_abort, [pe_invalid] = la_abort, }, [pe_msgid] = { [pe_str] = la_incr, [pe_msgid] = la_abort, [pe_ctxt] = la_abort, [pe_plural] = la_proc, [pe_msgstr] = la_proc, [pe_invalid] = la_proc, }, [pe_ctxt] = { [pe_str] = la_incr, [pe_msgid] = la_proc, [pe_ctxt] = la_abort, [pe_plural] = la_abort, [pe_msgstr] = la_abort, [pe_invalid] = la_proc, }, [pe_plural] = { [pe_str] = la_incr, [pe_msgid] = la_abort, [pe_ctxt] = la_abort, [pe_plural] = la_abort, [pe_msgstr] = la_proc, [pe_invalid] = la_proc, }, [pe_msgstr] = { [pe_str] = la_incr, [pe_msgid] = la_proc, [pe_ctxt] = la_proc, [pe_plural] = la_abort, [pe_msgstr] = la_proc, [pe_invalid] = la_proc, }, [pe_invalid] = { [pe_str] = la_nop, [pe_msgid] = la_incr, [pe_ctxt] = la_incr, [pe_plural] = la_nop, [pe_msgstr] = la_nop, [pe_invalid] = la_nop, }, }; enum po_entry type; type = get_type_and_start(&p->info, line, line + buflen, &strstart); if(p->prev_rtype != pe_invalid && action_tbl[p->prev_rtype][type] == la_abort) abort(); if(type != pe_invalid && type != pe_str) p->prev_rtype = type; if(fuzzymark) { if(type == pe_ctxt && fuzzymark == 1) fuzzymark--; if(type == pe_msgid) fuzzymark--; if(fuzzymark > 0) return 0; } switch(action_tbl[p->prev_type][type]) { case la_incr: assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || pe_ctxt); p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); break; case la_proc: assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt); p->info.text = convbuf; p->info.textlen = p->curr_len; p->info.type = p->prev_type; p->cb(&p->info, p->cbdata); if(type != pe_invalid) p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen); else p->curr_len = 0; break; case la_nop: break; case la_abort: default: abort(); // todo : return error code } if(type != pe_str) { p->prev_type = type; } return 0; } int poparser_finish(struct po_parser *p) { char empty[4] = ""; return poparser_feed_line(p, empty, sizeof(empty)); }