summaryrefslogtreecommitdiff
path: root/include/regexp.h
diff options
context:
space:
mode:
authorEric Andersen <andersen@codepoet.org>2000-07-09 06:39:19 +0000
committerEric Andersen <andersen@codepoet.org>2000-07-09 06:39:19 +0000
commitf3a9360625bca4fb0b5fe9730d6e25431ac4704b (patch)
tree9de32b51a64464128da7d908a10378369f739a02 /include/regexp.h
parent283f7172a1d7f3e42a38747f7a9b83bfc04c5238 (diff)
Add in a bunch of junk. Busybox now compiles (except for mkfs.minix and
fsck.minix). Of course, it doesn't link yet due to missing functions, but hey... At least it is now easy to see what isn't working. :-) -Erik
Diffstat (limited to 'include/regexp.h')
-rw-r--r--include/regexp.h241
1 files changed, 222 insertions, 19 deletions
diff --git a/include/regexp.h b/include/regexp.h
index 73d6bf412..174e10b75 100644
--- a/include/regexp.h
+++ b/include/regexp.h
@@ -1,21 +1,224 @@
/*
- * Definitions etc. for regexp(3) routines.
+ * regexp.h -- old-style regexp compile and step (emulated with POSIX regex)
+ * Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
*
- * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
- * not the System V one.
- */
-#define NSUBEXP 10
-typedef struct regexp {
- char *startp[NSUBEXP];
- char *endp[NSUBEXP];
- char regstart; /* Internal use only. */
- char reganch; /* Internal use only. */
- char *regmust; /* Internal use only. */
- int regmlen; /* Internal use only. */
- char program[1]; /* Unwarranted chumminess with compiler. */
-} regexp;
-
-extern regexp *regcomp();
-extern int regexec();
-extern void regsub();
-extern void regerror();
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ */
+
+/*
+ * Think really hard before you intentionally include this file.
+ * You should really be using the POSIX regex interface instead.
+ * This emulation file is intended solely for compiling old code.
+ *
+ * A program that uses this file must define six macros: INIT,
+ * GETC, PEEKC, UNGETC, RETURN, and ERROR. This interface is
+ * so arcane that VMS hackers point at it in ridicule.
+ */
+
+#ifndef _REGEXP_H
+#define _REGEXP_H
+
+#include <sys/types.h> /* regex.h needs size_t */
+#include <regex.h> /* POSIX.2 regexp routines */
+#include <stdlib.h> /* for malloc, realloc and free */
+
+/*
+ * These three advertised external variables record state information
+ * for compile and step. They are so gross, I'm choking as I write this.
+ */
+char *loc1; /* the beginning of a match; set by step() only when it succeeds */
+char *loc2; /* the end of a match (string + rm_eo); set by step() only when it succeeds */
+int circf; /* current pattern begins with '^'; set by compile(), consulted by step() */
+
+/*
+ * These are the other variables mentioned in the regexp.h manpage.
+ * Since we don't emulate them (whatever they do), we want errors if
+ * they are referenced. Therefore they are commented out here.
+ */
+#if 0
+char *locs;
+int sed;
+int nbra;
+#endif
+
+/*
+ * We need to stuff a regex_t into an arbitrary buffer so align it.
+ * GCC makes this easy.  For the others we have to guess.
+ */
+#ifdef __GNUC__
+#define __REGEX_T_ALIGN (__alignof__(regex_t))
+#else /* !__GNUC__ */
+#define __REGEX_T_ALIGN 8
+#endif /* !__GNUC__ */
+
+/*
+ * Round the address `p' up to the next multiple of __REGEX_T_ALIGN and
+ * yield it as a regex_t pointer.  The argument is parenthesized so that
+ * an expression such as `buf + off' is converted as a whole rather than
+ * having the cast bind to its first operand only.
+ */
+#define __regex_t_align(p) \
+	((regex_t *) ((((unsigned long) (p)) + __REGEX_T_ALIGN - 1) \
+	/ __REGEX_T_ALIGN * __REGEX_T_ALIGN))
+
+/*
+ * compile -- read a pattern through the caller's GETC() macro and
+ * compile it with regcomp() into the caller-supplied buffer.
+ *
+ *   instring  not referenced directly in this body; presumably consumed
+ *             by the caller's INIT/GETC macros, which expand here
+ *   expbuf    start of the buffer that receives the aligned regex_t
+ *   endbuf    end of that buffer
+ *   eof       character value that terminates the pattern
+ *
+ * We just slurp the whole pattern into a string and then compile
+ * it `normally'.  With this implementation we never use the PEEKC
+ * macro.  A leading '^' is not stored in the pattern; it only sets
+ * `circf'.  Please feel free to die laughing when we translate
+ * error symbols into hard-coded numbers.
+ *
+ * Failures are reported through ERROR(); if ERROR() hands control back,
+ * 0 is returned.  Success is reported through RETURN().
+ */
+char *
+compile(char *instring, char *expbuf, char *endbuf, int eof)
+{
+	int __c;
+	int __len;
+	char *__buf;
+	char *__newbuf;
+	int __buflen;
+	int __error;
+	regex_t *__preg;
+	INIT;
+
+	__buflen = 128;
+	__buf = malloc(__buflen);
+	if (!__buf) {
+		ERROR(50);
+		return 0;
+	}
+	__len = 0;
+	circf = 0;
+	for (;;) {
+		__c = GETC();
+		if (__c == eof)
+			break;
+		if (__c == '\0' || __c == '\n') {
+			/* Not part of the pattern: hand it back to the caller. */
+			UNGETC(__c);
+			break;
+		}
+		/* Keep room for this character plus the terminating NUL. */
+		if (__len + 2 > __buflen) {
+			__buflen *= 2;
+			/*
+			 * Grow through a temporary so the old block can still
+			 * be released if realloc() fails.
+			 */
+			__newbuf = realloc(__buf, __buflen);
+			if (!__newbuf) {
+				free(__buf);
+				ERROR(50);
+				return 0;
+			}
+			__buf = __newbuf;
+		}
+		if (__len == 0 && !circf && __c == '^')
+			circf = 1;
+		else
+			__buf[__len++] = __c;
+	}
+	if (__len == 0 && !circf) {
+		free(__buf);
+		ERROR(41);
+		return 0;
+	}
+	__buf[__len] = '\0';
+	/*
+	 * The regex_t lives at the aligned address, not at expbuf itself,
+	 * so the space check has to start from the aligned address too.
+	 */
+	__preg = __regex_t_align(expbuf);
+	if ((char *) __preg >= endbuf
+	    || (size_t) (endbuf - (char *) __preg) <= sizeof(regex_t)) {
+		free(__buf);
+		ERROR(50);
+		return 0;
+	}
+	/*
+	 * NOTE(review): this pokes at regex_t internals (buffer, allocated,
+	 * used); confirm the target libc's regcomp() honours a preset buffer.
+	 */
+	__preg->buffer = (char *) (__preg + 1);
+	__preg->allocated = endbuf - (char *) __preg->buffer;
+	__error = regcomp(__preg, __buf, REG_NEWLINE);
+	free(__buf);
+	/* Translate regcomp() results into the old-style error numbers. */
+	switch (__error) {
+	case 0:
+		break;
+	case REG_BADBR:
+		__error = 16;
+		break;
+	case REG_EPAREN:
+		__error = 42;
+		break;
+	case REG_EBRACE:
+		__error = 44; /* poor fit */
+		break;
+	case REG_EBRACK:
+		__error = 49;
+		break;
+	case REG_ESIZE:
+	case REG_ESPACE:
+		__error = 50;
+		break;
+	case REG_BADRPT:  /* poor fit */
+	case REG_ERANGE:  /* poor fit */
+	case REG_ECTYPE:  /* poor fit */
+	case REG_ESUBREG: /* poor fit */
+	case REG_EEND:    /* poor fit */
+	case REG_EESCAPE:
+	case REG_BADPAT:  /* poor fit */
+	default:          /* as good as any */
+		__error = 36;
+		break;
+	}
+	if (__error) {
+		ERROR(__error);
+		return 0;
+	}
+#ifdef _RX_H
+	RETURN((__preg->buffer + __preg->rx.allocated - __preg->rx.reserved));
+#else
+	RETURN((__preg->buffer + __preg->used));
+#endif
+}
+
+/*
+ * step -- match `string' against the pattern that compile() stored in
+ * `expbuf'.  Returns 1 on a match and 0 otherwise.  On a match, loc1
+ * and loc2 are set to the start and end of the matched text; on
+ * failure they are left untouched.
+ *
+ * Note how we carefully emulate the gross `circf' hack: when circf is
+ * set, a match that does not begin at offset 0 is treated as no match.
+ * Otherwise, this just looks like an ordinary matching call that
+ * records the starting and ending match positions.
+ */
+int
+step(char *string, char *expbuf)
+{
+	int __result;
+	regmatch_t __pmatch[1];
+
+	__result = regexec(__regex_t_align(expbuf), string, 1, __pmatch, 0);
+	/*
+	 * Only look at __pmatch after a successful regexec(); on failure
+	 * the array may never have been written.
+	 */
+	if (__result == 0 && circf && __pmatch[0].rm_so != 0)
+		__result = REG_NOMATCH;
+	if (__result == 0) {
+		loc1 = string + __pmatch[0].rm_so;
+		loc2 = string + __pmatch[0].rm_eo;
+	}
+	return __result == 0;
+}
+
+/*
+ * advance -- like step(), but the match is only allowed to start at
+ * the beginning of the string.  You have to read the man page really
+ * carefully to find this one, so we match them kludge-for-kludge:
+ * force `circf' on for the duration of the step() call, then put the
+ * caller's value back.  Returns whatever step() returned.
+ */
+int
+advance(char *string, char *expbuf)
+{
+	int __saved_circf = circf;
+	int __matched;
+
+	circf = 1;
+	__matched = step(string, expbuf);
+	circf = __saved_circf;
+
+	return __matched;
+}
+
+#endif /* _REGEXP_H */