summaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
Diffstat (limited to 'libc')
-rw-r--r--libc/misc/regex/Makefile2
-rw-r--r--libc/misc/regex/regex.c5725
-rw-r--r--libc/misc/regex/rx.c7273
3 files changed, 5726 insertions, 7274 deletions
diff --git a/libc/misc/regex/Makefile b/libc/misc/regex/Makefile
index c4c13f6cf..38b7e98bf 100644
--- a/libc/misc/regex/Makefile
+++ b/libc/misc/regex/Makefile
@@ -24,7 +24,7 @@ TOPDIR=../../
include $(TOPDIR)Rules.mak
LIBC=$(TOPDIR)libc.a
-CSRC=rx.c
+CSRC=regex.c
COBJS=$(patsubst %.c,%.o, $(CSRC))
OBJS=$(COBJS)
diff --git a/libc/misc/regex/regex.c b/libc/misc/regex/regex.c
new file mode 100644
index 000000000..64e754ee0
--- /dev/null
+++ b/libc/misc/regex/regex.c
@@ -0,0 +1,5725 @@
+/* Extended regular expression matching and search library,
+ version 0.12.
+ (Implements POSIX draft P1003.2/D11.2, except for some of the
+ internationalization features.)
+ Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined _AIX && !defined REGEX_MALLOC
+#pragma alloca
+#endif
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+#define STDC_HEADERS
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#ifndef PARAMS
+# if defined __GNUC__ || (defined __STDC__ && __STDC__)
+# define PARAMS(args) args
+# else
+# define PARAMS(args) ()
+# endif /* GCC. */
+#endif /* Not PARAMS. */
+
+#if defined STDC_HEADERS && !defined emacs
+# include <stddef.h>
+#else
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+# include <sys/types.h>
+#endif
+
+#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+
+/* For platforms which support the ISO C Amendment 1 functionality we
+ support user-defined character classes. */
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+#define btowc __btowc
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if HAVE_LIBINTL_H || defined _LIBC
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
+
+# include "lisp.h"
+# include "buffer.h"
+# include "syntax.h"
+
+#else /* not emacs */
+
+/* If we are not linking with Emacs proper,
+ we can't use the relocating allocator
+ even if config.h says that we can. */
+# undef REL_ALLOC
+
+# if defined STDC_HEADERS || defined _LIBC
+# include <stdlib.h>
+# else
+char *malloc();
+char *realloc();
+# endif
+
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+ If nothing else has been done, use the method below. */
+# ifdef INHIBIT_STRING_HEADER
+# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
+# if !defined bzero && !defined bcopy
+# undef INHIBIT_STRING_HEADER
+# endif
+# endif
+# endif
+
+/* This is the normal way of making sure we have a bcopy and a bzero.
+ This is used in most programs--a few other programs avoid this
+ by defining INHIBIT_STRING_HEADER. */
+# ifndef INHIBIT_STRING_HEADER
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
+# endif
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
+# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
+# endif
+# endif
+# endif
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+# ifndef Sword
+# define Sword 1
+# endif
+
+# ifdef SWITCH_ENUM_BUG
+# define SWITCH_ENUM_CAST(x) ((int)(x))
+# else
+# define SWITCH_ENUM_CAST(x) (x)
+# endif
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits. */
+#include <regex.h>
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+/* Jim Meyering writes:
+
+ "... Some ctype macros are valid only for character codes that
+ isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+ using /bin/cc or gcc but without giving an ansi option). So, all
+ ctype uses should be through macros like ISPRINT... If
+ STDC_HEADERS is defined, then autoconf has verified that the ctype
+ macros don't need to be guarded with references to isascii. ...
+ Defining isascii to 1 should let any compiler worth its salt
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
+
+#undef ISASCII
+#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+# define ISASCII(c) 1
+#else
+# define ISASCII(c) isascii(c)
+#endif
+
+#ifdef isblank
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
+#else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+#else
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+#endif
+
+#undef ISPRINT
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+#ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+#else
+# define TOLOWER(c) tolower(c)
+#endif
+
+#ifndef NULL
+# define NULL (void *)0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+#ifndef emacs
+/* How many characters in the character set. */
+# define CHAR_SET_SIZE 256
+
+# ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+# else /* not SYNTAX_TABLE */
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+/* Fill in re_syntax_table the first time this is called; later calls
+   return without touching the table.  Every character code below
+   CHAR_SET_SIZE for which ISALNUM is true, plus '_', is marked Sword;
+   all other entries are left zero.  */
+static void init_syntax_once()
+{
+	static int initialized = 0;
+	register int ch;
+
+	if (!initialized) {
+		bzero(re_syntax_table, sizeof re_syntax_table);
+
+		ch = 0;
+		while (ch < CHAR_SET_SIZE) {
+			if (ISALNUM(ch))
+				re_syntax_table[ch] = Sword;
+			++ch;
+		}
+
+		/* Underscore counts as a word constituent too.  */
+		re_syntax_table['_'] = Sword;
+
+		initialized = 1;
+	}
+}
+
+# endif /* not SYNTAX_TABLE */
+
+# define SYNTAX(c) re_syntax_table[((c) & 0xFF)]
+
+#endif /* emacs */
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#ifdef REGEX_MALLOC
+
+# define REGEX_ALLOCATE malloc
+# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE free
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+# ifndef alloca
+
+/* Make alloca work the best possible way. */
+# ifdef __GNUC__
+# define alloca __builtin_alloca
+# else /* not __GNUC__ */
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# endif /* HAVE_ALLOCA_H */
+# endif /* not __GNUC__ */
+
+# endif /* not alloca */
+
+# define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+# define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ memcpy (destination, source, osize))
+
+/* No need to do anything to free, after alloca. */
+# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+
+#endif /* not REGEX_MALLOC */
+
+/* Define how to allocate the failure stack. */
+
+#if defined REL_ALLOC && defined REGEX_MALLOC
+
+# define REGEX_ALLOCATE_STACK(size) \
+ r_alloc (&failure_stack_ptr, (size))
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ r_re_alloc (&failure_stack_ptr, (nsize))
+# define REGEX_FREE_STACK(ptr) \
+ r_alloc_free (&failure_stack_ptr)
+
+#else /* not using relocating allocator */
+
+# ifdef REGEX_MALLOC
+
+# define REGEX_ALLOCATE_STACK malloc
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE_STACK free
+
+# else /* not REGEX_MALLOC */
+
+# define REGEX_ALLOCATE_STACK alloca
+
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ REGEX_REALLOCATE (source, osize, nsize)
+/* No need to explicitly free anything. */
+# define REGEX_FREE_STACK(arg)
+
+# endif /* not REGEX_MALLOC */
+#endif /* not using relocating allocator */
+
+
+/* True if `size1' is nonzero and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+/* TALLOC yields the malloc result cast to `T *'.  RETALLOC assigns the
+   realloc result straight back to ADDR, so after a failed realloc ADDR
+   holds whatever realloc returned and the old pointer value is no longer
+   available through ADDR.  */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+/* RETALLOC_IF reallocates when ADDR is nonzero and allocates afresh
+   otherwise.  NOTE: it expands to a bare `if ... else ...' statement, not
+   an expression, so use it only as a complete statement followed by `;'
+   and never as the unbraced body of an enclosing `if' that has its own
+   `else'.  */
+#define RETALLOC_IF(addr, n, t) \
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
+/* Like TALLOC, but goes through REGEX_ALLOCATE instead of malloc.  */
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+
+#define false 0
+#define true 1
+
+static int re_match_2_internal PARAMS((struct re_pattern_buffer * bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int pos,
+ struct re_registers * regs,
+
+ int stop));
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression. */
+
+typedef enum {
+ /* First enumerator; pinned to the value 0. */
+ no_op = 0,
+
+ /* Succeed right away--no more backtracking. */
+ succeed,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two-byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if we know we won't have to backtrack
+ to match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+#ifdef emacs
+ /* The remaining opcodes exist only when compiled with `emacs'
+ defined. */
+ , before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+
+#define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += 2; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+
+#define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER, available when DEBUG is defined.
+   Reads the two bytes at SOURCE -- low-order byte first, second byte
+   sign-extended -- and stores the resulting number in *DEST.  */
+static void extract_number _RE_ARGS((int *dest, unsigned char *source));
+static void extract_number(dest, source)
+int *dest;
+unsigned char *source;
+{
+	int high = SIGN_EXTEND_CHAR(source[1]);
+	int low = source[0] & 0377;
+
+	*dest = low + (high << 8);
+}
+
+# ifndef EXTRACT_MACROS /* To debug the macros. */
+# undef EXTRACT_NUMBER
+# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+# endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += 2; \
+ } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER_AND_INCR, available when DEBUG is
+   defined.  Decodes the two-byte number at *SOURCE into *DESTINATION via
+   extract_number, then advances *SOURCE past those two bytes.  */
+static void extract_number_and_incr _RE_ARGS((int *destination,
+ unsigned char **source));
+static void extract_number_and_incr(destination, source)
+int *destination;
+unsigned char **source;
+{
+	unsigned char *cursor = *source;
+
+	extract_number(destination, cursor);
+	*source = cursor + 2;
+}
+
+# ifndef EXTRACT_MACROS
+# undef EXTRACT_NUMBER_AND_INCR
+# define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+# endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+# include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+# include <assert.h>
+
+static int debug;
+
+# define DEBUG_STATEMENT(e) e
+# define DEBUG_PRINT1(x) if (debug) printf (x)
+# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+/* Print the fastmap in human-readable form.
+
+   FASTMAP is indexed by character code, 0 up to (1 << BYTEWIDTH) - 1.
+   Each maximal run of consecutive nonzero entries is written to stdout
+   as its first character; runs of two or more characters are followed
+   by `-' and the last character of the run.  A newline ends the
+   output.  */
+
+void print_fastmap(fastmap)
+char *fastmap;
+{
+	const unsigned limit = 1 << BYTEWIDTH;
+	unsigned pos = 0;
+
+	while (pos < limit) {
+		unsigned first;
+
+		if (!fastmap[pos]) {
+			pos++;
+			continue;
+		}
+
+		/* POS starts a run of set entries; print its first member.  */
+		first = pos++;
+		putchar(first);
+
+		while (pos < limit && fastmap[pos])
+			pos++;
+
+		/* Only runs longer than one character get a `-last' suffix.  */
+		if (pos - first > 1) {
+			printf("-");
+			putchar(pos - 1);
+		}
+	}
+
+	putchar('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+   the START pointer into it and ending just before the pointer END.
+
+   One line is written to stdout per opcode, prefixed with the opcode's
+   byte offset from START, followed by a final "end of pattern" line.
+   If START is NULL, "(null)" is printed and nothing else is done.
+
+   Offsets are pointer differences; they are cast to int before being
+   handed to printf because the format strings use %d.  */
+
+void print_partial_compiled_pattern(start, end)
+unsigned char *start;
+unsigned char *end;
+{
+	int mcnt, mcnt2;
+	unsigned char *p1;
+	unsigned char *p = start;
+	unsigned char *pend = end;
+
+	if (start == NULL) {
+		printf("(null)\n");
+		return;
+	}
+
+	/* Loop over pattern commands. */
+	while (p < pend) {
+		printf("%d:\t", (int) (p - start));
+
+		switch ((re_opcode_t) * p++) {
+		case no_op:
+			printf("/no_op");
+			break;
+
+		case exactn:
+			/* One count byte, then that many literal bytes.  */
+			mcnt = *p++;
+			printf("/exactn/%d", mcnt);
+			do {
+				putchar('/');
+				putchar(*p++);
+			}
+			while (--mcnt);
+			break;
+
+		case start_memory:
+			mcnt = *p++;
+			printf("/start_memory/%d/%d", mcnt, *p++);
+			break;
+
+		case stop_memory:
+			mcnt = *p++;
+			printf("/stop_memory/%d/%d", mcnt, *p++);
+			break;
+
+		case duplicate:
+			printf("/duplicate/%d", *p++);
+			break;
+
+		case anychar:
+			printf("/anychar");
+			break;
+
+		case charset:
+		case charset_not:
+			{
+				register int c, last = -100;
+				register int in_range = 0;
+
+				printf("/charset [%s",
+					   (re_opcode_t) * (p - 1) == charset_not ? "^" : "");
+
+				/* *p is the number of bitmap bytes that follow.  */
+				assert(p + *p < pend);
+
+				for (c = 0; c < 256; c++)
+					if (c / 8 < *p && (p[1 + (c / 8)] & (1 << (c % 8)))) {
+						/* Are we starting a range? */
+						if (last + 1 == c && !in_range) {
+							putchar('-');
+							in_range = 1;
+						}
+						/* Have we broken a range? */
+						else if (last + 1 != c && in_range) {
+							putchar(last);
+							in_range = 0;
+						}
+
+						if (!in_range)
+							putchar(c);
+
+						last = c;
+					}
+
+				if (in_range)
+					putchar(last);
+
+				putchar(']');
+
+				/* Skip the length byte and the bitmap itself.  */
+				p += 1 + *p;
+			}
+			break;
+
+		case begline:
+			printf("/begline");
+			break;
+
+		case endline:
+			printf("/endline");
+			break;
+
+		case on_failure_jump:
+			extract_number_and_incr(&mcnt, &p);
+			printf("/on_failure_jump to %d", (int) (p + mcnt - start));
+			break;
+
+		case on_failure_keep_string_jump:
+			extract_number_and_incr(&mcnt, &p);
+			printf("/on_failure_keep_string_jump to %d",
+				   (int) (p + mcnt - start));
+			break;
+
+		case dummy_failure_jump:
+			extract_number_and_incr(&mcnt, &p);
+			printf("/dummy_failure_jump to %d", (int) (p + mcnt - start));
+			break;
+
+		case push_dummy_failure:
+			printf("/push_dummy_failure");
+			break;
+
+		case maybe_pop_jump:
+			extract_number_and_incr(&mcnt, &p);
+			printf("/maybe_pop_jump to %d", (int) (p + mcnt - start));
+			break;
+
+		case pop_failure_jump:
+			extract_number_and_incr(&mcnt, &p);
+			printf("/pop_failure_jump to %d", (int) (p + mcnt - start));
+			break;
+
+		case jump_past_alt:
+			extract_number_and_incr(&mcnt, &p);
+			printf("/jump_past_alt to %d", (int) (p + mcnt - start));
+			break;
+
+		case jump:
+			extract_number_and_incr(&mcnt, &p);
+			printf("/jump to %d", (int) (p + mcnt - start));
+			break;
+
+		case succeed_n:
+			extract_number_and_incr(&mcnt, &p);
+			p1 = p + mcnt;
+			extract_number_and_incr(&mcnt2, &p);
+			printf("/succeed_n to %d, %d times", (int) (p1 - start), mcnt2);
+			break;
+
+		case jump_n:
+			extract_number_and_incr(&mcnt, &p);
+			p1 = p + mcnt;
+			extract_number_and_incr(&mcnt2, &p);
+			printf("/jump_n to %d, %d times", (int) (p1 - start), mcnt2);
+			break;
+
+		case set_number_at:
+			extract_number_and_incr(&mcnt, &p);
+			p1 = p + mcnt;
+			extract_number_and_incr(&mcnt2, &p);
+			printf("/set_number_at location %d to %d",
+				   (int) (p1 - start), mcnt2);
+			break;
+
+		case wordbound:
+			printf("/wordbound");
+			break;
+
+		case notwordbound:
+			printf("/notwordbound");
+			break;
+
+		case wordbeg:
+			printf("/wordbeg");
+			break;
+
+		case wordend:
+			printf("/wordend");
+			/* Without this break, control fell into the next case and
+			   a second, unrelated opcode name was printed.  */
+			break;
+
+# ifdef emacs
+		case before_dot:
+			printf("/before_dot");
+			break;
+
+		case at_dot:
+			printf("/at_dot");
+			break;
+
+		case after_dot:
+			printf("/after_dot");
+			break;
+
+		case syntaxspec:
+			printf("/syntaxspec");
+			mcnt = *p++;
+			printf("/%d", mcnt);
+			break;
+
+		case notsyntaxspec:
+			printf("/notsyntaxspec");
+			mcnt = *p++;
+			printf("/%d", mcnt);
+			break;
+# endif /* emacs */
+
+		case wordchar:
+			printf("/wordchar");
+			break;
+
+		case notwordchar:
+			printf("/notwordchar");
+			break;
+
+		case begbuf:
+			printf("/begbuf");
+			break;
+
+		case endbuf:
+			printf("/endbuf");
+			break;
+
+		default:
+			/* Unknown opcode byte: show its numeric value.  */
+			printf("?%d", *(p - 1));
+			break;
+		}
+
+		putchar('\n');
+	}
+
+	printf("%d:\tend of pattern.\n", (int) (p - start));
+}
+
+
+/* Print the whole compiled pattern held in BUFP, followed by the
+   bookkeeping fields of the pattern buffer.  The fastmap is printed only
+   when BUFP has one and it is marked accurate.
+
+   The field types are declared in <regex.h>, so each value whose type is
+   not visibly an int is cast to the type its printf conversion expects;
+   this keeps the call well-defined whatever the header declares.  */
+void print_compiled_pattern(bufp)
+struct re_pattern_buffer *bufp;
+{
+	unsigned char *buffer = bufp->buffer;
+
+	print_partial_compiled_pattern(buffer, buffer + bufp->used);
+	printf("%ld bytes used/%ld bytes allocated.\n",
+		   (long) bufp->used, (long) bufp->allocated);
+
+	if (bufp->fastmap_accurate && bufp->fastmap) {
+		printf("fastmap: ");
+		print_fastmap(bufp->fastmap);
+	}
+
+	printf("re_nsub: %d\t", (int) bufp->re_nsub);
+	printf("regs_alloc: %d\t", (int) bufp->regs_allocated);
+	printf("can_be_null: %d\t", (int) bufp->can_be_null);
+	printf("newline_anchor: %d\n", (int) bufp->newline_anchor);
+	printf("no_sub: %d\t", (int) bufp->no_sub);
+	printf("not_bol: %d\t", (int) bufp->not_bol);
+	printf("not_eol: %d\t", (int) bufp->not_eol);
+	printf("syntax: %lx\n", (unsigned long) bufp->syntax);
+	/* Perhaps we should print the translate table? */
+}
+
+
+/* Print to stdout the text from WHERE to the end of the two-part string
+   STRING1 (SIZE1 bytes) followed by STRING2 (SIZE2 bytes).  When WHERE
+   lies in STRING1 (per FIRST_STRING_P) the rest of STRING1 is printed
+   and then all of STRING2; otherwise WHERE is taken as a position in
+   STRING2.  A NULL WHERE prints "(null)".  */
+void print_double_string(where, string1, size1, string2, size2)
+const char *where;
+const char *string1;
+const char *string2;
+int size1;
+int size2;
+{
+	int idx;
+
+	if (where == NULL) {
+		printf("(null)");
+		return;
+	}
+
+	if (FIRST_STRING_P(where)) {
+		idx = where - string1;
+		while (idx < size1) {
+			putchar(string1[idx]);
+			idx++;
+		}
+
+		/* Continue from the very start of the second string.  */
+		where = string2;
+	}
+
+	idx = where - string2;
+	while (idx < size2) {
+		putchar(string2[idx]);
+		idx++;
+	}
+}
+
+/* Write the single character C to stderr.  */
+void printchar(c)
+int c;
+{
+	fputc(c, stderr);
+}
+
+#else /* not DEBUG */
+
+# undef assert
+# define assert(e)
+
+# define DEBUG_STATEMENT(e)
+# define DEBUG_PRINT1(x)
+# define DEBUG_PRINT2(x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t re_set_syntax(syntax)
+reg_syntax_t syntax;
+{
+	/* Remember the outgoing value so it can be handed back.  */
+	reg_syntax_t previous = re_syntax_options;
+
+	re_syntax_options = syntax;
+#ifdef DEBUG
+	/* Debug output follows the RE_DEBUG bit of the new syntax.  */
+	debug = (syntax & RE_DEBUG) ? 1 : 0;
+#endif /* DEBUG */
+	return previous;
+}
+
+#ifdef _LIBC
+weak_alias(__re_set_syntax, re_set_syntax)
+#endif
+ /* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice?
+
+ All messages live back to back in one char array. Each message
+ literal is followed by an explicit "\0" so that adjacent-literal
+ concatenation keeps a terminator between messages; the last message
+ relies on the array's own terminating NUL. Each REG_xxx_IDX macro
+ is the byte offset of its message: the previous offset plus
+ `sizeof' the previous literal, which counts that literal's NUL.
+ The text inside each sizeof must therefore stay identical to the
+ message it measures. */
+static const char re_error_msgid[] = {
+#define REG_NOERROR_IDX 0
+ gettext_noop("Success") /* REG_NOERROR */
+ "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+ gettext_noop("No match") /* REG_NOMATCH */
+ "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+ gettext_noop("Invalid regular expression") /* REG_BADPAT */
+ "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+ gettext_noop("Invalid collation character") /* REG_ECOLLATE */
+ "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+ gettext_noop("Invalid character class name") /* REG_ECTYPE */
+ "\0"
+#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+ gettext_noop("Trailing backslash") /* REG_EESCAPE */
+ "\0"
+#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+ gettext_noop("Invalid back reference") /* REG_ESUBREG */
+ "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+ gettext_noop("Unmatched [ or [^") /* REG_EBRACK */
+ "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+ gettext_noop("Unmatched ( or \\(") /* REG_EPAREN */
+ "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+ gettext_noop("Unmatched \\{") /* REG_EBRACE */
+ "\0"
+#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+ gettext_noop("Invalid content of \\{\\}") /* REG_BADBR */
+ "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+ gettext_noop("Invalid range end") /* REG_ERANGE */
+ "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+ gettext_noop("Memory exhausted") /* REG_ESPACE */
+ "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+ gettext_noop("Invalid preceding regular expression") /* REG_BADRPT */
+ "\0"
+#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+ gettext_noop("Premature end of regular expression") /* REG_EEND */
+ "\0"
+#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
+ gettext_noop("Regular expression too big") /* REG_ESIZE */
+ "\0"
+#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
+ gettext_noop("Unmatched ) or \\)") /* REG_ERPAREN */
+};
+
+/* Byte offsets into re_error_msgid, one per message, listed in the same
+   order as the messages appear there (REG_NOERROR first, REG_ERPAREN
+   last).  Presumably indexed by the numeric error code -- confirm
+   against the reg_errcode_t values in regex.h.  */
+static const size_t re_error_msgid_idx[] = {
+ REG_NOERROR_IDX,
+ REG_NOMATCH_IDX,
+ REG_BADPAT_IDX,
+ REG_ECOLLATE_IDX,
+ REG_ECTYPE_IDX,
+ REG_EESCAPE_IDX,
+ REG_ESUBREG_IDX,
+ REG_EBRACK_IDX,
+ REG_EPAREN_IDX,
+ REG_EBRACE_IDX,
+ REG_BADBR_IDX,
+ REG_ERANGE_IDX,
+ REG_ESPACE_IDX,
+ REG_BADRPT_IDX,
+ REG_EEND_IDX,
+ REG_ESIZE_IDX,
+ REG_ERPAREN_IDX
+};
+
+/* Avoiding alloca during matching, to placate r_alloc. */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+ searching and matching functions should not call alloca. On some
+ systems, alloca is implemented in terms of malloc, and if we're
+ using the relocating allocator routines, then malloc could cause a
+ relocation, which might (if the strings being searched are in the
+ ralloc heap) shift the data out from underneath the regexp
+ routines.
+
+ Here's another reason to avoid allocation: Emacs
+ processes input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
+
+/* Normally, this is fine. */
+#define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+ what config.h may say. So don't take precautions for it. */
+#ifdef __GNUC__
+# undef C_ALLOCA
+#endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+ and (2) it's not safe for them to use malloc.
+ Note that if REL_ALLOC is defined, matching would not use malloc for the
+ failure stack, but we would still use it for the register vectors;
+ so REL_ALLOC should not affect this. */
+#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
+# undef MATCH_MAY_ALLOCATE
+#endif
+
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE_STACK. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit. */
+#ifndef INIT_FAILURE_ALLOC
+# define INIT_FAILURE_ALLOC 5
+#endif
+
+ /* Roughly the maximum number of failure points on the stack. It would be
+ exactly that if we always used MAX_FAILURE_ITEMS items each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves. */
+
+ #ifdef INT_IS_16BIT
+
+ # if defined MATCH_MAY_ALLOCATE
+ /* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb. */
+ long int re_max_failures = 4000;
+ # else
+ long int re_max_failures = 2000;
+ # endif
+
+ /* One slot of the failure stack: either a saved pointer or a saved
+ integer.  The integer member is `long' in this configuration. */
+ union fail_stack_elt {
+ unsigned char *pointer;
+ long int integer;
+ };
+
+ typedef union fail_stack_elt fail_stack_elt_t;
+
+ /* The failure stack itself: an array of `size' slots of which the first
+ `avail' are in use. */
+ typedef struct {
+ fail_stack_elt_t *stack;
+ unsigned long int size;
+ unsigned long int avail; /* Offset of next open position. */
+ } fail_stack_type;
+
+ #else /* not INT_IS_16BIT */
+
+ # if defined MATCH_MAY_ALLOCATE
+ /* 4400 was enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb.
+ NOTE(review): the default below (20000) is well above the 4400 quoted
+ here -- confirm that this remark still applies to the current value. */
+ int re_max_failures = 20000;
+ # else
+ int re_max_failures = 2000;
+ # endif
+
+ /* One slot of the failure stack: either a saved pointer or a saved
+ integer. */
+ union fail_stack_elt {
+ unsigned char *pointer;
+ int integer;
+ };
+
+ typedef union fail_stack_elt fail_stack_elt_t;
+
+ /* The failure stack itself: an array of `size' slots of which the first
+ `avail' are in use. */
+ typedef struct {
+ fail_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+ } fail_stack_type;
+
+ #endif /* INT_IS_16BIT */
+
+ /* Emptiness and fullness tests for the failure stack.  FAIL_STACK_EMPTY and
+ FAIL_STACK_FULL read a variable named `fail_stack'; FAIL_STACK_PTR_EMPTY
+ reads through a pointer variable named `fail_stack_ptr'. */
+ #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+ #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+ #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+
+
+ /* Macros that set up and tear down the failure stack.  Both operate on a
+ variable named `fail_stack'.  If the initial allocation fails,
+ INIT_FAIL_STACK does `return -2' from the enclosing function. */
+
+ #if defined MATCH_MAY_ALLOCATE
+ /* Obtain room for INIT_FAILURE_ALLOC elements and mark the stack empty. */
+ # define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ if (!fail_stack.stack) \
+ return -2; \
+ fail_stack.avail = 0; \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ } while (0)
+
+ /* Hand the stack storage back through REGEX_FREE_STACK. */
+ # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+ #else /* not MATCH_MAY_ALLOCATE */
+ /* Nothing is allocated here; only the fill level is reset. */
+ # define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.avail = 0; \
+ } while (0)
+
+ /* Expands to nothing in this configuration. */
+ # define RESET_FAIL_STACK()
+ #endif /* MATCH_MAY_ALLOCATE */
+
+
+ /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large.
+
+ The size limit is compared as `unsigned long' so that the product
+ re_max_failures * MAX_FAILURE_ITEMS (a `long' when INT_IS_16BIT is
+ defined) is not truncated to the width of `unsigned'.
+
+ FAIL_STACK is expanded several times, so it must have no side effects.
+ REGEX_REALLOCATE_STACK requires `destination' be declared.
+
+ NOTE(review): when the reallocation fails, the stack pointer has already
+ been overwritten with NULL; if REGEX_REALLOCATE_STACK is realloc-based
+ the previous block is then unreachable -- confirm against its definition. */
+
+ #define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > (unsigned long) (re_max_failures * MAX_FAILURE_ITEMS) \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+ /* Push pointer POINTER on FAIL_STACK, doubling the stack first when it is
+ full.  Return 1 if was able to do so and 0 if the stack could not be
+ grown (out of memory, or already at its size limit).
+
+ The fullness test is made on FAIL_STACK itself rather than on whatever
+ variable happens to be called `fail_stack'.  FAIL_STACK is expanded
+ several times and must have no side effects; POINTER is evaluated once. */
+ #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
+ (((FAIL_STACK).avail == (FAIL_STACK).size \
+ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+ ? 0 \
+ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER), \
+ 1))
+
+ /* Push a pointer value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'.
+ No room check is made here: the slot at `avail' is written and `avail'
+ is incremented, so the caller must already have ensured free space
+ (PUSH_FAILURE_POINT grows the stack before it pushes). */
+ #define PUSH_FAILURE_POINTER(item) \
+ fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
+
+ /* This pushes an integer-valued item onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'.
+ As above, no room check is made. */
+ #define PUSH_FAILURE_INT(item) \
+ fail_stack.stack[fail_stack.avail++].integer = (item)
+
+ /* Push a fail_stack_elt_t value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'.
+ As above, no room check is made. */
+ #define PUSH_FAILURE_ELT(item) \
+ fail_stack.stack[fail_stack.avail++] = (item)
+
+ /* These three POP... operations complement the three PUSH... operations.
+ All assume that `fail_stack' is nonempty.
+ Each one pre-decrements `fail_stack.avail' and yields the element now at
+ that index (its `pointer' member, its `integer' member, or the whole
+ element respectively). */
+ #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
+ #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
+ #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
+
+ /* Used to omit pushing failure point id's when we're not debugging.
+ With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores
+ the result of POP_FAILURE_INT () through ITEM_ADDR; otherwise both
+ expand to nothing, so their arguments are not evaluated. */
+ #ifdef DEBUG
+ # define DEBUG_PUSH PUSH_FAILURE_INT
+ # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
+ #else
+ # define DEBUG_PUSH(item)
+ # define DEBUG_POP(item_addr)
+ #endif
+
+
+ /* Push the information about the state we will need
+ if we ever fail back to it.
+
+ The items are pushed in this order: for each register from
+ lowest_active_reg through highest_active_reg its start pointer, its end
+ pointer and its info word; then lowest_active_reg, highest_active_reg,
+ PATTERN_PLACE and STRING_PLACE; and finally, through DEBUG_PUSH, the
+ failure id.
+
+ Requires variables fail_stack, regstart, regend, reg_info,
+ lowest_active_reg and highest_active_reg be declared.  The DEBUG_*
+ statements additionally name failure_id, nfailure_points_pushed,
+ num_regs_pushed, bufp, pend, string1, size1, string2 and size2.
+ DOUBLE_FAIL_STACK requires `destination' be declared; this macro
+ declares it itself.
+
+ Does `return FAILURE_CODE' from the enclosing function if the stack
+ cannot be grown far enough. */
+
+ #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; /* For DOUBLE_FAIL_STACK. */ \
+ /* An earlier comment required this counter to be int, so that when \
+ we don't save any registers the arithmetic of 0 + -1 isn't done \
+ as unsigned.  It is declared active_reg_t instead, since there is \
+ not a shred of a guarantee that int is wide enough to hold a value \
+ of something to which pointer can be assigned. */ \
+ active_reg_t this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ if (1) \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: %p\n ", \
+ reg_info[this_reg].word.pointer); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
+ PUSH_FAILURE_INT (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
+ PUSH_FAILURE_INT (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+ /* This is the number of items that are pushed and popped on the stack
+ for each register: its start pointer, its end pointer and its info
+ word (see PUSH_FAILURE_POINT). */
+ #define NUM_REG_ITEMS 3
+
+ /* Individual items aside from the registers: the lowest and highest
+ active register numbers, the pattern position and the string position. */
+ #ifdef DEBUG
+ # define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+ #else
+ # define NUM_NONREG_ITEMS 4
+ #endif
+
+ /* We push at most this many items on the stack. */
+ /* We used to use (num_regs - 1), which is the number of registers
+ this regexp will save; but that was changed to 5
+ to avoid stack overflow for a regexp with lots of parens. */
+ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+ /* Number of stack slots a single PUSH_FAILURE_POINT consumes right now:
+ NUM_REG_ITEMS for every register from lowest_active_reg through
+ highest_active_reg, plus the fixed NUM_NONREG_ITEMS. */
+ #define NUM_FAILURE_ITEMS \
+ (NUM_NONREG_ITEMS \
+ + NUM_REG_ITEMS * (highest_active_reg - lowest_active_reg + 1))
+
+ /* How many items can still be added to the stack without overflowing it.
+ Both members are unsigned, so the result is unsigned as well. */
+ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+ /* Pops what PUSH_FAILURE_POINT pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the lowest and highest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ STR is left untouched when the saved string position is NULL.  The
+ macro also clears `set_regs_matched_done'.  It expands to a bare
+ brace-enclosed block, not a `do ... while (0)' statement.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+
+ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+ { \
+ DEBUG_STATEMENT (unsigned failure_id;) \
+ active_reg_t this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_POINTER (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string %p: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
+ \
+ low_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
+ \
+ /* Registers come off in the reverse of the order they were pushed. */ \
+ if (1) \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \
+ DEBUG_PRINT2 (" info: %p\n", \
+ reg_info[this_reg].word.pointer); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
+ } \
+ else \
+ { \
+ /* Not reached: this `else' pairs with the `if (1)' above. */ \
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+ { \
+ reg_info[this_reg].word.integer = 0; \
+ regend[this_reg] = 0; \
+ regstart[this_reg] = 0; \
+ } \
+ highest_active_reg = high_reg; \
+ } \
+ \
+ set_regs_matched_done = 0; \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+ } /* POP_FAILURE_POINT */
+
+
+
+/* Structure for per-register (a.k.a. per-group) information.
+ Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+
+
+/* Declarations and macros for re_match_2. */
+
+ typedef union {
+ fail_stack_elt_t word; /* Whole record as one failure-stack element; this is what PUSH_FAILURE_POINT saves. */
+ struct {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+ #define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p:2;
+ unsigned is_active:1;
+ unsigned matched_something:1; /* Set to 1 by SET_REGS_MATCHED. */
+ unsigned ever_matched_something:1; /* Set to 1 by SET_REGS_MATCHED. */
+ } bits;
+ } register_info_type;
+
+ /* Accessors for the bit fields of a register_info_type R.  Each expands
+ to the field itself, so it can be read or assigned. */
+ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+ #define IS_ACTIVE(R) ((R).bits.is_active)
+ #define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+ /* Record that a real character has just been matched: every register from
+ lowest_active_reg through highest_active_reg gets both its `matched
+ something' and its `ever matched something' flag set.  Nothing is done
+ when `set_regs_matched_done' is already nonzero; otherwise the macro
+ sets that variable itself before marking the registers. */
+ #define SET_REGS_MATCHED() \
+ do { \
+ if (!set_regs_matched_done) { \
+ active_reg_t this_reg; \
+ \
+ set_regs_matched_done = 1; \
+ this_reg = lowest_active_reg; \
+ while (this_reg <= highest_active_reg) { \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg]) = 1; \
+ MATCHED_SOMETHING (reg_info[this_reg]) = 1; \
+ this_reg++; \
+ } \
+ } \
+ } while (0)
+
+ /* Registers are set to a sentinel when they haven't yet matched.
+ The sentinel is the address of a private static object, so REG_UNSET is
+ a pointer-identity test; the object's contents are never used here. */
+ static char reg_unset_dummy;
+
+ #define REG_UNSET_VALUE (&reg_unset_dummy)
+ #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+/* Subroutine declarations and macros for regex_compile. */
+
+ /* The compiler proper. */
+ static reg_errcode_t regex_compile
+ _RE_ARGS((const char *pattern, size_t size, reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp));
+
+ /* Helpers that write or insert opcodes in the compiled pattern. */
+ static void store_op1
+ _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg));
+
+ static void store_op2
+ _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg1, int arg2));
+
+ static void insert_op1
+ _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg,
+ unsigned char *end));
+
+ static void insert_op2
+ _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg1, int arg2,
+ unsigned char *end));
+
+ /* Context predicates for `^' and `$'. */
+ static boolean at_begline_loc_p
+ _RE_ARGS((const char *pattern, const char *p, reg_syntax_t syntax));
+
+ static boolean at_endline_loc_p
+ _RE_ARGS((const char *p, const char *pend, reg_syntax_t syntax));
+
+ /* Range handling inside a bracket expression. */
+ static reg_errcode_t compile_range
+ _RE_ARGS((const char **p_ptr, const char *pend, char *translate,
+ reg_syntax_t syntax, unsigned char *b));
+
+ /* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate').
+
+ Uses the variables `p', `pend' and `translate' of the enclosing
+ function and does `return REG_EEND' from it when the pattern is
+ exhausted.  C is expanded more than once.
+ NOTE(review): that return does not go through FREE_STACK_RETURN; the
+ uses visible in regex_compile test `p == pend' first, so confirm that
+ every other use inside regex_compile does too. */
+ #ifndef PATFETCH
+ # define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = (unsigned char) translate[c]; \
+ } while (0)
+ #endif
+
+ /* Fetch the next character in the uncompiled pattern, with no
+ translation.  Like PATFETCH, this does `return REG_EEND' from the
+ enclosing function when `p' has reached `pend'. */
+ #define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+ /* Go backwards one character in the pattern.  No check against the start
+ of the pattern is made. */
+ #define PATUNFETCH p--
+
+
+ /* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned.
+ The translated value is cast to `char'; an untranslated D is returned
+ unchanged.  Uses the variable `translate' of the enclosing function. */
+ #ifndef TRANSLATE
+ # define TRANSLATE(d) \
+ (translate ? (char) translate[(unsigned char) (d)] : (d))
+ #endif
+
+
+/* Macros for outputting the compiled pattern into `buffer'. */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+ /* Make sure we have at least N more bytes of space in buffer.
+ Uses the variables `b' and `bufp' of the enclosing function.  Because
+ it grows the buffer through EXTEND_BUFFER, it may move the buffer
+ (adjusting `b' and the other pointers EXTEND_BUFFER knows about) and
+ may return an error code from the enclosing function.  It expands to a
+ bare `while' statement. */
+ #define GET_BUFFER_SPACE(n) \
+ while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+ /* Make sure we have one more byte of buffer space and then add C to it.
+ The value is stored as an unsigned char and `b' is advanced. */
+ #define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+ /* Ensure we have two more bytes of buffer space and then append C1 and C2.
+ Space for both bytes is reserved before either is stored. */
+ #define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+ /* As with BUF_PUSH_2, except for three bytes. */
+ #define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+ /* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies.
+ LOC is expanded twice, so it must have no side effects. */
+ #define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (int) ((to) - (loc) - 3))
+
+ /* Likewise, for a two-argument jump.  ARG is passed through unchanged as
+ the second operand. */
+ #define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
+
+ /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+ #define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
+
+ /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+ #define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+ /* Any other compiler which, like MSC, has an allocation limit below 2^16
+ bytes will have to use an approach similar to what was done below for
+ MSC and lower MAX_BUF_SIZE a bit. Otherwise you may end up
+ reallocating to 0 bytes, which is not going to work too well.
+ You have been warned!! */
+#if defined _MSC_VER && !defined WIN32
+/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
+ The REALLOC define eliminates a flurry of conversion warnings,
+ but is not required. */
+# define MAX_BUF_SIZE 65500L
+# define REALLOC(p,s) realloc ((p), (size_t) (s))
+#else
+# define MAX_BUF_SIZE (1L << 16)
+# define REALLOC(p,s) realloc ((p), (s))
+#endif
+
+ /* Grow the pattern buffer to twice its current size (capped at
+ MAX_BUF_SIZE) via realloc, and reset the pointers that pointed into the
+ old block to point to the correct places in the new one.
+
+ Does `return REG_ESIZE' from the enclosing function if the buffer is
+ already MAX_BUF_SIZE bytes, and `return REG_ESPACE' if realloc fails.
+ The new size and the new address are stored in BUFP only after realloc
+ has succeeded, so on failure BUFP still owns its old block and still
+ records that block's real size.
+
+ Uses the variables `bufp', `b', `begalt', `fixup_alt_jump', `laststart'
+ and `pending_exact' of the enclosing function.
+ NOTE(review): both returns bypass FREE_STACK_RETURN -- confirm whether
+ the compile stack needs to be released on these paths. */
+ #define EXTEND_BUFFER() \
+ do { \
+ unsigned char *old_buffer = bufp->buffer; \
+ unsigned char *new_buffer; \
+ unsigned long new_allocated; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ new_allocated = (unsigned long) bufp->allocated << 1; \
+ if (new_allocated > MAX_BUF_SIZE) \
+ new_allocated = MAX_BUF_SIZE; \
+ new_buffer = (unsigned char *) REALLOC (old_buffer, new_allocated); \
+ if (new_buffer == NULL) \
+ return REG_ESPACE; \
+ bufp->buffer = new_buffer; \
+ bufp->allocated = new_allocated; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != bufp->buffer) \
+ { \
+ b = (b - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (fixup_alt_jump) \
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ } \
+ } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+/* int may be not enough when sizeof(int) == 2. */
+typedef long pattern_offset_t;
+
+ /* One saved entry of the compile stack.  The first four members are
+ stored as offsets (pattern_offset_t) rather than as pointers --
+ presumably so that they stay valid when EXTEND_BUFFER moves the
+ pattern buffer; TODO confirm against the code that pushes and pops
+ these entries. */
+ typedef struct {
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+ } compile_stack_elt_t;
+
+
+ /* The compile stack: an array of `size' entries of which the first
+ `avail' are in use. */
+ typedef struct {
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+ } compile_stack_type;
+
+
+ #define INIT_COMPILE_STACK_SIZE 32
+
+ /* Both tests read a variable named `compile_stack'. */
+ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+ /* The next available element. */
+ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+ /* Set the bit for character C in a list.  The bitmap is addressed through
+ the variable `b', which must point at its first byte.  C is converted
+ to unsigned char before it is used as an index; it is expanded twice,
+ so it must have no side effects. */
+ #define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+ /* Get the next unsigned number in the uncompiled pattern.
+
+ Reads decimal digits through PATFETCH into `c' and accumulates them in
+ NUM, which is left untouched if no digit is found.  A negative NUM is
+ reset to 0 before the first digit is added.  On exit `c' holds the last
+ character fetched (the first non-digit, or the final digit when the
+ pattern ends).
+
+ Once NUM has grown past RE_DUP_MAX, further digits are consumed but no
+ longer accumulated, so an over-long digit string cannot overflow NUM
+ where int is at least 32 bits wide; NUM is then left above RE_DUP_MAX.
+ NOTE(review): this relies on the caller rejecting values above
+ RE_DUP_MAX -- confirm at the interval-parsing call sites.
+
+ Uses the variables `p', `pend' and `c' of the enclosing function and
+ expands to a brace-enclosed block. */
+ #define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while ('0' <= c && c <= '9') \
+ { \
+ if (num <= RE_DUP_MAX) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ } \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+ #if defined _LIBC || WIDE_CHAR_SUPPORT
+ /* The GNU C library provides support for user-defined character classes
+ and the functions from ISO C amendment 1. */
+ # ifdef CHARCLASS_NAME_MAX
+ # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+ # else
+ /* This shouldn't happen but some implementation might still have this
+ problem. Use a reasonable default value. */
+ # define CHAR_CLASS_MAX_LENGTH 256
+ # endif
+
+ /* In this configuration the class name is looked up with wctype
+ (__wctype inside libc). */
+ # ifdef _LIBC
+ # define IS_CHAR_CLASS(string) __wctype (string)
+ # else
+ # define IS_CHAR_CLASS(string) wctype (string)
+ # endif
+ #else
+ # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+ /* Without wide-character support only these twelve class names are
+ recognized.  STRING is expanded once per comparison. */
+ # define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+ #endif
+
+#ifndef MATCH_MAY_ALLOCATE
+
+/* If we cannot allocate large objects within re_match_2_internal,
+ we make the fail stack and register vectors global.
+ The fail stack, we grow to the maximum size when a regexp
+ is compiled.
+ The register vectors, we adjust in size each time we
+ compile a regexp, according to the number of registers it needs. */
+
+static fail_stack_type fail_stack;
+
+/* Size with which the following vectors are currently allocated.
+ That is so we can make them bigger as needed,
+ but never make them smaller. */
+static int regs_allocated_size;
+
+static const char **regstart, **regend;
+static const char **old_regstart, **old_regend;
+static const char **best_regstart, **best_regend;
+static register_info_type *reg_info;
+static const char **reg_dummy;
+static register_info_type *reg_info_dummy;
+
+ /* Make the register vectors big enough for NUM_REGS registers,
+ but don't make them smaller.
+
+ Every file-scope register vector is resized with RETALLOC_IF and
+ `regs_allocated_size' is then updated; nothing happens when NUM_REGS
+ does not exceed the current size.  The function returns no value, so
+ it is declared `void' rather than left as implicit int.
+ NOTE(review): the results of the RETALLOC_IF calls are not checked
+ here -- confirm how allocation failure is meant to be detected. */
+
+ static void regex_grow_registers(num_regs)
+ int num_regs;
+ {
+ if (num_regs > regs_allocated_size) {
+ RETALLOC_IF(regstart, num_regs, const char *);
+ RETALLOC_IF(regend, num_regs, const char *);
+ RETALLOC_IF(old_regstart, num_regs, const char *);
+ RETALLOC_IF(old_regend, num_regs, const char *);
+ RETALLOC_IF(best_regstart, num_regs, const char *);
+ RETALLOC_IF(best_regend, num_regs, const char *);
+
+ RETALLOC_IF(reg_info, num_regs, register_info_type);
+ RETALLOC_IF(reg_dummy, num_regs, const char *);
+
+ RETALLOC_IF(reg_info_dummy, num_regs, register_info_type);
+
+ regs_allocated_size = num_regs;
+ }
+ }
+
+#endif /* not MATCH_MAY_ALLOCATE */
+
+ /* Forward declaration; note that the compile stack is passed by value. */
+ static boolean group_in_compile_stack
+ _RE_ARGS((compile_stack_type compile_stack, regnum_t regnum));
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+ /* Return, freeing storage we allocated.
+ Frees `compile_stack.stack' and then returns VALUE from the enclosing
+ function; the comma operator makes the free happen before the return
+ value is produced.  Uses the variable `compile_stack'. */
+ #define FREE_STACK_RETURN(value) \
+ return (free (compile_stack.stack), value)
+
+static reg_errcode_t regex_compile(pattern, size, syntax, bufp)
+const char *pattern;
+size_t size;
+reg_syntax_t syntax;
+struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random temporary spot in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1("\nCompiling pattern: ");
+ if (debug) {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ putchar(pattern[debug_count]);
+ putchar('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack =
+ TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined emacs && !defined SYNTAX_TABLE
+ /* Initialize the syntax table. */
+ init_syntax_once();
+#endif
+
+ if (bufp->allocated == 0) {
+ if (bufp->buffer) { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ } else { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer)
+ FREE_STACK_RETURN(REG_ESPACE);
+
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend) {
+ PATFETCH(c);
+
+ switch (c) {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p(pattern, p, syntax))
+ BUF_PUSH(begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p(p, pend, syntax))
+ BUF_PUSH(endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart) {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN(REG_BADRPT);
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;) {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH(c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM)
+ && (c == '+' || c == '?')));
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\') {
+ if (p == pend)
+ FREE_STACK_RETURN(REG_EESCAPE);
+
+ PATFETCH(c1);
+ if (!(c1 == '+' || c1 == '?')) {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ } else {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok) { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert(p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE(3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE(*(p - 2)) == TRANSLATE('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE(*p) == TRANSLATE('\n')
+ && !(syntax & RE_DOT_NEWLINE)) { /* We have .*\n. */
+ STORE_JUMP(jump, b, laststart);
+ keep_string_p = true;
+ } else
+ /* Anything else. */
+ STORE_JUMP(maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE(3);
+ INSERT_JUMP(keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump, laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok) {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE(3);
+ INSERT_JUMP(dummy_failure_jump, laststart,
+ laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH(anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend)
+ FREE_STACK_RETURN(REG_EBRACK);
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE(34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH(*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero(b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) SET_LIST_BIT('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;) {
+ if (p == pend)
+ FREE_STACK_RETURN(REG_EBRACK);
+
+ PATFETCH(c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') {
+ if (p == pend)
+ FREE_STACK_RETURN(REG_EESCAPE);
+
+ PATFETCH(c1);
+ SET_LIST_BIT(c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ FREE_STACK_RETURN(REG_ERANGE);
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-' && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']') {
+ reg_errcode_t ret
+ = compile_range(&p, pend, translate, syntax, b);
+
+ if (ret != REG_NOERROR)
+ FREE_STACK_RETURN(ret);
+ }
+
+ else if (p[0] == '-' && p[1] != ']') { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH(c1);
+
+ ret = compile_range(&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR)
+ FREE_STACK_RETURN(ret);
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH(c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend)
+ FREE_STACK_RETURN(REG_EBRACK);
+
+ for (;;) {
+ PATFETCH(c);
+ if ((c == ':' && *p == ']') || p == pend)
+ break;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[c1] = '\0';
+
+ /* If it isn't a word bracketed by `[:' and `:]' (the else
+ branch below): undo the ending character, the letters, and
+ leave the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']') {
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+ boolean is_lower = STREQ(str, "lower");
+ boolean is_upper = STREQ(str, "upper");
+ wctype_t wt;
+ int ch;
+
+ wt = IS_CHAR_CLASS(str);
+ if (wt == 0)
+ FREE_STACK_RETURN(REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH(c);
+
+ if (p == pend)
+ FREE_STACK_RETURN(REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) {
+# ifdef _LIBC
+ if (__iswctype(__btowc(ch), wt))
+ SET_LIST_BIT(ch);
+# else
+ if (iswctype(btowc(ch), wt))
+ SET_LIST_BIT(ch);
+# endif
+
+ if (translate && (is_upper || is_lower)
+ && (ISUPPER(ch) || ISLOWER(ch)))
+ SET_LIST_BIT(ch);
+ }
+
+ had_char_class = true;
+#else
+ int ch;
+ boolean is_alnum = STREQ(str, "alnum");
+ boolean is_alpha = STREQ(str, "alpha");
+ boolean is_blank = STREQ(str, "blank");
+ boolean is_cntrl = STREQ(str, "cntrl");
+ boolean is_digit = STREQ(str, "digit");
+ boolean is_graph = STREQ(str, "graph");
+ boolean is_lower = STREQ(str, "lower");
+ boolean is_print = STREQ(str, "print");
+ boolean is_punct = STREQ(str, "punct");
+ boolean is_space = STREQ(str, "space");
+ boolean is_upper = STREQ(str, "upper");
+ boolean is_xdigit = STREQ(str, "xdigit");
+
+ if (!IS_CHAR_CLASS(str))
+ FREE_STACK_RETURN(REG_ECTYPE);
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH(c);
+
+ if (p == pend)
+ FREE_STACK_RETURN(REG_EBRACK);
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
+ /* This was split into 3 if's to
+ avoid an arbitrary limit in some compiler. */
+ if ((is_alnum && ISALNUM(ch))
+ || (is_alpha && ISALPHA(ch))
+ || (is_blank && ISBLANK(ch))
+ || (is_cntrl && ISCNTRL(ch)))
+ SET_LIST_BIT(ch);
+ if ((is_digit && ISDIGIT(ch))
+ || (is_graph && ISGRAPH(ch))
+ || (is_lower && ISLOWER(ch))
+ || (is_print && ISPRINT(ch)))
+ SET_LIST_BIT(ch);
+ if ((is_punct && ISPUNCT(ch))
+ || (is_space && ISSPACE(ch))
+ || (is_upper && ISUPPER(ch))
+ || (is_xdigit && ISXDIGIT(ch)))
+ SET_LIST_BIT(ch);
+ if (translate && (is_upper || is_lower)
+ && (ISUPPER(ch) || ISLOWER(ch)))
+ SET_LIST_BIT(ch);
+ }
+ had_char_class = true;
+#endif /* libc || wctype.h */
+ } else {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT('[');
+ SET_LIST_BIT(':');
+ had_char_class = false;
+ }
+ } else {
+ had_char_class = false;
+ SET_LIST_BIT(c);
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend)
+ FREE_STACK_RETURN(REG_EESCAPE);
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW(c);
+
+ switch (c) {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL) {
+ RETALLOC(compile_stack.stack, compile_stack.size << 1,
+ compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ =
+ fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM) {
+ COMPILE_STACK_TOP.inner_group_offset =
+ b - bufp->buffer + 2;
+ BUF_PUSH_3(start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY) {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ FREE_STACK_RETURN(REG_ERPAREN);
+ }
+
+ handle_close:
+ if (fixup_alt_jump) { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH(push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY) {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ FREE_STACK_RETURN(REG_ERPAREN);
+ }
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert(compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt =
+ bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump =
+ COMPILE_STACK_TOP.fixup_alt_jump ? bufp->buffer +
+ COMPILE_STACK_TOP.fixup_alt_jump - 1 : 0;
+ laststart =
+ bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ /* If we've reached MAX_REGNUM groups, then this close
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM) {
+ unsigned char *inner_group_loc
+
+ =
+ bufp->buffer +
+ COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3(stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE(3);
+ INSERT_JUMP(on_failure_jump, begalt, b + 6);
+ pending_exact = 0;
+ b += 3;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by next alternative or
+ when know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE(3);
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS)
+ && (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern
+ && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend) {
+ if (!(syntax & RE_INTERVALS)
+ && (syntax & RE_NO_BK_BRACES)) goto
+ unfetch_interval;
+ else
+ FREE_STACK_RETURN(REG_EBRACE);
+ }
+
+ GET_UNSIGNED_NUMBER(lower_bound);
+
+ if (c == ',') {
+ GET_UNSIGNED_NUMBER(upper_bound);
+ if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
+ || ((syntax & RE_NO_BK_BRACES) && c != '}'))
+ FREE_STACK_RETURN(REG_BADBR);
+
+ if (upper_bound < 0)
+ upper_bound = RE_DUP_MAX;
+ } else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound) {
+ if (!(syntax & RE_INTERVALS)
+ && (syntax & RE_NO_BK_BRACES)) goto
+ unfetch_interval;
+ else
+ FREE_STACK_RETURN(REG_BADBR);
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES)) {
+ if (c != '\\')
+ FREE_STACK_RETURN(REG_EBRACE);
+
+ PATFETCH(c);
+ }
+
+ if (c != '}') {
+ if (!(syntax & RE_INTERVALS)
+ && (syntax & RE_NO_BK_BRACES)) goto
+ unfetch_interval;
+ else
+ FREE_STACK_RETURN(REG_BADBR);
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart) {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ FREE_STACK_RETURN(REG_BADRPT);
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ if (upper_bound == 0) {
+ GET_BUFFER_SPACE(3);
+ INSERT_JUMP(jump, laststart, b + 3);
+ b += 3;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE(nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2(succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2(set_number_at, laststart, 5,
+ lower_bound, b);
+ b += 5;
+
+ if (upper_bound > 1) { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2(jump_n, b, laststart + 5,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2(set_number_at, laststart,
+ b - laststart, upper_bound - 1, b);
+ b += 5;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert(beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH(c);
+
+ if (!(syntax & RE_NO_BK_BRACES)) {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH(at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH(c);
+ BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH(c);
+ BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ BUF_PUSH(wordchar);
+ break;
+
+
+ case 'W':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ BUF_PUSH(notwordchar);
+ break;
+
+
+ case '<':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH(wordbeg);
+ break;
+
+ case '>':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH(wordend);
+ break;
+
+ case 'b':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH(wordbound);
+ break;
+
+ case 'B':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH(notwordbound);
+ break;
+
+ case '`':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH(begbuf);
+ break;
+
+ case '\'':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ BUF_PUSH(endbuf);
+ break;
+
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ FREE_STACK_RETURN(REG_ESUBREG);
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack(compile_stack, (regnum_t) c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2(duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE(c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+ /* If followed by a repetition operator. */
+ || *p == '*' || *p == '^' || ((syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+'
+ || p[1] ==
+ '?') : (*p
+ ==
+ '+'
+ ||
+ *p
+ ==
+ '?'))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? *p == '{' : (p[0] == '\\' && p[1] == '{')))) {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+ BUF_PUSH_2(exactn, 0);
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH(c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ FREE_STACK_RETURN(REG_EPAREN);
+
+ /* If we don't want backtracking, force success
+ the first time we reach the end of the compiled pattern. */
+ if (syntax & RE_NO_POSIX_BACKTRACKING)
+ BUF_PUSH(succeed);
+
+ free(compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+ bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug) {
+ DEBUG_PRINT1("\nCompiled pattern: \n");
+ print_compiled_pattern(bufp);
+ }
+#endif /* DEBUG */
+
+#ifndef MATCH_MAY_ALLOCATE
+ /* Initialize the failure stack to the largest possible stack. This
+ isn't necessary unless we're trying to avoid calling alloca in
+ the search and match routines. */
+ {
+ int num_regs = bufp->re_nsub + 1;
+
+ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
+ is strictly greater than re_max_failures, the largest possible stack
+ is 2 * re_max_failures failure points. */
+ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
+ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+
+# ifdef emacs
+ if (!fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) xmalloc(fail_stack.size
+ *
+ sizeof
+ (fail_stack_elt_t));
+ else
+ fail_stack.stack =
+ (fail_stack_elt_t *) xrealloc(fail_stack.stack,
+ (fail_stack.size *
+ sizeof
+ (fail_stack_elt_t)));
+# else /* not emacs */
+ if (!fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) malloc(fail_stack.size
+ *
+ sizeof
+ (fail_stack_elt_t));
+ else
+ fail_stack.stack =
+ (fail_stack_elt_t *) realloc(fail_stack.stack,
+ (fail_stack.size *
+ sizeof
+ (fail_stack_elt_t)));
+# endif /* not emacs */
+ }
+
+ regex_grow_registers(num_regs);
+ }
+#endif /* not MATCH_MAY_ALLOCATE */
+
+ return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'. */
+
+/* Write opcode OP into the byte at LOC, then encode ARG as the two-byte
+   integer operand that immediately follows it (via STORE_NUMBER).
+   Three bytes starting at LOC are written.  */
+
+static void store_op1(op, loc, arg)
+re_opcode_t op;
+unsigned char *loc;
+int arg;
+{
+    /* The operand lives right after the one-byte opcode.  */
+    unsigned char *operand = loc + 1;
+
+    loc[0] = (unsigned char) op;
+    STORE_NUMBER(operand, arg);
+}
+
+
+/* Two-operand variant of `store_op1': put opcode OP at LOC, followed by
+   the two-byte integers ARG1 and then ARG2.  Five bytes starting at LOC
+   are written.  */
+
+static void store_op2(op, loc, arg1, arg2)
+re_opcode_t op;
+unsigned char *loc;
+int arg1, arg2;
+{
+    /* Operand slots: ARG1 directly after the opcode, ARG2 two bytes on.  */
+    unsigned char *first = loc + 1;
+    unsigned char *second = loc + 3;
+
+    loc[0] = (unsigned char) op;
+    STORE_NUMBER(first, arg1);
+    STORE_NUMBER(second, arg2);
+}
+
+
+/* Open a three-byte gap at LOC by shifting the bytes in [LOC, END) up by
+   three positions, then fill the gap with OP and its two-byte integer
+   parameter ARG.  Bytes up to END + 2 are written, so the buffer must
+   have three bytes of room past END.  */
+
+static void insert_op1(op, loc, arg, end)
+re_opcode_t op;
+unsigned char *loc;
+int arg;
+unsigned char *end;
+{
+    unsigned char *src = end;
+    unsigned char *dst = end + 3;
+
+    /* Copy from the top down so that overlapping bytes are moved before
+       they are overwritten.  */
+    while (src != loc) {
+        --src;
+        --dst;
+        *dst = *src;
+    }
+
+    store_op1(op, loc, arg);
+}
+
+
+/* Two-operand variant of `insert_op1': shift the bytes in [LOC, END) up
+   by five positions and store OP with the two-byte parameters ARG1 and
+   ARG2 in the gap.  Bytes up to END + 4 are written, so the buffer must
+   have five bytes of room past END.  */
+
+static void insert_op2(op, loc, arg1, arg2, end)
+re_opcode_t op;
+unsigned char *loc;
+int arg1, arg2;
+unsigned char *end;
+{
+    unsigned char *src = end;
+    unsigned char *dst = end + 5;
+
+    /* Copy from the top down so that overlapping bytes are moved before
+       they are overwritten.  */
+    while (src != loc) {
+        --src;
+        --dst;
+        *dst = *src;
+    }
+
+    store_op2(op, loc, arg1, arg2);
+}
+
+
+/* Decide whether a `^' sits where it can act as a beginning-of-line
+   anchor.  P points just past the `^' inside PATTERN, and at least one
+   character is assumed to precede the `^'.  Returns true when the `^'
+   directly follows an open-group or an alternation operator (spelled
+   with or without a backslash, as SYNTAX dictates), false otherwise.  */
+
+static boolean at_begline_loc_p(pattern, p, syntax)
+const char *pattern, *p;
+reg_syntax_t syntax;
+{
+    /* P[-1] is the `^' itself, so P[-2] is the character before it.  */
+    const char *before = p - 2;
+    boolean escaped = false;
+
+    /* Only look one further back when that stays inside PATTERN.  */
+    if (before > pattern && before[-1] == '\\')
+        escaped = true;
+
+    switch (*before) {
+    case '(':
+        /* After a subexpression opener?  */
+        if ((syntax & RE_NO_BK_PARENS) || escaped)
+            return true;
+        break;
+
+    case '|':
+        /* After an alternation operator?  */
+        if ((syntax & RE_NO_BK_VBAR) || escaped)
+            return true;
+        break;
+
+    default:
+        break;
+    }
+
+    return false;
+}
+
+
+/* Counterpart of at_begline_loc_p for `$'.  P points at the character
+   following the `$' and must satisfy `P < PEND'.  Returns true when the
+   `$' is immediately followed by a close-group or an alternation
+   operator (spelled with or without a backslash, as SYNTAX dictates),
+   false otherwise.  */
+
+static boolean at_endline_loc_p(p, pend, syntax)
+const char *p, *pend;
+reg_syntax_t syntax;
+{
+    boolean escaped = (*p == '\\');
+    /* The character after *P, or NUL when *P is the last one; NUL never
+       equals `)' or `|', so it safely stands for "nothing there".  */
+    char following = (p + 1 < pend) ? p[1] : '\0';
+    boolean before_close;
+    boolean before_alt;
+
+    /* Before the end of a subexpression?  */
+    if (syntax & RE_NO_BK_PARENS)
+        before_close = (*p == ')');
+    else
+        before_close = (escaped && following == ')');
+
+    if (before_close)
+        return true;
+
+    /* Before an alternative?  */
+    if (syntax & RE_NO_BK_VBAR)
+        before_alt = (*p == '|');
+    else
+        before_alt = (escaped && following == '|');
+
+    return before_alt;
+}
+
+
+/* Report whether any of the first `avail' entries of COMPILE_STACK
+   records register number REGNUM.  Returns true if one does, false
+   otherwise.  */
+
+static boolean group_in_compile_stack(compile_stack, regnum)
+compile_stack_type compile_stack;
+regnum_t regnum;
+{
+    int remaining = compile_stack.avail;
+
+    /* Walk from the most recently pushed entry down to the oldest.  */
+    while (remaining-- > 0) {
+        if (compile_stack.stack[remaining].regnum == regnum)
+            return true;
+    }
+
+    return false;
+}
+
+
+/* Compile the range `X-Y' of a bracket expression into the bitmap B.
+
+   *P_PTR points at the range's ending character in the uncompiled
+   pattern (which ends at PEND); the starting character is taken from
+   `P[-2]' and `P[-1]' is the `-'.  Every character that collates (per
+   strcoll) between the two endpoints, inclusive, gets the bit of its
+   translation set in B.  On return *P_PTR has been advanced past the
+   ending character.
+
+   Returns REG_ERANGE if the pattern ends right after the `-', or if
+   the range turns out to be empty while SYNTAX has RE_NO_EMPTY_RANGES
+   set; otherwise REG_NOERROR.
+
+   The names `p', `b' and `translate' are kept short on purpose so the
+   macros shared with `regex_compile' can be used here too.  */
+
+static reg_errcode_t compile_range(p_ptr, pend, translate, syntax, b)
+const char **p_ptr, *pend;
+RE_TRANSLATE_TYPE translate;
+reg_syntax_t syntax;
+unsigned char *b;
+{
+    const char *p = *p_ptr;
+    /* One-character, NUL-terminated strings to hand to strcoll.  */
+    char lo[2];
+    char hi[2];
+    char probe[2];
+    reg_errcode_t status;
+    /* Must be wider than `unsigned char': the loop below has to be able
+       to count one past the largest character value to terminate.  */
+    unsigned code;
+
+    if (p == pend)
+        return REG_ERANGE;
+
+    /* The endpoints are taken untranslated; translation is applied to
+       each member character when its bit is set below.  */
+    lo[0] = p[-2];
+    lo[1] = '\0';
+    hi[0] = p[0];
+    hi[1] = '\0';
+    probe[1] = '\0';
+
+    /* Step the caller's pattern pointer past the ending character.  */
+    *p_ptr += 1;
+
+    /* Assume the range is empty; that is an error only if the syntax
+       forbids empty ranges.  Cleared as soon as one character fits.  */
+    if (syntax & RE_NO_EMPTY_RANGES)
+        status = REG_ERANGE;
+    else
+        status = REG_NOERROR;
+
+    for (code = 0; code <= (unsigned char) -1; ++code) {
+        probe[0] = code;
+
+        /* Skip characters collating before the start or after the end.  */
+        if (strcoll(lo, probe) > 0 || strcoll(probe, hi) > 0)
+            continue;
+
+        SET_LIST_BIT(TRANSLATE(code));
+        status = REG_NOERROR;
+    }
+
+    return status;
+}
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
+ characters can start a string that matches the pattern. This fastmap
+ is used by re_search to skip quickly over impossible starting points.
+
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+ area as BUFP->fastmap.
+
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+ the pattern buffer.
+
+ The compiled pattern is walked one path at a time: the target of each
+ on_failure_jump is remembered on a local failure stack and explored
+ after the current path has either consumed a character or reached
+ the end of the pattern.
+
+ Returns 0 if we succeed, -2 if an internal error (pushing a pending
+ path onto the failure stack failed). */
+
+int re_compile_fastmap(bufp)
+struct re_pattern_buffer *bufp;
+{
+ /* J is a scratch index / jump offset, K a scratch operand value. */
+ int j, k;
+
+#ifdef MATCH_MAY_ALLOCATE
+ fail_stack_type fail_stack;
+#endif
+#ifndef REGEX_MALLOC
+ /* NOTE(review): not referenced directly in this function; presumably
+ used by the fail-stack allocation macros -- confirm. */
+ char *destination;
+#endif
+
+ register char *fastmap = bufp->fastmap;
+ unsigned char *pattern = bufp->buffer;
+ unsigned char *p = pattern;
+ register unsigned char *pend = pattern + bufp->used;
+
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* Assume that each path through the pattern can be null until
+ proven otherwise. We set this false at the bottom of switch
+ statement, to which we get only if a particular path doesn't
+ match the empty string. */
+ boolean path_can_be_null = true;
+
+ /* We aren't doing a `succeed_n' to begin with. */
+ boolean succeed_n_p = false;
+
+ assert(fastmap != NULL && p != NULL);
+
+ INIT_FAIL_STACK();
+ bzero(fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ bufp->fastmap_accurate = 1; /* It will be when we're done. */
+ bufp->can_be_null = 0;
+
+ while (1) {
+ if (p == pend || *p == succeed) {
+ /* We have reached the (effective) end of pattern. */
+ if (!FAIL_STACK_EMPTY()) {
+ bufp->can_be_null |= path_can_be_null;
+
+ /* Reset for next path. */
+ path_can_be_null = true;
+
+ /* Resume at the most recently remembered alternative. */
+ p = fail_stack.stack[--fail_stack.avail].pointer;
+
+ continue;
+ } else
+ break;
+ }
+
+ /* We should never be about to go beyond the end of the pattern. */
+ assert(p < pend);
+
+ /* Dispatch on the opcode; P is left pointing at its first operand. */
+ switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
+
+ /* I guess the idea here is to simply not bother with a fastmap
+ if a backreference is used, since it's too hard to figure out
+ the fastmap for the corresponding group. Setting
+ `can_be_null' stops `re_search_2' from using the fastmap, so
+ that is all we do. */
+ case duplicate:
+ bufp->can_be_null = 1;
+ goto done;
+
+
+ /* Following are the cases which match a character. These end
+ with `break'. */
+
+ case exactn:
+ /* *P is the count byte that follows the opcode, so p[1] is the
+ first literal character of the string. */
+ fastmap[p[1]] = 1;
+ break;
+
+
+ case charset:
+ /* *P is the bitmap length in bytes; every bit set in the bitmap
+ marks a character that can start a match. */
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+ fastmap[j] = 1;
+ break;
+
+
+ case charset_not:
+ /* Chars beyond end of map must be allowed. */
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* Within the map, the characters whose bit is clear are allowed. */
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+ fastmap[j] = 1;
+ break;
+
+
+ case wordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX(j) == Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case notwordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX(j) != Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case anychar:
+ {
+ /* Saved so the entry for newline can be restored below. */
+ int fastmap_newline = fastmap['\n'];
+
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = fastmap_newline;
+
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ goto done;
+
+ /* Otherwise, have to check alternative paths. */
+ break;
+ }
+
+#ifdef emacs
+ case syntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX(j) == (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ case notsyntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX(j) != (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ /* All cases after this match the empty string. These end with
+ `continue'. */
+
+
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ continue;
+#endif /* emacs */
+
+
+ /* Zero-width operators without operands: nothing to record, just
+ move on to the next opcode. */
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ case push_dummy_failure:
+ continue;
+
+
+ case jump_n:
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case jump_past_alt:
+ case dummy_failure_jump:
+ /* Read the jump offset and follow it; a forward jump simply
+ continues the scan at its target. */
+ EXTRACT_NUMBER_AND_INCR(j, p);
+ p += j;
+ if (j > 0)
+ continue;
+
+ /* Jump backward implies we just went through the body of a
+ loop and matched nothing. Opcode jumped to should be
+ `on_failure_jump' or `succeed_n'. Just treat it like an
+ ordinary jump. For a * loop, it has pushed its failure
+ point already; if so, discard that as redundant. */
+ if ((re_opcode_t) * p != on_failure_jump
+ && (re_opcode_t) * p != succeed_n)
+ continue;
+
+ /* Skip that opcode and follow its jump offset as well. */
+ p++;
+ EXTRACT_NUMBER_AND_INCR(j, p);
+ p += j;
+
+ /* If what's on the stack is where we are now, pop it. */
+ if (!FAIL_STACK_EMPTY()
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)
+ fail_stack.avail--;
+
+ continue;
+
+
+ case on_failure_jump:
+ case on_failure_keep_string_jump:
+ handle_on_failure_jump:
+ EXTRACT_NUMBER_AND_INCR(j, p);
+
+ /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+ end of the pattern. We don't want to push such a point,
+ since when we restore it above, entering the switch will
+ increment `p' past the end of the pattern. We don't need
+ to push such a point since we obviously won't find any more
+ fastmap entries beyond `pend'. Such a pattern can match
+ the null string, though. */
+ if (p + j < pend) {
+ /* Remember the failure target as a path to explore later; it
+ is popped at the top of the loop once this path ends. */
+ if (!PUSH_PATTERN_OP(p + j, fail_stack)) {
+ RESET_FAIL_STACK();
+ return -2;
+ }
+ } else
+ bufp->can_be_null = 1;
+
+ /* We got here from `succeed_n' below, whose second operand (the
+ count) still has to be stepped over. */
+ if (succeed_n_p) {
+ EXTRACT_NUMBER_AND_INCR(k, p); /* Skip the n. */
+ succeed_n_p = false;
+ }
+
+ continue;
+
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p += 2;
+
+ /* Increment p past the n for when k != 0. */
+ EXTRACT_NUMBER_AND_INCR(k, p);
+ if (k == 0) {
+ /* A zero count behaves like an `on_failure_jump': back up to
+ the jump operand (presumably the 2 bytes skipped plus the 2
+ bytes just read -- EXTRACT_NUMBER_AND_INCR is defined
+ elsewhere) and share that code. */
+ p -= 4;
+ succeed_n_p = true; /* Spaghetti code alert. */
+ goto handle_on_failure_jump;
+ }
+ continue;
+
+
+ case set_number_at:
+ /* Skip the four operand bytes (two two-byte parameters, as
+ written by `store_op2'). */
+ p += 4;
+ continue;
+
+
+ case start_memory:
+ case stop_memory:
+ /* Skip the two operand bytes. */
+ p += 2;
+ continue;
+
+
+ default:
+ abort(); /* We have listed all the cases. */
+ } /* switch *p++ */
+
+ /* Getting here means we have found the possible starting
+ characters for one path of the pattern -- and that the empty
+ string does not match. We need not follow this path further.
+ Instead, look at the next alternative (remembered on the
+ stack), or quit if no more. The test at the top of the loop
+ does these things. */
+ path_can_be_null = false;
+ p = pend;
+ } /* while p */
+
+ /* Set `can_be_null' for the last path (also the first path, if the
+ pattern is empty). */
+ bufp->can_be_null |= path_can_be_null;
+
+ done:
+ RESET_FAIL_STACK();
+ return 0;
+} /* re_compile_fastmap */
+
+#ifdef _LIBC
+weak_alias(__re_compile_fastmap, re_compile_fastmap)
+#endif
+/* Attach caller-supplied register storage to REGS for use with
+   PATTERN_BUFFER (BUFP).
+
+   If NUM_REGS is nonzero, REGS is made to refer to the STARTS and ENDS
+   arrays, its register count becomes NUM_REGS, and BUFP is marked
+   REGS_REALLOCATE, so subsequent matches using BUFP and REGS record
+   register information in that memory.  STARTS and ENDS must have been
+   allocated with the malloc library routine and must each be at least
+   NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, REGS is emptied and BUFP is marked
+   REGS_UNALLOCATED, so subsequent matches allocate their own register
+   data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+void re_set_registers(bufp, regs, num_regs, starts, ends)
+struct re_pattern_buffer *bufp;
+struct re_registers *regs;
+unsigned num_regs;
+regoff_t *starts, *ends;
+{
+    if (num_regs == 0) {
+        /* No caller storage: leave REGS empty and let the matcher
+           allocate.  */
+        bufp->regs_allocated = REGS_UNALLOCATED;
+        regs->num_regs = 0;
+        regs->end = (regoff_t *) 0;
+        regs->start = (regoff_t *) 0;
+        return;
+    }
+
+    /* Adopt the caller's arrays.  */
+    bufp->regs_allocated = REGS_REALLOCATE;
+    regs->num_regs = num_regs;
+    regs->start = starts;
+    regs->end = ends;
+}
+
+#ifdef _LIBC
+weak_alias(__re_set_registers, re_set_registers)
+#endif
+/* Searching routines.  */
+
+/* Single-string front end to re_search_2, below: the first string is
+   absent (NULL, length 0), STRING of length SIZE is passed as the
+   second string, and the stop index is fixed at SIZE, so the caller
+   cannot say where to stop matching.  The result is whatever
+   re_search_2 returns.  */
+int re_search(bufp, string, size, startpos, range, regs)
+struct re_pattern_buffer *bufp;
+const char *string;
+int size, startpos, range;
+struct re_registers *regs;
+{
+    /* The whole of STRING is eligible for matching.  */
+    int stop = size;
+
+    return re_search_2(bufp, NULL, 0, string, size,
+                       startpos, range, regs, stop);
+}
+
+#ifdef _LIBC
+weak_alias(__re_search, re_search)
+#endif
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on (or at STARTPOS - 1, and so
+   on, when RANGE is negative).
+
+   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+   RANGE is how far to scan while trying to match.  RANGE = 0 means try
+   only at STARTPOS; in general, the last start tried is STARTPOS +
+   RANGE.  RANGE is clamped so that no start position falls below 0 or
+   above SIZE1 + SIZE2.
+
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire BUFP->buffer and its contained
+   subexpressions.
+
+   Do not consider matching one past the index STOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   We return either the position in the strings at which the match was
+   found, -1 if no match (or if STARTPOS is out of range), or -2 if
+   error (such as failure stack overflow).  */
+int
+re_search_2(bufp, string1, size1, string2, size2, startpos, range, regs,
+            stop)
+struct re_pattern_buffer *bufp;
+const char *string1, *string2;
+int size1, size2;
+int startpos;
+int range;
+struct re_registers *regs;
+int stop;
+{
+    int val;
+    register char *fastmap = bufp->fastmap;
+    /* TRANSLATE, used below, refers to this variable by name.  */
+    register RE_TRANSLATE_TYPE translate = bufp->translate;
+    int total_size = size1 + size2;
+
+    /* Check for out-of-range STARTPOS. */
+    if (startpos < 0 || startpos > total_size)
+        return -1;
+
+    /* Fix up RANGE if it might eventually take us outside
+       the virtual concatenation of STRING1 and STRING2.
+       Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.
+
+       STARTPOS is known to lie in [0, TOTAL_SIZE] here, so both bounds
+       are computed by subtraction; forming STARTPOS + RANGE could
+       overflow a signed int when |RANGE| is large.  */
+    if (range > total_size - startpos)
+        range = total_size - startpos;
+    else if (range < -startpos)
+        range = -startpos;
+
+    /* If the search isn't to be a backwards one, don't waste time in a
+       search for a pattern that must be anchored. */
+    if (bufp->used > 0 && range > 0
+        && ((re_opcode_t) bufp->buffer[0] == begbuf
+            /* `begline' is like `begbuf' if it cannot match at newlines. */
+            || ((re_opcode_t) bufp->buffer[0] == begline
+                && !bufp->newline_anchor))) {
+        if (startpos > 0)
+            return -1;
+        else
+            range = 1;
+    }
+#ifdef emacs
+    /* In a forward search for something that starts with \=.
+       don't keep searching past point. */
+    if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot
+        && range > 0) {
+        range = PT - startpos;
+        if (range <= 0)
+            return -1;
+    }
+#endif /* emacs */
+
+    /* Update the fastmap now if not correct already. */
+    if (fastmap && !bufp->fastmap_accurate) {
+        if (re_compile_fastmap(bufp) == -2)
+            return -2;
+    }
+
+    /* Loop through the string, looking for a place to start matching. */
+    for (;;) {
+        /* If a fastmap is supplied, skip quickly over characters that
+           cannot be the start of a match.  If the pattern can match the
+           null string, however, we don't need to skip characters; we want
+           the first null string. */
+        if (fastmap && startpos < total_size && !bufp->can_be_null) {
+            if (range > 0) {    /* Searching forwards. */
+                register const char *d;
+                register int lim = 0;
+                int irange = range;
+
+                /* When the scan starts in STRING1 and could run past
+                   its end, stop the fast skip at the end of STRING1:
+                   LIM is the part of RANGE that lies in STRING2.  */
+                if (startpos < size1 && startpos + range >= size1)
+                    lim = range - (size1 - startpos);
+
+                /* Point D at the character at STARTPOS.  The offset
+                   into STRING2 is computed first so that no pointer
+                   before the start of STRING2 is ever formed.  */
+                if (startpos >= size1)
+                    d = string2 + (startpos - size1);
+                else
+                    d = string1 + startpos;
+
+                /* Written out as an if-else to avoid testing `translate'
+                   inside the loop. */
+                if (translate)
+                    while (range > lim
+                           && !fastmap[(unsigned char)
+                                       translate[(unsigned char) *d++]])
+                        range--;
+                else
+                    while (range > lim && !fastmap[(unsigned char) *d++])
+                        range--;
+
+                /* Advance STARTPOS by the number of characters skipped.  */
+                startpos += irange - range;
+            } else {            /* Searching backwards. */
+                register char c = (size1 == 0 || startpos >= size1
+                                   ? string2[startpos - size1]
+                                   : string1[startpos]);
+
+                if (!fastmap[(unsigned char) TRANSLATE(c)])
+                    goto advance;
+            }
+        }
+
+        /* If can't match the null string, and that's all we have left, fail. */
+        if (range >= 0 && startpos == total_size && fastmap
+            && !bufp->can_be_null)
+            return -1;
+
+        val = re_match_2_internal(bufp, string1, size1, string2, size2,
+                                  startpos, regs, stop);
+#ifndef REGEX_MALLOC
+# ifdef C_ALLOCA
+        alloca(0);
+# endif
+#endif
+
+        if (val >= 0)
+            return startpos;
+
+        if (val == -2)
+            return -2;
+
+      advance:
+        /* Step to the next candidate start, or stop once RANGE is used up.  */
+        if (!range)
+            break;
+        else if (range > 0) {
+            range--;
+            startpos++;
+        } else {
+            range++;
+            startpos--;
+        }
+    }
+    return -1;
+} /* re_search_2 */
+
+#ifdef _LIBC
+weak_alias(__re_search_2, re_search_2)
+#endif
+/* The macros below take no context arguments: they refer directly to
+   variables that must be in scope where they are expanded (string1,
+   string2, size1, size2, end1, end2, end_match_1, end_match_2, d and
+   dend), and PREFETCH also needs a `fail' label.  */
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+   and `string2', into an offset within the virtual concatenation of the
+   two strings: a pointer that is not in the first string is measured
+   from `string2' and then has `size1' added.  FIRST_STRING_P is not
+   defined in this part of the file; presumably it tests whether PTR
+   lies within `string1'.  */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) \
+ ? ((regoff_t) ((ptr) - string1)) \
+ : ((regoff_t) ((ptr) - string2 + size1)))
+/* Macros for dealing with the split strings in re_match_2. */
+/* True while the current data segment ends at `end_match_1', i.e. we
+   have not yet switched over to string2.  */
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+/* Call before fetching a character with *d. This switches over to
+   string2 if necessary.  It is a loop rather than a single test: after
+   switching, `d' may again equal `dend' (nothing to match in string2),
+   in which case the second pass jumps to `fail'.  */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+/* Test if at very beginning or at very end of the virtual concatenation
+   of `string1' and `string2'. If only one string, it's `string2'.
+   Note that AT_STRINGS_BEG is also true for any D whenever `size2' is
+   zero.  */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+/* Test if D points to a character which is word-constituent. We have
+   two special cases to check for: if past the end of string1, look at
+   the first character in string2; and if before the beginning of
+   string2, look at the last character in string1.  SYNTAX and Sword
+   are defined elsewhere in this file.  */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+/* Disabled due to a compiler bug -- see comment at case wordbound */
+#if 0
+/* Test if the character before D and the one at D differ with respect
+   to being word-constituent. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+#endif
+/* Free everything we malloc.
+
+   FREE_VAR (VAR) releases VAR through REGEX_FREE when it is non-null
+   and then resets VAR to NULL.  It is wrapped in `do { ... } while (0)'
+   so that it expands to exactly one statement: without the wrapper the
+   trailing assignment would escape an unbraced `if' or `else' at the
+   point of use.
+
+   FREE_VARIABLES () releases the failure stack and each of the
+   matcher's register vectors.  It refers to those variables by name,
+   so they must be in scope where it is expanded.  When
+   MATCH_MAY_ALLOCATE is not defined it expands to a no-op
+   expression.  */
+#ifdef MATCH_MAY_ALLOCATE
+# define FREE_VAR(var) \
+  do { \
+    if (var) \
+      REGEX_FREE (var); \
+    (var) = NULL; \
+  } while (0)
+# define FREE_VARIABLES() \
+  do { \
+    REGEX_FREE_STACK (fail_stack.stack); \
+    FREE_VAR (regstart); \
+    FREE_VAR (regend); \
+    FREE_VAR (old_regstart); \
+    FREE_VAR (old_regend); \
+    FREE_VAR (best_regstart); \
+    FREE_VAR (best_regend); \
+    FREE_VAR (reg_info); \
+    FREE_VAR (reg_dummy); \
+    FREE_VAR (reg_info_dummy); \
+  } while (0)
+#else
+# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
+#endif /* not MATCH_MAY_ALLOCATE */
+/* Sentinels meaning "no register is currently active".  The matcher
+   initializes `highest_active_reg' and `lowest_active_reg' to them and
+   restores them when the last active group is closed.
+
+   These values must meet several constraints. They must not be valid
+   register values; since we have a limit of 255 registers (because
+   we use only one byte in the pattern for the register number), we can
+   use numbers larger than 255. They must differ by 1, because of
+   NUM_FAILURE_ITEMS above. And the value for the lowest register must
+   be larger than the value for the highest register, so we do not try
+   to actually save any registers when none are active.
+
+   NOTE(review): BYTEWIDTH is defined elsewhere; the "larger than 255"
+   requirement assumes it is 8, making these 256 and 257 -- confirm.  */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+/* Matching routines.  */
+#ifndef emacs /* Emacs never uses this. */
+/* re_match is like re_match_2 except it takes only a single string:
+   the first string is absent (NULL, length 0), STRING of length SIZE
+   is passed as the second string, and the stop index is SIZE.  The
+   value returned is that of re_match_2_internal.  */
+int re_match(bufp, string, size, pos, regs)
+struct re_pattern_buffer *bufp;
+const char *string;
+int size, pos;
+struct re_registers *regs;
+{
+    int match_length;
+
+    match_length = re_match_2_internal(bufp, NULL, 0, string, size,
+                                       pos, regs, size);
+
+    /* With the C emulation of alloca, a call with argument 0 is made
+       once the internal matcher has returned.  */
+# if !defined REGEX_MALLOC && defined C_ALLOCA
+    alloca(0);
+# endif
+    return match_length;
+}
+
+# ifdef _LIBC
+weak_alias(__re_match, re_match)
+# endif
+#endif /* not emacs */
+/* Forward declarations of static helpers.  group_match_null_string_p
+   and bcmp_translate are called from re_match_2_internal below.  */
+static boolean group_match_null_string_p
+    _RE_ARGS((unsigned char **p, unsigned char *end,
+              register_info_type *reg_info));
+static boolean alt_match_null_string_p
+    _RE_ARGS((unsigned char *p, unsigned char *end,
+              register_info_type *reg_info));
+static boolean common_op_match_null_string_p
+    _RE_ARGS((unsigned char **p, unsigned char *end,
+              register_info_type *reg_info));
+static int bcmp_translate
+    _RE_ARGS((const char *s1, const char *s2, int len, char *translate));
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+ and SIZE2, respectively). We start matching at POS, and stop
+ matching at STOP.
+
+ If REGS is non-null and the `no_sub' field of BUFP is zero, we
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
+
+ We return -1 if no match, -2 if an internal error (such as the
+ failure stack overflowing). Otherwise, we return the length of the
+ matched substring. */
+
+int re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop)
+struct re_pattern_buffer *bufp;
+const char *string1, *string2;
+int size1, size2;
+int pos;
+struct re_registers *regs;
+int stop;
+{
+    int match_length;
+
+    /* All the work is done by the internal matcher; this wrapper only
+       forwards its arguments and hands back its result.  */
+    match_length = re_match_2_internal(bufp, string1, size1,
+                                       string2, size2, pos, regs, stop);
+
+    /* With the C emulation of alloca, a call with argument 0 is made
+       once the internal matcher has returned.  */
+#if !defined REGEX_MALLOC && defined C_ALLOCA
+    alloca(0);
+#endif
+    return match_length;
+}
+
+#ifdef _LIBC
+weak_alias(__re_match_2, re_match_2)
+#endif
+/* This is a separate function so that we can force an alloca cleanup
+ afterwards. */
+static int
+re_match_2_internal(bufp, string1, size1, string2, size2, pos, regs, stop)
+struct re_pattern_buffer *bufp;
+const char *string1, *string2;
+int size1, size2;
+int pos;
+struct re_registers *regs;
+int stop;
+{
+ /* General temporaries. */
+ int mcnt;
+ unsigned char *p1;
+
+ /* Just past the end of the corresponding string. */
+ const char *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const char *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const char *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+ unsigned char *p = bufp->buffer;
+ register unsigned char *pend = p + bufp->used;
+
+ /* Mark the opcode just after a start_memory, so we can test for an
+ empty subpattern when we get to the stop_memory. */
+ unsigned char *just_past_start_mem = 0;
+
+ /* We use this to map every character in the string. */
+ RE_TRANSLATE_TYPE translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ regstart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next one is tried. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ fail_stack_type fail_stack;
+#endif
+#ifdef DEBUG
+ static unsigned failure_id;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ size_t num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the pattern we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **regstart, **regend;
+#endif
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **old_regstart, **old_regend;
+#endif
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ register_info_type *reg_info;
+#endif
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **best_regstart, **best_regend;
+#endif
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
+
+ /* This helps SET_REGS_MATCHED avoid doing redundant work. */
+ int set_regs_matched_done = 0;
+
+ /* Used when we pop values we don't care about. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
+#endif
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK();
+
+#ifdef MATCH_MAY_ALLOCATE
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub) {
+ regstart = REGEX_TALLOC(num_regs, const char *);
+ regend = REGEX_TALLOC(num_regs, const char *);
+ old_regstart = REGEX_TALLOC(num_regs, const char *);
+ old_regend = REGEX_TALLOC(num_regs, const char *);
+ best_regstart = REGEX_TALLOC(num_regs, const char *);
+ best_regend = REGEX_TALLOC(num_regs, const char *);
+
+ reg_info = REGEX_TALLOC(num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC(num_regs, const char *);
+
+ reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy
+ && reg_info_dummy)) {
+ FREE_VARIABLES();
+ return -2;
+ }
+ } else {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* MATCH_MAY_ALLOCATE */
+
+ /* The starting position is bogus. */
+ if (pos < 0 || pos > size1 + size2) {
+ FREE_VARIABLES();
+ return -1;
+ }
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P(reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE(reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING(reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL) {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (stop <= size1) {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ } else {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+ if (size1 > 0 && pos <= size1) {
+ d = string1 + pos;
+ dend = end_match_1;
+ } else {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+
+ DEBUG_PRINT1("The compiled pattern is:\n");
+ DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
+ DEBUG_PRINT1("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
+ DEBUG_PRINT1("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;) {
+#ifdef _LIBC
+ DEBUG_PRINT2("\n%p: ", p);
+#else
+ DEBUG_PRINT2("\n0x%x: ", p);
+#endif
+
+ if (p == pend) { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
+ if (d != end_match_2) {
+ /* 1 if this match ends in the same string (string1 or string2)
+ as the best previous match. */
+ boolean same_str_p = (FIRST_STRING_P(match_end)
+ == MATCHING_IN_FIRST_STRING);
+
+ /* 1 if this match is the best seen so far. */
+ boolean best_match_p;
+
+ /* AIX compiler got confused when this was combined
+ with the previous declaration. */
+ if (same_str_p)
+ best_match_p = d > match_end;
+ else
+ best_match_p = !MATCHING_IN_FIRST_STRING;
+
+ DEBUG_PRINT1("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY()) { /* More failure points to try. */
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set || best_match_p) {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. And if
+ last match is real best match, don't restore second
+ best one. */
+ else if (best_regs_set && !best_match_p) {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1("Restoring best registers.\n");
+
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ }
+ /* d != end_match_2 */
+ succeed_label:
+ DEBUG_PRINT1("Accepting match.\n");
+
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub) {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED) { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX(RE_NREGS, num_regs + 1);
+ regs->start = TALLOC(regs->num_regs, regoff_t);
+ regs->end = TALLOC(regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL) {
+ FREE_VARIABLES();
+ return -2;
+ }
+ bufp->regs_allocated = REGS_REALLOCATE;
+ } else if (bufp->regs_allocated == REGS_REALLOCATE) { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1) {
+ regs->num_regs = num_regs + 1;
+ RETALLOC(regs->start, regs->num_regs, regoff_t);
+ RETALLOC(regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL) {
+ FREE_VARIABLES();
+ return -2;
+ }
+ }
+ } else {
+ /* These braces fend off a "empty body in an else-statement"
+ warning under GCC when assert expands to nothing. */
+ assert(bufp->regs_allocated == REGS_FIXED);
+ }
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0) {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
+ ? ((regoff_t) (d - string1))
+ : ((regoff_t) (d - string2 + size1)));
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1;
+ (unsigned) mcnt < MIN(num_regs, regs->num_regs);
+ mcnt++) {
+ if (REG_UNSET(regstart[mcnt])
+ || REG_UNSET(regend[mcnt])) regs->start[mcnt] =
+ regs->end[mcnt] = -1;
+ else {
+ regs->start[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET(regstart[mcnt]);
+ regs->end[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET(regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs;
+ mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ }
+ /* regs && !bufp->no_sub */
+ DEBUG_PRINT4
+ ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
+
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1 : string2 - size1);
+
+ DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
+
+ FREE_VARIABLES();
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+ switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1("EXECUTING no_op.\n");
+ break;
+
+ case succeed:
+ DEBUG_PRINT1("EXECUTING succeed.\n");
+ goto succeed_label;
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+ mcnt = *p++;
+ DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate) {
+ do {
+ PREFETCH();
+ if ((unsigned char) translate[(unsigned char) *d++]
+ != (unsigned char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ } else {
+ do {
+ PREFETCH();
+ if (*d++ != (char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1("EXECUTING anychar.\n");
+
+ PREFETCH();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE(*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL
+ && TRANSLATE(*d) == '\000')) goto fail;
+
+ SET_REGS_MATCHED();
+ DEBUG_PRINT2(" Matched `%d'.\n", *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register unsigned char c;
+ boolean not = (re_opcode_t) * (p - 1) == charset_not;
+
+ DEBUG_PRINT2("EXECUTING charset%s.\n", not ? "_not" : "");
+
+ PREFETCH();
+ c = TRANSLATE(*d); /* The character to match. */
+
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not)
+ goto fail;
+
+ SET_REGS_MATCHED();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P(reg_info[*p]) ==
+ MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P(reg_info[*p]) =
+ group_match_null_string_p(&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
+ ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2(" old_regstart: %d\n",
+ POINTER_TO_OFFSET(old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2(" regstart: %d\n",
+ POINTER_TO_OFFSET(regstart[*p]));
+
+ IS_ACTIVE(reg_info[*p]) = 1;
+ MATCHED_SOMETHING(reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ just_past_start_mem = p;
+
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
+ ? REG_UNSET(regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2(" old_regend: %d\n",
+ POINTER_TO_OFFSET(old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2(" regend: %d\n",
+ POINTER_TO_OFFSET(regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE(reg_info[*p]) = 0;
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg) {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ } else { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ unsigned char r = *p - 1;
+
+ while (r > 0 && !IS_ACTIVE(reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0) {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ } else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop'', and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING(reg_info[*p])
+ || just_past_start_mem == p - 1)
+ && (p + 2) < pend) {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) * p1++) {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
+ if (is_a_jump_n)
+ p1 += 2;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) * p1 == on_failure_jump
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING(reg_info[*p])) {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING(reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p;
+ r < (unsigned) *p + (unsigned) *(p + 1); r++) {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if (old_regend[r] >= regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
+ PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const char *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+
+ DEBUG_PRINT2("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET(regstart[regno]) || REG_UNSET(regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P(regstart[regno])
+ == FIRST_STRING_P(regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;) {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2) {
+ if (dend2 == end_match_2)
+ break;
+ if (dend2 == regend[regno])
+ break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2)
+ break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate ? bcmp_translate(d, d2, mcnt, translate)
+ : memcmp(d, d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+
+ /* Do this because we've matched some characters. */
+ SET_REGS_MATCHED();
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG(d)) {
+ if (!bufp->not_bol)
+ break;
+ } else if (d[-1] == '\n' && bufp->newline_anchor) {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END(d)) {
+ if (!bufp->not_eol)
+ break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor) {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG(d))
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END(d))
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3(" %d (to %p):\n", mcnt, p + mcnt);
+#else
+ DEBUG_PRINT3(" %d (to 0x%x):\n", mcnt, p + mcnt);
+#endif
+
+ PUSH_FAILURE_POINT(p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3(" %d (to %p)", mcnt, p + mcnt);
+#else
+ DEBUG_PRINT3(" %d (to 0x%x)", mcnt, p + mcnt);
+#endif
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we need the preceding group,
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) * p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1(":\n");
+ PUSH_FAILURE_POINT(p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+ DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register unsigned char *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands.
+ If what follows this loop is a ...+ construct,
+ look at what begins its body, since we will have to
+ match at least one of that. */
+ while (1) {
+ if (p2 + 2 < pend
+ && ((re_opcode_t) * p2 == stop_memory
+ || (re_opcode_t) * p2 == start_memory))
+ p2 += 3;
+ else if (p2 + 6 < pend
+ && (re_opcode_t) * p2 == dummy_failure_jump)
+ p2 += 6;
+ else
+ break;
+ }
+
+ p1 = p + mcnt;
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_finalize_jump' of this case. Examine what
+ follows. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend) {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) * p2 == exactn
+ || (bufp->newline_anchor
+ && (re_opcode_t) * p2 == endline)) {
+ register unsigned char c =
+ *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3(" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not) {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ && p1[5 +
+ c / BYTEWIDTH] & (1 << (c %
+ BYTEWIDTH))) not
+ = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not) {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" No match => pop_failure_jump.\n");
+ }
+ }
+ } else if ((re_opcode_t) * p2 == charset) {
+ /* We win if the first character of the loop is not part
+ of the charset. */
+ if ((re_opcode_t) p1[3] == exactn
+ && !((int) p2[1] * BYTEWIDTH > (int) p1[5]
+ && (p2[2 + p1[5] / BYTEWIDTH]
+ & (1 << (p1[5] % BYTEWIDTH))))) {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1(" No match => pop_failure_jump.\n");
+ }
+
+ else if ((re_opcode_t) p1[3] == charset_not) {
+ int idx;
+
+ /* We win if the charset_not inside the loop
+ lists every character listed in the charset after. */
+ for (idx = 0; idx < (int) p2[1]; idx++)
+ if (!(p2[2 + idx] == 0 || (idx < (int) p1[4]
+ &&
+ ((p2
+ [2 +
+ idx] & ~p1[5 +
+ idx])
+ == 0))))
+ break;
+
+ if (idx == p2[1]) {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" No match => pop_failure_jump.\n");
+ }
+ } else if ((re_opcode_t) p1[3] == charset) {
+ int idx;
+
+ /* We win if the charset inside the loop
+ has no overlap with the one after the loop. */
+ for (idx = 0;
+ idx < (int) p2[1] && idx < (int) p1[4]; idx++)
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
+ break;
+
+ if (idx == p2[1] || idx == p1[4]) {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump) {
+ p[-1] = (unsigned char) jump;
+ DEBUG_PRINT1(" Match => jump.\n");
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ active_reg_t dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
+
+ DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT(sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+ unconditional_jump:
+#ifdef _LIBC
+ DEBUG_PRINT2("\n%p: ", p);
+#else
+ DEBUG_PRINT2("\n0x%x: ", p);
+#endif
+ /* Note fall through. */
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR(mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+#ifdef _LIBC
+ DEBUG_PRINT2("(to %p).\n", p);
+#else
+ DEBUG_PRINT2("(to 0x%x).\n", p);
+#endif
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT(NULL, NULL, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT(NULL, NULL, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER(mcnt, p + 2);
+ DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert(mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0) {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR(p, mcnt);
+#ifdef _LIBC
+ DEBUG_PRINT3(" Setting %p to %d.\n", p - 2, mcnt);
+#else
+ DEBUG_PRINT3(" Setting 0x%x to %d.\n", p - 2, mcnt);
+#endif
+ } else if (mcnt == 0) {
+#ifdef _LIBC
+ DEBUG_PRINT2(" Setting two bytes from %p to no_op.\n",
+ p + 2);
+#else
+ DEBUG_PRINT2(" Setting two bytes from 0x%x to no_op.\n",
+ p + 2);
+#endif
+ p[2] = (unsigned char) no_op;
+ p[3] = (unsigned char) no_op;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER(mcnt, p + 2);
+ DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt) {
+ mcnt--;
+ STORE_NUMBER(p + 2, mcnt);
+#ifdef _LIBC
+ DEBUG_PRINT3(" Setting %p to %d.\n", p + 2, mcnt);
+#else
+ DEBUG_PRINT3(" Setting 0x%x to %d.\n", p + 2, mcnt);
+#endif
+ goto unconditional_jump;
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR(mcnt, p);
+#ifdef _LIBC
+ DEBUG_PRINT3(" Setting %p to %d.\n", p1, mcnt);
+#else
+ DEBUG_PRINT3(" Setting 0x%x to %d.\n", p1, mcnt);
+#endif
+ STORE_NUMBER(p1, mcnt);
+ break;
+ }
+
+#if 0
+ /* The DEC Alpha C compiler 3.x generates incorrect code for the
+ test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
+ AT_WORD_BOUNDARY, so this code is disabled. Expanding the
+ macro and introducing temporary variables works around the bug. */
+
+ case wordbound:
+ DEBUG_PRINT1("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY(d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY(d))
+ goto fail;
+ break;
+#else
+ case wordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1("EXECUTING wordbound.\n");
+ if (AT_STRINGS_BEG(d) || AT_STRINGS_END(d))
+ break;
+
+ prevchar = WORDCHAR_P(d - 1);
+ thischar = WORDCHAR_P(d);
+ if (prevchar != thischar)
+ break;
+ goto fail;
+ }
+
+ case notwordbound:
+ {
+ boolean prevchar, thischar;
+
+ DEBUG_PRINT1("EXECUTING notwordbound.\n");
+ if (AT_STRINGS_BEG(d) || AT_STRINGS_END(d))
+ goto fail;
+
+ prevchar = WORDCHAR_P(d - 1);
+ thischar = WORDCHAR_P(d);
+ if (prevchar != thischar)
+ goto fail;
+ break;
+ }
+#endif
+
+ case wordbeg:
+ DEBUG_PRINT1("EXECUTING wordbeg.\n");
+ if (WORDCHAR_P(d) && (AT_STRINGS_BEG(d) || !WORDCHAR_P(d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG(d) && WORDCHAR_P(d - 1)
+ && (!WORDCHAR_P(d) || AT_STRINGS_END(d)))
+ break;
+ goto fail;
+
+#ifdef emacs
+ case before_dot:
+ DEBUG_PRINT1("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS((unsigned char *) d) <= point)
+ goto fail;
+ break;
+
+ case syntaxspec:
+ DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX(d[-1]) != (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax:
+ PREFETCH();
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX(d[-1]) == (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH();
+ if (!WORDCHAR_P(d))
+ goto fail;
+ SET_REGS_MATCHED();
+ d++;
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH();
+ if (WORDCHAR_P(d))
+ goto fail;
+ SET_REGS_MATCHED();
+ d++;
+ break;
+#endif /* not emacs */
+
+ default:
+ abort();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY()) { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1("\nFAIL:\n");
+ POP_FAILURE_POINT(d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert(p <= pend);
+ if (p < pend) {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) * p) {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) * p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) * p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ } else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2. */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise.
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ Otherwise, sets P to an undefined byte less than or equal to END.
+
+ We don't handle duplicates properly (yet). */
+
+/* Decide whether the group opened by a start_memory can match the empty
+   string.  On entry *P points at the start_memory's register number; the
+   scan begins two bytes later.  Returns true only when the walk reaches a
+   stop_memory without meeting a construct that cannot match nothing, in
+   which case *P is set to two bytes past the stop_memory opcode.  On every
+   false return *P is left as the caller passed it (it is written only on
+   the stop_memory path). */
+static boolean group_match_null_string_p(p, end, reg_info)
+unsigned char **p, *end;
+register_info_type *reg_info;
+{
+ int mcnt;
+
+ /* Point to after the args to the start_memory. */
+ unsigned char *p1 = *p + 2;
+
+ while (p1 < end) {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) * p1) {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ /* Step over the opcode, then read its two-byte displacement into
+ mcnt; p1 ends up just past the displacement. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. */
+
+ if (mcnt >= 0) {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. */
+
+ /* p1[mcnt - 3] is the opcode byte three bytes before the jump
+ target, i.e. where a trailing jump_past_alt (one opcode byte
+ plus a two-byte number) would sit. */
+ while ((re_opcode_t) p1[mcnt - 3] == jump_past_alt) {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ if (!alt_match_null_string_p(p1, p1 + mcnt - 3,
+ reg_info)) return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) * p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
+ if ((re_opcode_t) p1[mcnt - 3] != jump_past_alt) {
+ /* Get to the beginning of the n-th alternative. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER(mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p(p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt >= 0 */
+ /* For a negative displacement nothing further is skipped: the scan
+ simply resumes at the opcode following the on_failure_jump. */
+ break;
+
+
+ case stop_memory:
+ /* The closing register number must be the one *P points at. */
+ assert(p1[1] == **p);
+ *p = p1 + 2;
+ return true;
+
+
+ default:
+ /* Any other opcode is delegated; the helper advances p1 itself
+ when it returns true. */
+ if (!common_op_match_null_string_p(&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ /* Ran off END without meeting the matching stop_memory. */
+ return false;
+} /* group_match_null_string_p */
+
+
+/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
+ It expects P to be the first byte of a single alternative and END one
+ byte past the last. The alternative can contain groups. */
+
+static boolean alt_match_null_string_p(p, end, reg_info)
+unsigned char *p, *end;
+register_info_type *reg_info;
+{
+    unsigned char *cursor = p;
+
+    /* Walk the single alternative [P, END) one opcode at a time.  The
+       alternative can match the empty string only if every opcode met on
+       the way can; the first one that cannot ends the scan with false. */
+    while (cursor < end) {
+        int displacement;
+
+        if ((re_opcode_t) * cursor != on_failure_jump) {
+            /* Everything that is not a loop header is judged by the
+               shared helper, which also advances the cursor. */
+            if (!common_op_match_null_string_p(&cursor, end, reg_info))
+                return false;
+            continue;
+        }
+
+        /* It's a loop: step over the opcode, read its jump displacement
+           and follow it, skipping the loop body altogether. */
+        cursor++;
+        EXTRACT_NUMBER_AND_INCR(displacement, cursor);
+        cursor += displacement;
+    }
+
+    return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ Sets P to one after the op and its arguments, if any. */
+
+/* Examine the single opcode at *P and report whether it can match the
+   empty string.  On a true return *P has been advanced to wherever the
+   scan should continue; on a false return *P is left untouched (it is
+   assigned only at the very end of the function). */
+static boolean common_op_match_null_string_p(p, end, reg_info)
+unsigned char **p, *end;
+register_info_type *reg_info;
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ /* p1 is stepped past the opcode byte before the case bodies run. */
+ switch ((re_opcode_t) * p1++) {
+ /* These opcodes are accepted as matching nothing; no argument bytes
+ are skipped for them. */
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ case start_memory:
+ /* p1 now points at the group's register number; the group scanner
+ moves p1 past the matching stop_memory when it returns true. */
+ reg_no = *p1;
+ assert(reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p(&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. */
+
+ if (REG_MATCH_NULL_STRING_P(reg_info[reg_no]) ==
+ MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P(reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump. */
+ case jump:
+ /* Only a forward (non-negative) jump is followed; a backward jump
+ makes the answer false. */
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false;
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
+
+ if (mcnt == 0) {
+ /* A zero count: back up over the count and the two bytes before
+ it, re-read those two bytes as a displacement and follow it. */
+ p1 -= 4;
+ EXTRACT_NUMBER_AND_INCR(mcnt, p1);
+ p1 += mcnt;
+ } else
+ return false;
+ break;
+
+ case duplicate:
+ /* NOTE(review): p1 is not advanced past the register-number byte
+ here, so on success *P ends up pointing at that byte rather than
+ after it -- confirm whether this is intended. */
+ if (!REG_MATCH_NULL_STRING_P(reg_info[*p1]))
+ return false;
+ break;
+
+ case set_number_at:
+ /* NOTE(review): there is no break, so control falls into `default'
+ and returns false; the adjustment of p1 is never stored to *P. */
+ p1 += 4;
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ *p = p1;
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
+ bytes; nonzero otherwise. */
+
+static int bcmp_translate(s1, s2, len, translate)
+const char *s1, *s2;
+register int len;
+RE_TRANSLATE_TYPE translate;
+{
+    /* View both inputs as unsigned bytes so they can index TRANSLATE. */
+    const unsigned char *lhs = (const unsigned char *) s1;
+    const unsigned char *rhs = (const unsigned char *) s2;
+
+    /* Compare byte by byte through the translation table, stopping at the
+       first pair whose translated values differ. */
+    for (; len != 0; len--, lhs++, rhs++) {
+        if (translate[*lhs] != translate[*rhs])
+            return 1;
+    }
+
+    /* All LEN translated bytes agreed. */
+    return 0;
+}
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length SIZE) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry.
+
+ We call regex_compile to do the actual compilation. */
+
+const char *re_compile_pattern(pattern, length, bufp)
+const char *pattern;
+size_t length;
+struct re_pattern_buffer *bufp;
+{
+    reg_errcode_t status;
+
+    /* Defaults expected by GNU callers: registers are allocated on demand
+       (at least RE_NREGS of them), register reporting is controlled by
+       the REGS argument of the matcher rather than by no_sub, and anchors
+       match at newlines. */
+    bufp->regs_allocated = REGS_UNALLOCATED;
+    bufp->no_sub = 0;
+    bufp->newline_anchor = 1;
+
+    status = regex_compile(pattern, length, re_syntax_options, bufp);
+
+    /* A zero status means success, reported as a null pointer; anything
+       else is mapped to its message text. */
+    return status
+        ? gettext(re_error_msgid + re_error_msgid_idx[(int) status])
+        : NULL;
+}
+
+#ifdef _LIBC
+weak_alias(__re_compile_pattern, re_compile_pattern)
+#endif
+ /* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+#if defined _REGEX_RE_COMP || defined _LIBC
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+/* Compile S into the single BSD-style pattern buffer.
+
+   A null S asks whether a previously compiled expression exists: the
+   result is 0 if the buffer has been set up, otherwise an error string.
+   For a non-null S the result is NULL on success or the message for the
+   error reported by regex_compile.
+
+   The pattern buffer and its fastmap are allocated together on first use.
+   If either allocation fails the buffer is left unset, so the next call
+   retries both and re_comp(NULL) keeps reporting that no expression has
+   been compiled. */
+char *
+#ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec below without link errors. */ weak_function
+#endif
+re_comp(s)
+const char *s;
+{
+    reg_errcode_t ret;
+
+    if (!s) {
+        if (!re_comp_buf.buffer)
+            return gettext("No previous regular expression");
+        return 0;
+    }
+
+    if (!re_comp_buf.buffer) {
+        re_comp_buf.buffer = (unsigned char *) malloc(200);
+        if (re_comp_buf.buffer == NULL)
+            return (char *) gettext(re_error_msgid
+                                    +
+                                    re_error_msgid_idx[(int) REG_ESPACE]);
+        re_comp_buf.allocated = 200;
+
+        re_comp_buf.fastmap = (char *) malloc(1 << BYTEWIDTH);
+        if (re_comp_buf.fastmap == NULL) {
+            /* Undo the buffer allocation: leaving it installed would make
+               later calls skip this setup (never retrying the fastmap)
+               and make re_comp(NULL) claim a compiled expression exists. */
+            free(re_comp_buf.buffer);
+            re_comp_buf.buffer = NULL;
+            re_comp_buf.allocated = 0;
+            return (char *) gettext(re_error_msgid
+                                    +
+                                    re_error_msgid_idx[(int) REG_ESPACE]);
+        }
+    }
+
+    /* Since `re_exec' always passes NULL for the `regs' argument, we
+       don't need to initialize the pattern buffer fields which affect it. */
+
+    /* Match anchors at newlines. */
+    re_comp_buf.newline_anchor = 1;
+
+    ret = regex_compile(s, strlen(s), re_syntax_options, &re_comp_buf);
+
+    if (!ret)
+        return NULL;
+
+    /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
+    return (char *) gettext(re_error_msgid +
+                            re_error_msgid_idx[(int) ret]);
+}
+
+
+int
+#ifdef _LIBC
+ weak_function
+#endif
+re_exec(s)
+const char *s;
+{
+    /* Search all of S, from offset 0 over its whole length, with the
+       pattern held in the BSD buffer; no register information is
+       requested. */
+    const int subject_len = strlen(s);
+    int match_pos;
+
+    match_pos = re_search(&re_comp_buf, s, subject_len, 0, subject_len,
+                          (struct re_registers *) 0);
+
+    /* Nonzero exactly when re_search returned a non-negative value. */
+    return match_pos >= 0;
+}
+
+#endif /* _REGEX_RE_COMP */
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+/* POSIX entry point: compile PATTERN into PREG according to CFLAGS.
+   Returns 0 on success, otherwise a reg_errcode_t value cast to int.
+   If the case-folding table cannot be allocated, the fastmap allocated
+   just before it is released again, so that early failure leaks nothing
+   and leaves all of PREG's pointers null. */
+int regcomp(preg, pattern, cflags)
+regex_t *preg;
+const char *pattern;
+int cflags;
+{
+    reg_errcode_t ret;
+    reg_syntax_t syntax
+        = (cflags & REG_EXTENDED) ?
+        RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+
+    /* regex_compile will allocate the space for the compiled pattern. */
+    preg->buffer = 0;
+    preg->allocated = 0;
+    preg->used = 0;
+
+    /* Try to allocate space for the fastmap.  A null result is not an
+       error here; it is checked before the fastmap is computed below. */
+    preg->fastmap = (char *) malloc(1 << BYTEWIDTH);
+
+    if (cflags & REG_ICASE) {
+        unsigned i;
+
+        preg->translate
+            = (RE_TRANSLATE_TYPE) malloc(CHAR_SET_SIZE
+                                         * sizeof(*(RE_TRANSLATE_TYPE) 0));
+        if (preg->translate == NULL) {
+            /* Release the fastmap obtained above; free() accepts a null
+               pointer, so this is safe even if that allocation failed. */
+            free(preg->fastmap);
+            preg->fastmap = NULL;
+            return (int) REG_ESPACE;
+        }
+
+        /* Map uppercase characters to corresponding lowercase ones. */
+        for (i = 0; i < CHAR_SET_SIZE; i++)
+            preg->translate[i] = ISUPPER(i) ? TOLOWER(i) : i;
+    } else
+        preg->translate = NULL;
+
+    /* If REG_NEWLINE is set, newlines are treated differently. */
+    if (cflags & REG_NEWLINE) {
+        /* REG_NEWLINE implies neither . nor [^...] match newline. */
+        syntax &= ~RE_DOT_NEWLINE;
+        syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+        /* It also changes the matching behavior. */
+        preg->newline_anchor = 1;
+    } else
+        preg->newline_anchor = 0;
+
+    preg->no_sub = !!(cflags & REG_NOSUB);
+
+    /* POSIX says a null character in the pattern terminates it, so we
+       can use strlen here in compiling the pattern. */
+    ret = regex_compile(pattern, strlen(pattern), syntax, preg);
+
+    /* POSIX doesn't distinguish between an unmatched open-group and an
+       unmatched close-group: both are REG_EPAREN. */
+    if (ret == REG_ERPAREN)
+        ret = REG_EPAREN;
+
+    if (ret == REG_NOERROR && preg->fastmap) {
+        /* Compute the fastmap now, since regexec cannot modify the pattern
+           buffer. */
+        if (re_compile_fastmap(preg) == -2) {
+            /* Some error occurred while computing the fastmap, just forget
+               about it. */
+            free(preg->fastmap);
+            preg->fastmap = NULL;
+        }
+    }
+
+    return (int) ret;
+}
+
+#ifdef _LIBC
+weak_alias(__regcomp, regcomp)
+#endif
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+int regexec(preg, string, nmatch, pmatch, eflags)
+const regex_t *preg;
+const char *string;
+size_t nmatch;
+regmatch_t pmatch[];
+int eflags;
+{
+    struct re_registers regs;
+    /* Stays null unless the caller wants (and may receive) offsets. */
+    struct re_registers *regs_arg = (struct re_registers *) 0;
+    /* PREG is const, so the per-call flags go into a private copy. */
+    regex_t scratch = *preg;
+    const int subject_len = strlen(string);
+    int found_at;
+
+    scratch.not_bol = (eflags & REG_NOTBOL) != 0;
+    scratch.not_eol = (eflags & REG_NOTEOL) != 0;
+
+    /* The caller fixed the number of registers via NMATCH; tell the
+       matching routines not to resize them. */
+    scratch.regs_allocated = REGS_FIXED;
+
+    if (!preg->no_sub && nmatch > 0) {
+        /* One allocation holds both arrays: starts first, then ends. */
+        regs.num_regs = nmatch;
+        regs.start = TALLOC(nmatch * 2, regoff_t);
+        if (regs.start == NULL)
+            return (int) REG_NOMATCH;
+        regs.end = regs.start + nmatch;
+        regs_arg = &regs;
+    }
+
+    /* Search the whole string, starting at offset 0. */
+    found_at = re_search(&scratch, string, subject_len,
+                         0, subject_len, regs_arg);
+
+    if (regs_arg != NULL) {
+        /* On a match, copy the offsets into the POSIX structure. */
+        if (found_at >= 0) {
+            unsigned slot;
+
+            for (slot = 0; slot < nmatch; slot++) {
+                pmatch[slot].rm_so = regs.start[slot];
+                pmatch[slot].rm_eo = regs.end[slot];
+            }
+        }
+
+        /* The temporary register storage is no longer needed. */
+        free(regs.start);
+    }
+
+    /* Unlike `re_search', zero means success here. */
+    return found_at >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+#ifdef _LIBC
+weak_alias(__regexec, regexec)
+#endif
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+ size_t regerror(errcode, preg, errbuf, errbuf_size)
+int errcode;
+const regex_t *preg;
+char *errbuf;
+size_t errbuf_size;
+{
+    const int known_codes = (int) (sizeof(re_error_msgid_idx)
+                                   / sizeof(re_error_msgid_idx[0]));
+    const char *text;
+    size_t needed;
+
+    /* An out-of-range code can only come from a bug in the caller or in
+       the regex code itself; dump core so it can be fixed. */
+    if (errcode < 0 || errcode >= known_codes)
+        abort();
+
+    text = gettext(re_error_msgid + re_error_msgid_idx[errcode]);
+
+    /* Size of the full message, including its terminating null. */
+    needed = strlen(text) + 1;
+
+    /* No room to write anything: just report the size required. */
+    if (errbuf_size == 0)
+        return needed;
+
+    /* The whole message fits, terminator included. */
+    if (needed <= errbuf_size) {
+        memcpy(errbuf, text, needed);
+        return needed;
+    }
+
+    /* Otherwise copy as much as fits and terminate it by hand. */
+#if defined HAVE_MEMPCPY || defined _LIBC
+    *((char *) __mempcpy(errbuf, text, errbuf_size - 1)) = '\0';
+#else
+    memcpy(errbuf, text, errbuf_size - 1);
+    errbuf[errbuf_size - 1] = 0;
+#endif
+
+    return needed;
+}
+
+#ifdef _LIBC
+weak_alias(__regerror, regerror)
+#endif
+/* Free dynamically allocated space used by PREG. */
+void regfree(preg)
+regex_t *preg;
+{
+    /* free() is a no-op on a null pointer, so each member can be released
+       unconditionally and then reset. */
+
+    /* Compiled pattern and its bookkeeping. */
+    free(preg->buffer);
+    preg->buffer = NULL;
+    preg->allocated = 0;
+    preg->used = 0;
+
+    /* Fastmap. */
+    free(preg->fastmap);
+    preg->fastmap = NULL;
+    preg->fastmap_accurate = 0;
+
+    /* Case-folding translate table. */
+    free(preg->translate);
+    preg->translate = NULL;
+}
+
+#ifdef _LIBC
+weak_alias(__regfree, regfree)
+#endif
+#endif /* not emacs */
diff --git a/libc/misc/regex/rx.c b/libc/misc/regex/rx.c
deleted file mode 100644
index 39f77adb6..000000000
--- a/libc/misc/regex/rx.c
+++ /dev/null
@@ -1,7273 +0,0 @@
-/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
-
-This file is part of the librx library.
-
-Librx is free software; you can redistribute it and/or modify it under
-the terms of the GNU Library General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-Librx is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU Library General Public
-License along with this software; see the file COPYING.LIB. If not,
-write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
-02139, USA. */
-
-/* NOTE!!! AIX is so losing it requires this to be the first thing in the
- * file.
- * Do not put ANYTHING before it!
- */
-#if !defined (__GNUC__) && defined (_AIX)
-#pragma alloca
-#endif
-
-/* To make linux happy? */
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#ifndef isgraph
-#define isgraph(c) (isprint (c) && !isspace (c))
-#endif
-#ifndef isblank
-#define isblank(c) ((c) == ' ' || (c) == '\t')
-#endif
-
-#include <sys/types.h>
-
-#undef MAX
-#undef MIN
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-typedef char boolean;
-
-#define false 0
-#define true 1
-
-#ifndef __GCC__
-#undef __inline__
-#define __inline__
-#endif
-
-/* Emacs already defines alloca, sometimes. */
-#ifndef alloca
-
-/* Make alloca work the best possible way. */
-#ifdef __GNUC__
-#define alloca __builtin_alloca
-#else /* not __GNUC__ */
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#else /* not __GNUC__ or HAVE_ALLOCA_H */
-#ifndef _AIX /* Already did AIX, up at the top. */
-char *alloca();
-#endif /* not _AIX */
-#endif /* not HAVE_ALLOCA_H */
-#endif /* not __GNUC__ */
-
-#endif /* not alloca */
-
-/* Memory management and stuff for emacs. */
-
-#define CHARBITS 8
-#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
-
-
-/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
- * use `alloca' instead of `malloc' for the backtracking stack.
- *
- * Emacs will die miserably if we don't do this.
- */
-
-#ifdef REGEX_MALLOC
-#define REGEX_ALLOCATE malloc
-#else /* not REGEX_MALLOC */
-#define REGEX_ALLOCATE alloca
-#endif /* not REGEX_MALLOC */
-
-
-#ifdef RX_WANT_RX_DEFS
-#define RX_DECL extern
-#define RX_DEF_QUAL
-#else
-#define RX_WANT_RX_DEFS
-#define RX_DECL static
-#define RX_DEF_QUAL static
-#endif
-
-#include <regex.h>
-#undef RX_DECL
-#define RX_DECL RX_DEF_QUAL
-
-
-/*
- * Prototypes.
- */
-#ifdef __STDC__
-RX_DECL struct rx_hash_item
-*rx_hash_find(struct rx_hash *, unsigned long,
-
- void *, struct rx_hash_rules *);
-RX_DECL struct rx_hash_item
-*rx_hash_find(struct rx_hash *, unsigned long,
-
- void *, struct rx_hash_rules *);
-RX_DECL struct rx_hash_item
-*rx_hash_store(struct rx_hash *, unsigned long,
-
- void *, struct rx_hash_rules *);
-RX_DECL void rx_hash_free(struct rx_hash_item *, struct rx_hash_rules *);
-RX_DECL void rx_free_hash_table(struct rx_hash *, rx_hash_freefn,
-
- struct rx_hash_rules *);
-RX_DECL rx_Bitset rx_cset(struct rx *);
-RX_DECL rx_Bitset rx_copy_cset(struct rx *, rx_Bitset);
-RX_DECL void rx_free_cset(struct rx *, rx_Bitset);
-static struct rx_hash_item
-*compiler_hash_item_alloc(struct rx_hash_rules *, void *);
-static struct rx_hash
-*compiler_hash_alloc(struct rx_hash_rules *);
-static void compiler_free_hash(struct rx_hash *, struct rx_hash_rules *);
-static void compiler_free_hash_item(struct rx_hash_item *,
-
- struct rx_hash_rules *);
-RX_DECL struct rexp_node
-*rexp_node(struct rx *, enum rexp_node_type);
-RX_DECL struct rexp_node
-*rx_mk_r_cset(struct rx *, rx_Bitset);
-RX_DECL struct rexp_node
-*rx_mk_r_concat(struct rx *, struct rexp_node *, struct rexp_node *);
-RX_DECL struct rexp_node
-*rx_mk_r_alternate(struct rx *, struct rexp_node *, struct rexp_node *);
-RX_DECL struct rexp_node
-*rx_mk_r_alternate(struct rx *, struct rexp_node *, struct rexp_node *);
-RX_DECL struct rexp_node
-*rx_mk_r_opt(struct rx *, struct rexp_node *);
-RX_DECL struct rexp_node
-*rx_mk_r_star(struct rx *, struct rexp_node *);
-RX_DECL struct rexp_node
-*rx_mk_r_2phase_star(struct rx *, struct rexp_node *, struct rexp_node *);
-RX_DECL struct rexp_node
-*rx_mk_r_side_effect(struct rx *, rx_side_effect);
-
-//RX_DECL struct rexp_node
-// *rx_mk_r_data (struct rx *, void *);
-RX_DECL void rx_free_rexp(struct rx *, struct rexp_node *);
-RX_DECL struct rexp_node
-*rx_copy_rexp(struct rx *, struct rexp_node *);
-RX_DECL struct rx_nfa_state
-*rx_nfa_state(struct rx *);
-RX_DECL void rx_free_nfa_state(struct rx_nfa_state *);
-RX_DECL struct rx_nfa_state
-*rx_id_to_nfa_state(struct rx *, int);
-RX_DECL struct rx_nfa_edge
-*rx_nfa_edge(struct rx *, enum rx_nfa_etype,
-
- struct rx_nfa_state *, struct rx_nfa_state *);
-RX_DECL void rx_free_nfa_edge(struct rx_nfa_edge *);
-static struct rx_possible_future
-*rx_possible_future(struct rx *, struct rx_se_list *);
-static void rx_free_possible_future(struct rx_possible_future *);
-RX_DECL void rx_free_nfa(struct rx *);
-RX_DECL int rx_build_nfa(struct rx *, struct rexp_node *,
- struct rx_nfa_state **, struct rx_nfa_state **);
-RX_DECL void rx_name_nfa_states(struct rx *);
-static int se_list_cmp(void *, void *);
-static int se_list_equal(void *, void *);
-static struct rx_se_list
-*hash_cons_se_prog(struct rx *, struct rx_hash *,
-
- void *, struct rx_se_list *);
-static struct rx_se_list
-*hash_se_prog(struct rx *, struct rx_hash *, struct rx_se_list *);
-static int nfa_set_cmp(void *, void *);
-static int nfa_set_equal(void *, void *);
-static struct rx_nfa_state_set
-*nfa_set_cons(struct rx *, struct rx_hash *,
-
- struct rx_nfa_state *, struct rx_nfa_state_set *);
-static struct rx_nfa_state_set
-*nfa_set_enjoin(struct rx *, struct rx_hash *,
-
- struct rx_nfa_state *, struct rx_nfa_state_set *);
-#endif
-
-#ifndef emacs
-
-#ifdef SYNTAX_TABLE
-extern char *re_syntax_table;
-#else /* not SYNTAX_TABLE */
-
-#ifndef RX_WANT_RX_DEFS
-RX_DECL char re_syntax_table[CHAR_SET_SIZE];
-#endif
-
-#ifdef __STDC__
-static void init_syntax_once(void)
-#else
-static void init_syntax_once()
-#endif
-{
- register int c;
- static int done = 0;
-
- if (done)
- return;
-
- bzero(re_syntax_table, sizeof re_syntax_table);
-
- for (c = 'a'; c <= 'z'; c++)
- re_syntax_table[c] = Sword;
-
- for (c = 'A'; c <= 'Z'; c++)
- re_syntax_table[c] = Sword;
-
- for (c = '0'; c <= '9'; c++)
- re_syntax_table[c] = Sword;
-
- re_syntax_table['_'] = Sword;
-
- done = 1;
-}
-#endif /* not SYNTAX_TABLE */
-#endif /* not emacs */
-
-/* Compile with `-DRX_DEBUG' and use the following flags.
- *
- * Debugging flags:
- * rx_debug - print information as a regexp is compiled
- * rx_debug_trace - print information as a regexp is executed
- */
-
-#ifdef RX_DEBUG
-
-int rx_debug_compile = 0;
-int rx_debug_trace = 0;
-static struct re_pattern_buffer *dbug_rxb = 0;
-
-
-/*
- * More Prototypes
- */
-#ifdef __STDC__
-typedef void (*side_effect_printer) (struct rx *, void *, FILE *);
-static void print_cset(struct rx *, rx_Bitset, FILE *);
-static void print_rexp(struct rx *, struct rexp_node *, int,
- side_effect_printer, FILE *);
-static void print_nfa(struct rx *, struct rx_nfa_state *,
- side_effect_printer, FILE *);
-static void re_seprint(struct rx *, void *, FILE *);
-void print_compiled_pattern(struct re_pattern_buffer *);
-void print_fastmap(char *);
-#else
-typedef void (*side_effect_printer) ();
-static void print_cset();
-#endif
-
-#ifdef __STDC__
-static void
-print_rexp(struct rx *rx,
- struct rexp_node *node, int depth,
- side_effect_printer seprint, FILE * fp)
-#else
-static void print_rexp(rx, node, depth, seprint, fp)
-struct rx *rx;
-struct rexp_node *node;
-int depth;
-side_effect_printer seprint;
-FILE *fp;
-#endif
-{
- if (!node)
- return;
- else {
- switch (node->type) {
- case r_cset:
- {
- fprintf(fp, "%*s", depth, "");
- print_cset(rx, node->params.cset, fp);
- fputc('\n', fp);
- break;
- }
-
- case r_opt:
- case r_star:
- fprintf(fp, "%*s%s\n", depth, "",
- node->type == r_opt ? "opt" : "star");
- print_rexp(rx, node->params.pair.left, depth + 3, seprint, fp);
- break;
-
- case r_2phase_star:
- fprintf(fp, "%*s2phase star\n", depth, "");
- print_rexp(rx, node->params.pair.right, depth + 3, seprint,
- fp);
- print_rexp(rx, node->params.pair.left, depth + 3, seprint, fp);
- break;
-
-
- case r_alternate:
- case r_concat:
- fprintf(fp, "%*s%s\n", depth, "",
- node->type == r_alternate ? "alt" : "concat");
- print_rexp(rx, node->params.pair.left, depth + 3, seprint, fp);
- print_rexp(rx, node->params.pair.right, depth + 3, seprint,
- fp);
- break;
- case r_side_effect:
- fprintf(fp, "%*sSide effect: ", depth, "");
- seprint(rx, node->params.side_effect, fp);
- fputc('\n', fp);
- }
- }
-}
-
-#ifdef __STDC__
-static void
-print_nfa(struct rx *rx,
- struct rx_nfa_state *n, side_effect_printer seprint, FILE * fp)
-#else
-static void print_nfa(rx, n, seprint, fp)
-struct rx *rx;
-struct rx_nfa_state *n;
-side_effect_printer seprint;
-FILE *fp;
-#endif
-{
- while (n) {
- struct rx_nfa_edge *e = n->edges;
- struct rx_possible_future *ec = n->futures;
-
- fprintf(fp, "node %d %s\n", n->id,
- n->is_final ? "final" : (n->is_start ? "start" : ""));
- while (e) {
- fprintf(fp, " edge to %d, ", e->dest->id);
- switch (e->type) {
- case ne_epsilon:
- fprintf(fp, "epsilon\n");
- break;
- case ne_side_effect:
- fprintf(fp, "side effect ");
- seprint(rx, e->params.side_effect, fp);
- fputc('\n', fp);
- break;
- case ne_cset:
- fprintf(fp, "cset ");
- print_cset(rx, e->params.cset, fp);
- fputc('\n', fp);
- break;
- }
- e = e->next;
- }
-
- while (ec) {
- int x;
- struct rx_nfa_state_set *s;
- struct rx_se_list *l;
-
- fprintf(fp, " eclosure to {");
- for (s = ec->destset; s; s = s->cdr)
- fprintf(fp, "%d ", s->car->id);
- fprintf(fp, "} (");
- for (l = ec->effects; l; l = l->cdr) {
- seprint(rx, l->car, fp);
- fputc(' ', fp);
- }
- fprintf(fp, ")\n");
- ec = ec->next;
- }
- n = n->next;
- }
-}
-
-static char *efnames[] = {
- "bogon",
- "re_se_try",
- "re_se_pushback",
- "re_se_push0",
- "re_se_pushpos",
- "re_se_chkpos",
- "re_se_poppos",
- "re_se_at_dot",
- "re_se_syntax",
- "re_se_not_syntax",
- "re_se_begbuf",
- "re_se_hat",
- "re_se_wordbeg",
- "re_se_wordbound",
- "re_se_notwordbound",
- "re_se_wordend",
- "re_se_endbuf",
- "re_se_dollar",
- "re_se_fail",
-};
-
-static char *efnames2[] = {
- "re_se_win",
- "re_se_lparen",
- "re_se_rparen",
- "re_se_backref",
- "re_se_iter",
- "re_se_end_iter",
- "re_se_tv"
-};
-
-static char *inx_names[] = {
- "rx_backtrack_point",
- "rx_do_side_effects",
- "rx_cache_miss",
- "rx_next_char",
- "rx_backtrack",
- "rx_error_inx",
- "rx_num_instructions"
-};
-
-
-#ifdef __STDC__
-static void re_seprint(struct rx *rx, void *effect, FILE * fp)
-#else
-static void re_seprint(rx, effect, fp)
-struct rx *rx;
-void *effect;
-FILE *fp;
-#endif
-{
- if ((int) effect < 0)
- fputs(efnames[-(int) effect], fp);
- else if (dbug_rxb) {
- struct re_se_params *p = &dbug_rxb->se_params[(int) effect];
-
- fprintf(fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2);
- } else
- fprintf(fp, "[complex op # %d]", (int) effect);
-}
-
-/* These are so the regex.c regression tests will compile. */
-void print_compiled_pattern(rxb)
-struct re_pattern_buffer *rxb;
-{
-}
-
-void print_fastmap(fm)
-char *fm;
-{
-}
-
-#endif /* RX_DEBUG */
-
-
-
-/* This page: Bitsets. Completely unintersting. */
-
-//RX_DECL int rx_bitset_is_equal (int, rx_Bitset, rx_Bitset);
-RX_DECL int rx_bitset_is_subset(int, rx_Bitset, rx_Bitset);
-
-//RX_DECL int rx_bitset_empty (int, rx_Bitset);
-RX_DECL void rx_bitset_null(int, rx_Bitset);
-RX_DECL void rx_bitset_complement(int, rx_Bitset);
-RX_DECL void rx_bitset_complement(int, rx_Bitset);
-RX_DECL void rx_bitset_assign(int, rx_Bitset, rx_Bitset);
-RX_DECL void rx_bitset_union(int, rx_Bitset, rx_Bitset);
-RX_DECL void rx_bitset_intersection(int, rx_Bitset, rx_Bitset);
-RX_DECL void rx_bitset_difference(int, rx_Bitset, rx_Bitset);
-
-//RX_DECL void rx_bitset_revdifference (int, rx_Bitset, rx_Bitset);
-#ifdef emacs
-RX_DECL void rx_bitset_xor(int, rx_Bitset, rx_Bitset);
-#endif
-RX_DECL unsigned long rx_bitset_hash(int, rx_Bitset);
-
-#if 0
-#ifdef __STDC__
-RX_DECL int rx_bitset_is_equal(int size, rx_Bitset a, rx_Bitset b)
-#else
-RX_DECL int rx_bitset_is_equal(size, a, b)
-int size;
-rx_Bitset a;
-rx_Bitset b;
-#endif
-{
- int x;
- RX_subset s = b[0];
-
- b[0] = ~a[0];
-
- for (x = rx_bitset_numb_subsets(size) - 1; a[x] == b[x]; --x);
-
- b[0] = s;
- return !x && s == a[0];
-}
-#endif
-
-#ifdef __STDC__
-RX_DECL int rx_bitset_is_subset(int size, rx_Bitset a, rx_Bitset b)
-#else
-RX_DECL int rx_bitset_is_subset(size, a, b)
-int size;
-rx_Bitset a;
-rx_Bitset b;
-#endif
-{
- int x = rx_bitset_numb_subsets(size) - 1;
-
- while (x-- && (a[x] & b[x]) == a[x]);
- return x == -1;
-}
-
-#if 0
-#ifdef __STDC__
-RX_DECL int rx_bitset_empty(int size, rx_Bitset set)
-#else
-RX_DECL int rx_bitset_empty(size, set)
-int size;
-rx_Bitset set;
-#endif
-{
- int x;
- RX_subset s = set[0];
-
- set[0] = 1;
- for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x);
- set[0] = s;
- return !s;
-}
-#endif
-
-#ifdef __STDC__
-RX_DECL void rx_bitset_null(int size, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_null(size, b)
-int size;
-rx_Bitset b;
-#endif
-{
- bzero(b, rx_sizeof_bitset(size));
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_bitset_universe(int size, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_universe(size, b)
-int size;
-rx_Bitset b;
-#endif
-{
- int x = rx_bitset_numb_subsets(size);
-
- while (x--)
- *b++ = ~(RX_subset) 0;
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_bitset_complement(int size, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_complement(size, b)
-int size;
-rx_Bitset b;
-#endif
-{
- int x = rx_bitset_numb_subsets(size);
-
- while (x--) {
- *b = ~*b;
- ++b;
- }
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_bitset_assign(int size, rx_Bitset a, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_assign(size, a, b)
-int size;
-rx_Bitset a;
-rx_Bitset b;
-#endif
-{
- int x;
-
- for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
- a[x] = b[x];
-}
-
-#ifdef __STDC__
-RX_DECL void rx_bitset_union(int size, rx_Bitset a, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_union(size, a, b)
-int size;
-rx_Bitset a;
-rx_Bitset b;
-#endif
-{
- int x;
-
- for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
- a[x] |= b[x];
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_bitset_intersection(int size, rx_Bitset a, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_intersection(size, a, b)
-int size;
-rx_Bitset a;
-rx_Bitset b;
-#endif
-{
- int x;
-
- for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
- a[x] &= b[x];
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_bitset_difference(int size, rx_Bitset a, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_difference(size, a, b)
-int size;
-rx_Bitset a;
-rx_Bitset b;
-#endif
-{
- int x;
-
- for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
- a[x] &= ~b[x];
-}
-
-
-#if 0
-#ifdef __STDC__
-RX_DECL void rx_bitset_revdifference(int size, rx_Bitset a, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_revdifference(size, a, b)
-int size;
-rx_Bitset a;
-rx_Bitset b;
-#endif
-{
- int x;
-
- for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
- a[x] = ~a[x] & b[x];
-}
-#endif
-
-
-#ifdef emacs
-#ifdef __STDC__
-RX_DECL void rx_bitset_xor(int size, rx_Bitset a, rx_Bitset b)
-#else
-RX_DECL void rx_bitset_xor(size, a, b)
-int size;
-rx_Bitset a;
-rx_Bitset b;
-#endif
-{
- int x;
-
- for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
- a[x] ^= b[x];
-}
-#endif
-
-
-#ifdef __STDC__
-RX_DECL unsigned long rx_bitset_hash(int size, rx_Bitset b)
-#else
-RX_DECL unsigned long rx_bitset_hash(size, b)
-int size;
-rx_Bitset b;
-#endif
-{
- int x;
- unsigned long hash = (unsigned long) rx_bitset_hash;
-
- for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
- hash ^= rx_bitset_subset_val(b, x);
-
- return hash;
-}
-
-RX_DECL RX_subset rx_subset_singletons[RX_subset_bits] = {
- 0x1,
- 0x2,
- 0x4,
- 0x8,
- 0x10,
- 0x20,
- 0x40,
- 0x80,
- 0x100,
- 0x200,
- 0x400,
- 0x800,
- 0x1000,
- 0x2000,
- 0x4000,
- 0x8000,
- 0x10000,
- 0x20000,
- 0x40000,
- 0x80000,
- 0x100000,
- 0x200000,
- 0x400000,
- 0x800000,
- 0x1000000,
- 0x2000000,
- 0x4000000,
- 0x8000000,
- 0x10000000,
- 0x20000000,
- 0x40000000,
- 0x80000000
-};
-
-#ifdef RX_DEBUG
-
-#ifdef __STDC__
-static void print_cset(struct rx *rx, rx_Bitset cset, FILE * fp)
-#else
-static void print_cset(rx, cset, fp)
-struct rx *rx;
-rx_Bitset cset;
-FILE *fp;
-#endif
-{
- int x;
-
- fputc('[', fp);
- for (x = 0; x < rx->local_cset_size; ++x)
- if (RX_bitset_member(cset, x)) {
- if (isprint(x))
- fputc(x, fp);
- else
- fprintf(fp, "\\0%o ", x);
- }
- fputc(']', fp);
-}
-
-#endif /* RX_DEBUG */
-
-
-
-static unsigned long rx_hash_masks[4] = {
- 0x12488421,
- 0x96699669,
- 0xbe7dd7eb,
- 0xffffffff
-};
-
-
-/* Hash tables */
-#ifdef __STDC__
-RX_DECL struct rx_hash_item *rx_hash_find(struct rx_hash *table,
- unsigned long hash,
- void *value,
- struct rx_hash_rules *rules)
-#else
-RX_DECL struct rx_hash_item *rx_hash_find(table, hash, value, rules)
-struct rx_hash *table;
-unsigned long hash;
-void *value;
-struct rx_hash_rules *rules;
-#endif
-{
- rx_hash_eq eq = rules->eq;
- int maskc = 0;
- long mask = rx_hash_masks[0];
- int bucket = (hash & mask) % 13;
-
- while (table->children[bucket]) {
- table = table->children[bucket];
- ++maskc;
- mask = rx_hash_masks[maskc];
- bucket = (hash & mask) % 13;
- }
-
- {
- struct rx_hash_item *it = table->buckets[bucket];
-
- while (it)
- if (eq(it->data, value))
- return it;
- else
- it = it->next_same_hash;
- }
-
- return 0;
-}
-
-#ifdef __STDC__
-RX_DECL struct rx_hash_item *rx_hash_store(struct rx_hash *table,
- unsigned long hash,
- void *value,
- struct rx_hash_rules *rules)
-#else
-RX_DECL struct rx_hash_item *rx_hash_store(table, hash, value, rules)
-struct rx_hash *table;
-unsigned long hash;
-void *value;
-struct rx_hash_rules *rules;
-#endif
-{
- rx_hash_eq eq = rules->eq;
- int maskc = 0;
- long mask = rx_hash_masks[0];
- int bucket = (hash & mask) % 13;
- int depth = 0;
-
- while (table->children[bucket]) {
- table = table->children[bucket];
- ++maskc;
- mask = rx_hash_masks[maskc];
- bucket = (hash & mask) % 13;
- ++depth;
- }
-
- {
- struct rx_hash_item *it = table->buckets[bucket];
-
- while (it)
- if (eq(it->data, value))
- return it;
- else
- it = it->next_same_hash;
- }
-
- {
- if ((depth < 3)
- && (table->bucket_size[bucket] >= 4)) {
- struct rx_hash *newtab = ((struct rx_hash *)
- rules->hash_alloc(rules));
-
- if (!newtab)
- goto add_to_bucket;
- bzero(newtab, sizeof(*newtab));
- newtab->parent = table;
- {
- struct rx_hash_item *them = table->buckets[bucket];
- unsigned long newmask = rx_hash_masks[maskc + 1];
-
- while (them) {
- struct rx_hash_item *save = them->next_same_hash;
- int new_buck = (them->hash & newmask) % 13;
-
- them->next_same_hash = newtab->buckets[new_buck];
- newtab->buckets[new_buck] = them;
- them->table = newtab;
- them = save;
- ++newtab->bucket_size[new_buck];
- ++newtab->refs;
- }
- table->refs =
- (table->refs - table->bucket_size[bucket] + 1);
- table->bucket_size[bucket] = 0;
- table->buckets[bucket] = 0;
- table->children[bucket] = newtab;
- table = newtab;
- bucket = (hash & newmask) % 13;
- }
- }
- }
- add_to_bucket:
- {
- struct rx_hash_item *it = ((struct rx_hash_item *)
- rules->hash_item_alloc(rules, value));
-
- if (!it)
- return 0;
- it->hash = hash;
- it->table = table;
- /* DATA and BINDING are to be set in hash_item_alloc */
- it->next_same_hash = table->buckets[bucket];
- table->buckets[bucket] = it;
- ++table->bucket_size[bucket];
- ++table->refs;
- return it;
- }
-}
-
-
-#ifdef __STDC__
-RX_DECL void
-rx_hash_free(struct rx_hash_item *it, struct rx_hash_rules *rules)
-#else
-RX_DECL void rx_hash_free(it, rules)
-struct rx_hash_item *it;
-struct rx_hash_rules *rules;
-#endif
-{
- if (it) {
- struct rx_hash *table = it->table;
- unsigned long hash = it->hash;
- int depth = (table->parent
- ? (table->parent->parent
- ? (table->parent->parent->parent ? 3 : 2)
- : 1)
- : 0);
- int bucket = (hash & rx_hash_masks[depth]) % 13;
- struct rx_hash_item **pos = &table->buckets[bucket];
-
- while (*pos != it)
- pos = &(*pos)->next_same_hash;
- *pos = it->next_same_hash;
- rules->free_hash_item(it, rules);
- --table->bucket_size[bucket];
- --table->refs;
- while (!table->refs && depth) {
- struct rx_hash *save = table;
-
- table = table->parent;
- --depth;
- bucket = (hash & rx_hash_masks[depth]) % 13;
- --table->refs;
- table->children[bucket] = 0;
- rules->free_hash(save, rules);
- }
- }
-}
-
-#ifdef __STDC__
-RX_DECL void
-rx_free_hash_table(struct rx_hash *tab, rx_hash_freefn freefn,
- struct rx_hash_rules *rules)
-#else
-RX_DECL void rx_free_hash_table(tab, freefn, rules)
-struct rx_hash *tab;
-rx_hash_freefn freefn;
-struct rx_hash_rules *rules;
-#endif
-{
- int x;
-
- for (x = 0; x < 13; ++x)
- if (tab->children[x]) {
- rx_free_hash_table(tab->children[x], freefn, rules);
- rules->free_hash(tab->children[x], rules);
- } else {
- struct rx_hash_item *them = tab->buckets[x];
-
- while (them) {
- struct rx_hash_item *that = them;
-
- them = that->next_same_hash;
- freefn(that);
- rules->free_hash_item(that, rules);
- }
- }
-}
-
-
-
-/* Utilities for manipulating bitset represntations of characters sets. */
-
-#ifdef __STDC__
-RX_DECL rx_Bitset rx_cset(struct rx *rx)
-#else
-RX_DECL rx_Bitset rx_cset(rx)
-struct rx *rx;
-#endif
-{
- rx_Bitset b =
-
- (rx_Bitset) malloc(rx_sizeof_bitset(rx->local_cset_size));
- if (b)
- rx_bitset_null(rx->local_cset_size, b);
- return b;
-}
-
-
-#ifdef __STDC__
-RX_DECL rx_Bitset rx_copy_cset(struct rx * rx, rx_Bitset a)
-#else
-RX_DECL rx_Bitset rx_copy_cset(rx, a)
-struct rx *rx;
-rx_Bitset a;
-#endif
-{
- rx_Bitset cs = rx_cset(rx);
-
- if (cs)
- rx_bitset_union(rx->local_cset_size, cs, a);
-
- return cs;
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_free_cset(struct rx *rx, rx_Bitset c)
-#else
-RX_DECL void rx_free_cset(rx, c)
-struct rx *rx;
-rx_Bitset c;
-#endif
-{
- if (c)
- free((char *) c);
-}
-
-
-/* Hash table memory allocation policy for the regexp compiler */
-
-#ifdef __STDC__
-static struct rx_hash *compiler_hash_alloc(struct rx_hash_rules *rules)
-#else
-static struct rx_hash *compiler_hash_alloc(rules)
-struct rx_hash_rules *rules;
-#endif
-{
- return (struct rx_hash *) malloc(sizeof(struct rx_hash));
-}
-
-
-#ifdef __STDC__
-static struct rx_hash_item *compiler_hash_item_alloc(struct rx_hash_rules
- *rules, void *value)
-#else
-static struct rx_hash_item *compiler_hash_item_alloc(rules, value)
-struct rx_hash_rules *rules;
-void *value;
-#endif
-{
- struct rx_hash_item *it;
-
- it = (struct rx_hash_item *) malloc(sizeof(*it));
- if (it) {
- it->data = value;
- it->binding = 0;
- }
- return it;
-}
-
-#ifdef __STDC__
-static void
-compiler_free_hash(struct rx_hash *tab, struct rx_hash_rules *rules)
-#else
-static void compiler_free_hash(tab, rules)
-struct rx_hash *tab;
-struct rx_hash_rules *rules;
-#endif
-{
- free((char *) tab);
-}
-
-
-#ifdef __STDC__
-static void
-compiler_free_hash_item(struct rx_hash_item *item,
- struct rx_hash_rules *rules)
-#else
-static void compiler_free_hash_item(item, rules)
-struct rx_hash_item *item;
-struct rx_hash_rules *rules;
-#endif
-{
- free((char *) item);
-}
-
-
-/* This page: REXP_NODE (expression tree) structures. */
-
-#ifdef __STDC__
-RX_DECL struct rexp_node *rexp_node(struct rx *rx,
- enum rexp_node_type type)
-#else
-RX_DECL struct rexp_node *rexp_node(rx, type)
-struct rx *rx;
-enum rexp_node_type type;
-#endif
-{
- struct rexp_node *n;
-
- n = (struct rexp_node *) malloc(sizeof(*n));
- if (n) {
- bzero(n, sizeof(*n));
- n->type = type;
- }
- return n;
-}
-
-
-/* free_rexp_node assumes that the bitset passed to rx_mk_r_cset
- * can be freed using rx_free_cset.
- */
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_mk_r_cset(struct rx *rx, rx_Bitset b)
-#else
-RX_DECL struct rexp_node *rx_mk_r_cset(rx, b)
-struct rx *rx;
-rx_Bitset b;
-#endif
-{
- struct rexp_node *n = rexp_node(rx, r_cset);
-
- if (n)
- n->params.cset = b;
- return n;
-}
-
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_mk_r_concat(struct rx *rx,
- struct rexp_node *a,
- struct rexp_node *b)
-#else
-RX_DECL struct rexp_node *rx_mk_r_concat(rx, a, b)
-struct rx *rx;
-struct rexp_node *a;
-struct rexp_node *b;
-#endif
-{
- struct rexp_node *n = rexp_node(rx, r_concat);
-
- if (n) {
- n->params.pair.left = a;
- n->params.pair.right = b;
- }
- return n;
-}
-
-
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_mk_r_alternate(struct rx *rx,
- struct rexp_node *a,
- struct rexp_node *b)
-#else
-RX_DECL struct rexp_node *rx_mk_r_alternate(rx, a, b)
-struct rx *rx;
-struct rexp_node *a;
-struct rexp_node *b;
-#endif
-{
- struct rexp_node *n = rexp_node(rx, r_alternate);
-
- if (n) {
- n->params.pair.left = a;
- n->params.pair.right = b;
- }
- return n;
-}
-
-
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_mk_r_opt(struct rx *rx, struct rexp_node *a)
-#else
-RX_DECL struct rexp_node *rx_mk_r_opt(rx, a)
-struct rx *rx;
-struct rexp_node *a;
-#endif
-{
- struct rexp_node *n = rexp_node(rx, r_opt);
-
- if (n) {
- n->params.pair.left = a;
- n->params.pair.right = 0;
- }
- return n;
-}
-
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_mk_r_star(struct rx *rx, struct rexp_node *a)
-#else
-RX_DECL struct rexp_node *rx_mk_r_star(rx, a)
-struct rx *rx;
-struct rexp_node *a;
-#endif
-{
- struct rexp_node *n = rexp_node(rx, r_star);
-
- if (n) {
- n->params.pair.left = a;
- n->params.pair.right = 0;
- }
- return n;
-}
-
-
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_mk_r_2phase_star(struct rx *rx,
- struct rexp_node *a,
- struct rexp_node *b)
-#else
-RX_DECL struct rexp_node *rx_mk_r_2phase_star(rx, a, b)
-struct rx *rx;
-struct rexp_node *a;
-struct rexp_node *b;
-#endif
-{
- struct rexp_node *n = rexp_node(rx, r_2phase_star);
-
- if (n) {
- n->params.pair.left = a;
- n->params.pair.right = b;
- }
- return n;
-}
-
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_mk_r_side_effect(struct rx *rx,
- rx_side_effect a)
-#else
-RX_DECL struct rexp_node *rx_mk_r_side_effect(rx, a)
-struct rx *rx;
-rx_side_effect a;
-#endif
-{
- struct rexp_node *n = rexp_node(rx, r_side_effect);
-
- if (n) {
- n->params.side_effect = a;
- n->params.pair.right = 0;
- }
- return n;
-}
-
-
-#if 0
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_mk_r_data(struct rx *rx, void *a)
-#else
-RX_DECL struct rexp_node *rx_mk_r_data(rx, a)
-struct rx *rx;
-void *a;
-#endif
-{
- struct rexp_node *n = rexp_node(rx, r_data);
-
- if (n) {
- n->params.pair.left = a;
- n->params.pair.right = 0;
- }
- return n;
-}
-#endif
-
-#ifdef __STDC__
-RX_DECL void rx_free_rexp(struct rx *rx, struct rexp_node *node)
-#else
-RX_DECL void rx_free_rexp(rx, node)
-struct rx *rx;
-struct rexp_node *node;
-#endif
-{
- if (node) {
- switch (node->type) {
- case r_cset:
- if (node->params.cset)
- rx_free_cset(rx, node->params.cset);
-
- case r_side_effect:
- break;
-
- case r_concat:
- case r_alternate:
- case r_2phase_star:
- case r_opt:
- case r_star:
- rx_free_rexp(rx, node->params.pair.left);
- rx_free_rexp(rx, node->params.pair.right);
- break;
-
- case r_data:
- /* This shouldn't occur. */
- break;
- }
- free((char *) node);
- }
-}
-
-#ifdef __STDC__
-RX_DECL struct rexp_node *rx_copy_rexp(struct rx *rx,
- struct rexp_node *node)
-#else
-RX_DECL struct rexp_node *rx_copy_rexp(rx, node)
-struct rx *rx;
-struct rexp_node *node;
-#endif
-{
- if (!node)
- return 0;
- else {
- struct rexp_node *n = rexp_node(rx, node->type);
-
- if (!n)
- return 0;
- switch (node->type) {
- case r_cset:
- n->params.cset = rx_copy_cset(rx, node->params.cset);
- if (!n->params.cset) {
- rx_free_rexp(rx, n);
- return 0;
- }
- break;
-
- case r_side_effect:
- n->params.side_effect = node->params.side_effect;
- break;
-
- case r_concat:
- case r_alternate:
- case r_opt:
- case r_2phase_star:
- case r_star:
- n->params.pair.left = rx_copy_rexp(rx, node->params.pair.left);
- n->params.pair.right =
- rx_copy_rexp(rx, node->params.pair.right);
- if ((node->params.pair.left && !n->params.pair.left)
- || (node->params.pair.right && !n->params.pair.right)) {
- rx_free_rexp(rx, n);
- return 0;
- }
- break;
- case r_data:
- /* shouldn't happen */
- break;
- }
- return n;
- }
-}
-
-
-
-/* This page: functions to build and destroy graphs that describe nfa's */
-
-/* Constructs a new nfa node. */
-#ifdef __STDC__
-RX_DECL struct rx_nfa_state *rx_nfa_state(struct rx *rx)
-#else
-RX_DECL struct rx_nfa_state *rx_nfa_state(rx)
-struct rx *rx;
-#endif
-{
- struct rx_nfa_state *n = (struct rx_nfa_state *) malloc(sizeof(*n));
-
- if (!n)
- return 0;
- bzero(n, sizeof(*n));
- n->next = rx->nfa_states;
- rx->nfa_states = n;
- return n;
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_free_nfa_state(struct rx_nfa_state *n)
-#else
-RX_DECL void rx_free_nfa_state(n)
-struct rx_nfa_state *n;
-#endif
-{
- free((char *) n);
-}
-
-
-/* This looks up an nfa node, given a numeric id. Numeric id's are
- * assigned after the nfa has been built.
- */
-#ifdef __STDC__
-RX_DECL struct rx_nfa_state *rx_id_to_nfa_state(struct rx *rx, int id)
-#else
-RX_DECL struct rx_nfa_state *rx_id_to_nfa_state(rx, id)
-struct rx *rx;
-int id;
-#endif
-{
- struct rx_nfa_state *n;
-
- for (n = rx->nfa_states; n; n = n->next)
- if (n->id == id)
- return n;
- return 0;
-}
-
-
-/* This adds an edge between two nodes, but doesn't initialize the
- * edge label.
- */
-
-#ifdef __STDC__
-RX_DECL struct rx_nfa_edge *rx_nfa_edge(struct rx *rx,
- enum rx_nfa_etype type,
- struct rx_nfa_state *start,
- struct rx_nfa_state *dest)
-#else
-RX_DECL struct rx_nfa_edge *rx_nfa_edge(rx, type, start, dest)
-struct rx *rx;
-enum rx_nfa_etype type;
-struct rx_nfa_state *start;
-struct rx_nfa_state *dest;
-#endif
-{
- struct rx_nfa_edge *e;
-
- e = (struct rx_nfa_edge *) malloc(sizeof(*e));
- if (!e)
- return 0;
- e->next = start->edges;
- start->edges = e;
- e->type = type;
- e->dest = dest;
- return e;
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_free_nfa_edge(struct rx_nfa_edge *e)
-#else
-RX_DECL void rx_free_nfa_edge(e)
-struct rx_nfa_edge *e;
-#endif
-{
- free((char *) e);
-}
-
-
-/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
- * of an NFA. These are added to an nfa automaticly by eclose_nfa.
- */
-
-#ifdef __STDC__
-static struct rx_possible_future *rx_possible_future(struct rx *rx, struct rx_se_list
- *effects)
-#else
-static struct rx_possible_future *rx_possible_future(rx, effects)
-struct rx *rx;
-struct rx_se_list *effects;
-#endif
-{
- struct rx_possible_future *ec;
-
- ec = (struct rx_possible_future *) malloc(sizeof(*ec));
- if (!ec)
- return 0;
- ec->destset = 0;
- ec->next = 0;
- ec->effects = effects;
- return ec;
-}
-
-
-#ifdef __STDC__
-static void rx_free_possible_future(struct rx_possible_future *pf)
-#else
-static void rx_free_possible_future(pf)
-struct rx_possible_future *pf;
-#endif
-{
- free((char *) pf);
-}
-
-
-#ifdef __STDC__
-RX_DECL void rx_free_nfa(struct rx *rx)
-#else
-RX_DECL void rx_free_nfa(rx)
-struct rx *rx;
-#endif
-{
- while (rx->nfa_states) {
- while (rx->nfa_states->edges) {
- switch (rx->nfa_states->edges->type) {
- case ne_cset:
- rx_free_cset(rx, rx->nfa_states->edges->params.cset);
- break;
- default:
- break;
- }
- {
- struct rx_nfa_edge *e;
-
- e = rx->nfa_states->edges;
- rx->nfa_states->edges = rx->nfa_states->edges->next;
- rx_free_nfa_edge(e);
- }
- } /* while (rx->nfa_states->edges) */
- {
- /* Iterate over the partial epsilon closures of rx->nfa_states */
- struct rx_possible_future *pf = rx->nfa_states->futures;
-
- while (pf) {
- struct rx_possible_future *pft = pf;
-
- pf = pf->next;
- rx_free_possible_future(pft);
- }
- }
- {
- struct rx_nfa_state *n;
-
- n = rx->nfa_states;
- rx->nfa_states = rx->nfa_states->next;
- rx_free_nfa_state(n);
- }
- }
-}
-
-
-
-/* This page: translating a pattern expression into an nfa and doing the
- * static part of the nfa->super-nfa translation.
- */
-
-/* This is the thompson regexp->nfa algorithm.
- * It is modified to allow for `side-effect epsilons.' Those are
- * edges that are taken whenever a similar epsilon edge would be,
- * but which imply that some side effect occurs when the edge
- * is taken.
- *
- * Side effects are used to model parts of the pattern langauge
- * that are not regular (in the formal sense).
- */
-
-#ifdef __STDC__
-RX_DECL int
-rx_build_nfa(struct rx *rx,
- struct rexp_node *rexp,
- struct rx_nfa_state **start, struct rx_nfa_state **end)
-#else
-RX_DECL int rx_build_nfa(rx, rexp, start, end)
-struct rx *rx;
-struct rexp_node *rexp;
-struct rx_nfa_state **start;
-struct rx_nfa_state **end;
-#endif
-{
- struct rx_nfa_edge *edge;
-
- /* Start & end nodes may have been allocated by the caller. */
- *start = *start ? *start : rx_nfa_state(rx);
-
- if (!*start)
- return 0;
-
- if (!rexp) {
- *end = *start;
- return 1;
- }
-
- *end = *end ? *end : rx_nfa_state(rx);
-
- if (!*end) {
- rx_free_nfa_state(*start);
- return 0;
- }
-
- switch (rexp->type) {
- case r_data:
- return 0;
-
- case r_cset:
- edge = rx_nfa_edge(rx, ne_cset, *start, *end);
- if (!edge)
- return 0;
- edge->params.cset = rx_copy_cset(rx, rexp->params.cset);
- if (!edge->params.cset) {
- rx_free_nfa_edge(edge);
- return 0;
- }
- return 1;
-
- case r_opt:
- return (rx_build_nfa(rx, rexp->params.pair.left, start, end)
- && rx_nfa_edge(rx, ne_epsilon, *start, *end));
-
- case r_star:
- {
- struct rx_nfa_state *star_start = 0;
- struct rx_nfa_state *star_end = 0;
-
- return (rx_build_nfa(rx, rexp->params.pair.left,
- &star_start, &star_end)
- && star_start
- && star_end
- && rx_nfa_edge(rx, ne_epsilon, star_start, star_end)
- && rx_nfa_edge(rx, ne_epsilon, *start, star_start)
- && rx_nfa_edge(rx, ne_epsilon, star_end, *end)
-
- && rx_nfa_edge(rx, ne_epsilon, star_end, star_start));
- }
-
- case r_2phase_star:
- {
- struct rx_nfa_state *star_start = 0;
- struct rx_nfa_state *star_end = 0;
- struct rx_nfa_state *loop_exp_start = 0;
- struct rx_nfa_state *loop_exp_end = 0;
-
- return (rx_build_nfa(rx, rexp->params.pair.left,
- &star_start, &star_end)
- && rx_build_nfa(rx, rexp->params.pair.right,
- &loop_exp_start, &loop_exp_end)
- && star_start
- && star_end
- && loop_exp_end
- && loop_exp_start
- && rx_nfa_edge(rx, ne_epsilon, star_start, *end)
- && rx_nfa_edge(rx, ne_epsilon, *start, star_start)
- && rx_nfa_edge(rx, ne_epsilon, star_end, *end)
-
- && rx_nfa_edge(rx, ne_epsilon, star_end, loop_exp_start)
- && rx_nfa_edge(rx, ne_epsilon, loop_exp_end, star_start));
- }
-
-
- case r_concat:
- {
- struct rx_nfa_state *shared = 0;
-
- return (rx_build_nfa(rx, rexp->params.pair.left, start, &shared)
- && rx_build_nfa(rx, rexp->params.pair.right, &shared,
- end));
- }
-
- case r_alternate:
- {
- struct rx_nfa_state *ls = 0;
- struct rx_nfa_state *le = 0;
- struct rx_nfa_state *rs = 0;
- struct rx_nfa_state *re = 0;
-
- return (rx_build_nfa(rx, rexp->params.pair.left, &ls, &le)
- && rx_build_nfa(rx, rexp->params.pair.right, &rs, &re)
- && rx_nfa_edge(rx, ne_epsilon, *start, ls)
- && rx_nfa_edge(rx, ne_epsilon, *start, rs)
- && rx_nfa_edge(rx, ne_epsilon, le, *end)
- && rx_nfa_edge(rx, ne_epsilon, re, *end));
- }
-
- case r_side_effect:
- edge = rx_nfa_edge(rx, ne_side_effect, *start, *end);
- if (!edge)
- return 0;
- edge->params.side_effect = rexp->params.side_effect;
- return 1;
- }
-
- /* this should never happen */
- return 0;
-}
-
-
-/* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon
- * transitions. Only these nodes can occur in super-states.
- * All nodes are given an integer id.
- * The id is non-negative if the node has non-epsilon out-transitions, negative
- * otherwise (this is because we want the non-negative ids to be used as
- * array indexes in a few places).
- */
-
-#ifdef __STDC__
-RX_DECL void rx_name_nfa_states(struct rx *rx)
-#else
-RX_DECL void rx_name_nfa_states(rx)
-struct rx *rx;
-#endif
-{
- struct rx_nfa_state *n = rx->nfa_states;
-
- rx->nodec = 0;
- rx->epsnodec = -1;
-
- while (n) {
- struct rx_nfa_edge *e = n->edges;
-
- if (n->is_start)
- n->eclosure_needed = 1;
-
- while (e) {
- switch (e->type) {
- case ne_epsilon:
- case ne_side_effect:
- break;
-
- case ne_cset:
- n->id = rx->nodec++;
- {
- struct rx_nfa_edge *from_n = n->edges;
-
- while (from_n) {
- from_n->dest->eclosure_needed = 1;
- from_n = from_n->next;
- }
- }
- goto cont;
- }
- e = e->next;
- }
- n->id = rx->epsnodec--;
- cont:
- n = n->next;
- }
- rx->epsnodec = -rx->epsnodec;
-}
-
-
-/* This page: data structures for the static part of the nfa->supernfa
- * translation.
- *
- * There are side effect lists -- lists of side effects occuring
- * along an uninterrupted, acyclic path of side-effect epsilon edges.
- * Such paths are collapsed to single edges in the course of computing
- * epsilon closures. Such single edges are labled with a list of all
- * the side effects entailed in crossing them. Like lists of side
- * effects are made == by the constructors below.
- *
- * There are also nfa state sets. These are used to hold a list of all
- * states reachable from a starting state for a given type of transition
- * and side effect list. These are also hash-consed.
- */
-
-/* The next several functions compare, construct, etc. lists of side
- * effects. See ECLOSE_NFA (below) for details.
- */
-
-/* Ordering of rx_se_list
- * (-1, 0, 1 return value convention).
- */
-
-#ifdef __STDC__
-static int se_list_cmp(void *va, void *vb)
-#else
-static int se_list_cmp(va, vb)
-void *va;
-void *vb;
-#endif
-{
- struct rx_se_list *a = (struct rx_se_list *) va;
- struct rx_se_list *b = (struct rx_se_list *) vb;
-
- return ((va == vb)
- ? 0
- : (!va
- ? -1
- : (!vb
- ? 1
- : ((long) a->car < (long) b->car
- ? 1
- : ((long) a->car > (long) b->car
- ? -1
- : se_list_cmp((void *) a->cdr,
- (void *) b->cdr))))));
-}
-
-
-#ifdef __STDC__
-static int se_list_equal(void *va, void *vb)
-#else
-static int se_list_equal(va, vb)
-void *va;
-void *vb;
-#endif
-{
- return !(se_list_cmp(va, vb));
-}
-
-static struct rx_hash_rules se_list_hash_rules = {
- se_list_equal,
- compiler_hash_alloc,
- compiler_free_hash,
- compiler_hash_item_alloc,
- compiler_free_hash_item
-};
-
-
-#ifdef __STDC__
-static struct rx_se_list *side_effect_cons(struct rx *rx,
- void *se,
- struct rx_se_list *list)
-#else
-static struct rx_se_list *side_effect_cons(rx, se, list)
-struct rx *rx;
-void *se;
-struct rx_se_list *list;
-#endif
-{
- struct rx_se_list *l;
-
- l = ((struct rx_se_list *) malloc(sizeof(*l)));
- if (!l)
- return 0;
- l->car = se;
- l->cdr = list;
- return l;
-}
-
-
-#ifdef __STDC__
-static struct rx_se_list *hash_cons_se_prog(struct rx *rx,
- struct rx_hash *memo,
- void *car,
- struct rx_se_list *cdr)
-#else
-static struct rx_se_list *hash_cons_se_prog(rx, memo, car, cdr)
-struct rx *rx;
-struct rx_hash *memo;
-void *car;
-struct rx_se_list *cdr;
-#endif
-{
- long hash = (long) car ^ (long) cdr;
- struct rx_se_list template;
-
- template.car = car;
- template.cdr = cdr;
- {
- struct rx_hash_item *it = rx_hash_store(memo, hash,
- (void *) &template,
- &se_list_hash_rules);
-
- if (!it)
- return 0;
- if (it->data == (void *) &template) {
- struct rx_se_list *consed;
-
- consed = (struct rx_se_list *) malloc(sizeof(*consed));
- if (!consed) {
- free((char *) it);
- return 0;
- }
- *consed = template;
- it->data = (void *) consed;
- }
- return (struct rx_se_list *) it->data;
- }
-}
-
-
-#ifdef __STDC__
-static struct rx_se_list *hash_se_prog(struct rx *rx, struct rx_hash *memo,
- struct rx_se_list *prog)
-#else
-static struct rx_se_list *hash_se_prog(rx, memo, prog)
-struct rx *rx;
-struct rx_hash *memo;
-struct rx_se_list *prog;
-#endif
-{
- struct rx_se_list *answer = 0;
-
- while (prog) {
- answer = hash_cons_se_prog(rx, memo, prog->car, answer);
- if (!answer)
- return 0;
- prog = prog->cdr;
- }
- return answer;
-}
-
-#ifdef __STDC__
-static int nfa_set_cmp(void *va, void *vb)
-#else
-static int nfa_set_cmp(va, vb)
-void *va;
-void *vb;
-#endif
-{
- struct rx_nfa_state_set *a = (struct rx_nfa_state_set *) va;
- struct rx_nfa_state_set *b = (struct rx_nfa_state_set *) vb;
-
- return ((va == vb)
- ? 0
- : (!va
- ? -1
- : (!vb
- ? 1
- : (a->car->id < b->car->id
- ? 1
- : (a->car->id > b->car->id
- ? -1
- : nfa_set_cmp((void *) a->cdr,
- (void *) b->cdr))))));
-}
-
-#ifdef __STDC__
-static int nfa_set_equal(void *va, void *vb)
-#else
-static int nfa_set_equal(va, vb)
-void *va;
-void *vb;
-#endif
-{
- return !nfa_set_cmp(va, vb);
-}
-
-static struct rx_hash_rules nfa_set_hash_rules = {
- nfa_set_equal,
- compiler_hash_alloc,
- compiler_free_hash,
- compiler_hash_item_alloc,
- compiler_free_hash_item
-};
-
-
-#ifdef __STDC__
-static struct rx_nfa_state_set *nfa_set_cons(struct rx *rx,
- struct rx_hash *memo,
- struct rx_nfa_state *state,
- struct rx_nfa_state_set *set)
-#else
-static struct rx_nfa_state_set *nfa_set_cons(rx, memo, state, set)
-struct rx *rx;
-struct rx_hash *memo;
-struct rx_nfa_state *state;
-struct rx_nfa_state_set *set;
-#endif
-{
- struct rx_nfa_state_set template;
- struct rx_hash_item *node;
-
- template.car = state;
- template.cdr = set;
- node = rx_hash_store(memo,
- (((long) state) >> 8) ^ (long) set,
- &template, &nfa_set_hash_rules);
- if (!node)
- return 0;
- if (node->data == &template) {
- struct rx_nfa_state_set *l;
-
- l = (struct rx_nfa_state_set *) malloc(sizeof(*l));
- node->data = (void *) l;
- if (!l)
- return 0;
- *l = template;
- }
- return (struct rx_nfa_state_set *) node->data;
-}
-
-#ifdef __STDC__
-static struct rx_nfa_state_set *nfa_set_enjoin(struct rx *rx,
- struct rx_hash *memo,
- struct rx_nfa_state *state,
- struct rx_nfa_state_set
- *set)
-#else
-static struct rx_nfa_state_set *nfa_set_enjoin(rx, memo, state, set)
-struct rx *rx;
-struct rx_hash *memo;
-struct rx_nfa_state *state;
-struct rx_nfa_state_set *set;
-#endif
-{
- if (!set || state->id < set->car->id)
- return nfa_set_cons(rx, memo, state, set);
- if (state->id == set->car->id)
- return set;
- else {
- struct rx_nfa_state_set *newcdr
-
- = nfa_set_enjoin(rx, memo, state, set->cdr);
- if (newcdr != set->cdr)
- set = nfa_set_cons(rx, memo, set->car, newcdr);
- return set;
- }
-}
-
-
-
-/* This page: computing epsilon closures. The closures aren't total.
- * Each node's closures are partitioned according to the side effects entailed
- * along the epsilon edges. Return true on success.
- */
-
-struct eclose_frame {
- struct rx_se_list *prog_backwards;
-};
-static int eclose_node(struct rx *, struct rx_nfa_state *,
- struct rx_nfa_state *, struct eclose_frame *);
-RX_DECL int rx_eclose_nfa(struct rx *);
-RX_DECL void rx_delete_epsilon_transitions(struct rx *);
-static int nfacmp(void *, void *);
-static int count_hash_nodes(struct rx_hash *);
-static void nfa_set_freer(struct rx_hash_item *);
-RX_DECL int rx_compactify_nfa(struct rx *, void **, unsigned long *);
-static char *rx_cache_malloc(struct rx_cache *, int);
-static void rx_cache_free(struct rx_cache *,
-
- struct rx_freelist **, char *);
-static void install_transition(struct rx_superstate *,
-
- struct rx_inx *, rx_Bitset);
-static int qlen(struct rx_superstate *);
-static void check_cache(struct rx_cache *);
-static void semifree_superstate(struct rx_cache *);
-static void refresh_semifree_superstate
-
- (struct rx_cache *, struct rx_superstate *);
-static void rx_refresh_this_superstate
-
- (struct rx_cache *, struct rx_superstate *);
-static void release_superset_low(struct rx_cache *, struct rx_superset *);
-RX_DECL void rx_release_superset(struct rx *, struct rx_superset *);
-static int rx_really_free_superstate(struct rx_cache *);
-static char *rx_cache_get(struct rx_cache *, struct rx_freelist **);
-static char *rx_cache_malloc_or_get(struct rx_cache *,
- struct rx_freelist **, int);
-static char *rx_cache_get_superstate(struct rx_cache *);
-static int supersetcmp(void *, void *);
-static struct rx_hash_item
-*superset_allocator(struct rx_hash_rules *, void *);
-static struct rx_hash
-*super_hash_allocator(struct rx_hash_rules *);
-static void super_hash_liberator(struct rx_hash *, struct rx_hash_rules *);
-static void superset_hash_item_liberator
-
- (struct rx_hash_item *, struct rx_hash_rules *);
-static int bytes_for_cache_size(int, int);
-static void rx_morecore(struct rx_cache *);
-RX_DECL struct rx_superset
-*rx_superset_cons(struct rx *, struct rx_nfa_state *,
-
- struct rx_superset *);
-RX_DECL struct rx_superset
-*rx_superstate_eclosure_union
-
- (struct rx *, struct rx_superset *, struct rx_nfa_state_set *);
-static struct rx_distinct_future
-*include_futures(struct rx *,
- struct rx_distinct_future *,
-
- struct rx_nfa_state *, struct rx_superstate *);
-RX_DECL struct rx_superstate
-*rx_superstate(struct rx *, struct rx_superset *);
-static int solve_destination(struct rx *, struct rx_distinct_future *);
-static int compute_super_edge(struct rx *,
- struct rx_distinct_future **,
-
- rx_Bitset, struct rx_superstate *,
- unsigned char);
-static struct rx_super_edge
-*rx_super_edge(struct rx *, struct rx_superstate *,
-
- rx_Bitset, struct rx_distinct_future *);
-static void install_partial_transition
- (struct rx_superstate *, struct rx_inx *, RX_subset, int);
-RX_DECL struct rx_inx
-*rx_handle_cache_miss(struct rx *, struct rx_superstate *,
-
- unsigned char, void *);
-static boolean
-
-at_begline_loc_p(__const__ char *, __const__ char *, reg_syntax_t);
-static boolean at_endline_loc_p(__const__ char *, __const__ char *, int);
-static rx_Bitset
-inverse_translation(struct re_pattern_buffer *, char *,
- rx_Bitset, unsigned char *, int);
-
-
-#ifdef __STDC__
-static int
-eclose_node(struct rx *rx, struct rx_nfa_state *outnode,
- struct rx_nfa_state *node, struct eclose_frame *frame)
-#else
-static int eclose_node(rx, outnode, node, frame)
-struct rx *rx;
-struct rx_nfa_state *outnode;
-struct rx_nfa_state *node;
-struct eclose_frame *frame;
-#endif
-{
- struct rx_nfa_edge *e = node->edges;
-
- /* For each node, we follow all epsilon paths to build the closure.
- * The closure omits nodes that have only epsilon edges.
- * The closure is split into partial closures -- all the states in
- * a partial closure are reached by crossing the same list of
- * of side effects (though not necessarily the same path).
- */
- if (node->mark)
- return 1;
- node->mark = 1;
-
- if (node->id >= 0 || node->is_final) {
- struct rx_possible_future **ec;
- struct rx_se_list *prog_in_order
- = ((struct rx_se_list *) hash_se_prog(rx,
- &rx->se_list_memo,
- frame->prog_backwards));
- int cmp;
-
- ec = &outnode->futures;
-
- while (*ec) {
- cmp =
- se_list_cmp((void *) (*ec)->effects,
- (void *) prog_in_order);
- if (cmp <= 0)
- break;
- ec = &(*ec)->next;
- }
- if (!*ec || (cmp < 0)) {
- struct rx_possible_future *saved = *ec;
-
- *ec = rx_possible_future(rx, prog_in_order);
- (*ec)->next = saved;
- if (!*ec)
- return 0;
- }
- if (node->id >= 0) {
- (*ec)->destset = nfa_set_enjoin(rx, &rx->set_list_memo,
- node, (*ec)->destset);
- if (!(*ec)->destset)
- return 0;
- }
- }
-
- while (e) {
- switch (e->type) {
- case ne_epsilon:
- if (!eclose_node(rx, outnode, e->dest, frame))
- return 0;
- break;
- case ne_side_effect:
- {
- frame->prog_backwards = side_effect_cons(rx,
- e->params.side_effect,
- frame->prog_backwards);
- if (!frame->prog_backwards)
- return 0;
- if (!eclose_node(rx, outnode, e->dest, frame))
- return 0;
- {
- struct rx_se_list *dying = frame->prog_backwards;
-
- frame->prog_backwards = frame->prog_backwards->cdr;
- free((char *) dying);
- }
- break;
- }
- default:
- break;
- }
- e = e->next;
- }
- node->mark = 0;
- return 1;
-}
-
-#ifdef __STDC__
-RX_DECL int rx_eclose_nfa(struct rx *rx)
-#else
-RX_DECL int rx_eclose_nfa(rx)
-struct rx *rx;
-#endif
-{
- struct rx_nfa_state *n = rx->nfa_states;
- struct eclose_frame frame;
- static int rx_id = 0;
-
- frame.prog_backwards = 0;
- rx->rx_id = rx_id++;
- bzero(&rx->se_list_memo, sizeof(rx->se_list_memo));
- bzero(&rx->set_list_memo, sizeof(rx->set_list_memo));
- while (n) {
- n->futures = 0;
- if (n->eclosure_needed && !eclose_node(rx, n, n, &frame))
- return 0;
- /* clear_marks (rx); */
- n = n->next;
- }
- return 1;
-}
-
-
-/* This deletes epsilon edges from an NFA. After running eclose_node,
- * we have no more need for these edges. They are removed to simplify
- * further operations on the NFA.
- */
-
-#ifdef __STDC__
-RX_DECL void rx_delete_epsilon_transitions(struct rx *rx)
-#else
-RX_DECL void rx_delete_epsilon_transitions(rx)
-struct rx *rx;
-#endif
-{
- struct rx_nfa_state *n = rx->nfa_states;
- struct rx_nfa_edge **e;
-
- while (n) {
- e = &n->edges;
- while (*e) {
- struct rx_nfa_edge *t;
-
- switch ((*e)->type) {
- case ne_epsilon:
- case ne_side_effect:
- t = *e;
- *e = t->next;
- rx_free_nfa_edge(t);
- break;
-
- default:
- e = &(*e)->next;
- break;
- }
- }
- n = n->next;
- }
-}
-
-
-/* This page: storing the nfa in a contiguous region of memory for
- * subsequent conversion to a super-nfa.
- */
-
-/* This is for qsort on an array of nfa_states. The order
- * is based on state ids and goes
- * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0)
- * This way, positive ids double as array indices.
- */
-
-#ifdef __STDC__
-static int nfacmp(void *va, void *vb)
-#else
-static int nfacmp(va, vb)
-void *va;
-void *vb;
-#endif
-{
- struct rx_nfa_state **a = (struct rx_nfa_state **) va;
- struct rx_nfa_state **b = (struct rx_nfa_state **) vb;
-
- return (*a == *b /* &&&& 3.18 */
- ? 0 : (((*a)->id < 0) == ((*b)->id < 0)
- ? (((*a)->id < (*b)->id) ? -1 : 1)
- : (((*a)->id < 0)
- ? 1 : -1)));
-}
-
-#ifdef __STDC__
-static int count_hash_nodes(struct rx_hash *st)
-#else
-static int count_hash_nodes(st)
-struct rx_hash *st;
-#endif
-{
- int x;
- int count = 0;
-
- for (x = 0; x < 13; ++x)
- count += ((st->children[x])
- ? count_hash_nodes(st->children[x])
- : st->bucket_size[x]);
-
- return count;
-}
-
-
-#ifdef __STDC__
-static void se_memo_freer(struct rx_hash_item *node)
-#else
-static void se_memo_freer(node)
-struct rx_hash_item *node;
-#endif
-{
- free((char *) node->data);
-}
-
-
-#ifdef __STDC__
-static void nfa_set_freer(struct rx_hash_item *node)
-#else
-static void nfa_set_freer(node)
-struct rx_hash_item *node;
-#endif
-{
- free((char *) node->data);
-}
-
-
-/* This copies an entire NFA into a single malloced block of memory.
- * Mostly this is for compatability with regex.c, though it is convenient
- * to have the nfa nodes in an array.
- */
-
-#ifdef __STDC__
-RX_DECL int
-rx_compactify_nfa(struct rx *rx, void **mem, unsigned long *size)
-#else
-RX_DECL int rx_compactify_nfa(rx, mem, size)
-struct rx *rx;
-void **mem;
-unsigned long *size;
-#endif
-{
- int total_nodec;
- struct rx_nfa_state *n;
- int edgec = 0;
- int eclosec = 0;
- int se_list_consc = count_hash_nodes(&rx->se_list_memo);
- int nfa_setc = count_hash_nodes(&rx->set_list_memo);
- unsigned long total_size;
-
- /* This takes place in two stages. First, the total size of the
- * nfa is computed, then structures are copied.
- */
- n = rx->nfa_states;
- total_nodec = 0;
- while (n) {
- struct rx_nfa_edge *e = n->edges;
- struct rx_possible_future *ec = n->futures;
-
- ++total_nodec;
- while (e) {
- ++edgec;
- e = e->next;
- }
- while (ec) {
- ++eclosec;
- ec = ec->next;
- }
- n = n->next;
- }
-
- total_size = (total_nodec * sizeof(struct rx_nfa_state)
- + edgec * rx_sizeof_bitset(rx->local_cset_size)
- + edgec * sizeof(struct rx_nfa_edge)
- + nfa_setc * sizeof(struct rx_nfa_state_set)
- + eclosec * sizeof(struct rx_possible_future)
- + se_list_consc * sizeof(struct rx_se_list)
- + rx->reserved);
-
- if (total_size > *size) {
- *mem = remalloc(*mem, total_size);
- if (*mem)
- *size = total_size;
- else
- return 0;
- }
- /* Now we've allocated the memory; this copies the NFA. */
- {
- static struct rx_nfa_state **scratch = 0;
- static int scratch_alloc = 0;
- struct rx_nfa_state *state_base = (struct rx_nfa_state *) *mem;
- struct rx_nfa_state *new_state = state_base;
- struct rx_nfa_edge *new_edge = (struct rx_nfa_edge *)
- ((char *) state_base + total_nodec * sizeof(struct rx_nfa_state));
- struct rx_se_list *new_se_list = (struct rx_se_list *)
- ((char *) new_edge + edgec * sizeof(struct rx_nfa_edge));
- struct rx_possible_future *new_close =
- ((struct rx_possible_future *)
- ((char *) new_se_list
-
- + se_list_consc * sizeof(struct rx_se_list)));
- struct rx_nfa_state_set *new_nfa_set = ((struct rx_nfa_state_set *)
-
- ((char *) new_close +
- eclosec *
-
- sizeof(struct
- rx_possible_future)));
- char *new_bitset =
-
- ((char *) new_nfa_set +
- nfa_setc * sizeof(struct rx_nfa_state_set));
- int x;
- struct rx_nfa_state *n;
-
- if (scratch_alloc < total_nodec) {
- scratch = ((struct rx_nfa_state **)
- remalloc(scratch, total_nodec * sizeof(*scratch)));
- if (scratch)
- scratch_alloc = total_nodec;
- else {
- scratch_alloc = 0;
- return 0;
- }
- }
-
- for (x = 0, n = rx->nfa_states; n; n = n->next)
- scratch[x++] = n;
-
- qsort(scratch, total_nodec, sizeof(struct rx_nfa_state *),
- (__compar_fn_t) nfacmp);
-
- for (x = 0; x < total_nodec; ++x) {
- struct rx_possible_future *eclose = scratch[x]->futures;
- struct rx_nfa_edge *edge = scratch[x]->edges;
- struct rx_nfa_state *cn = new_state++;
-
- cn->futures = 0;
- cn->edges = 0;
- cn->next = (x == total_nodec - 1) ? 0 : (cn + 1);
- cn->id = scratch[x]->id;
- cn->is_final = scratch[x]->is_final;
- cn->is_start = scratch[x]->is_start;
- cn->mark = 0;
- while (edge) {
- int indx = (edge->dest->id < 0
- ? (total_nodec + edge->dest->id)
-
- : edge->dest->id);
- struct rx_nfa_edge *e = new_edge++;
- rx_Bitset cset = (rx_Bitset) new_bitset;
-
- new_bitset += rx_sizeof_bitset(rx->local_cset_size);
- rx_bitset_null(rx->local_cset_size, cset);
- rx_bitset_union(rx->local_cset_size, cset,
- edge->params.cset);
- e->next = cn->edges;
- cn->edges = e;
- e->type = edge->type;
- e->dest = state_base + indx;
- e->params.cset = cset;
- edge = edge->next;
- }
- while (eclose) {
- struct rx_possible_future *ec = new_close++;
- struct rx_hash_item *sp;
- struct rx_se_list **sepos;
- struct rx_se_list *sesrc;
- struct rx_nfa_state_set *destlst;
- struct rx_nfa_state_set **destpos;
-
- ec->next = cn->futures;
- cn->futures = ec;
- for (sepos = &ec->effects, sesrc = eclose->effects;
- sesrc; sesrc = sesrc->cdr, sepos = &(*sepos)->cdr) {
- sp = rx_hash_find(&rx->se_list_memo,
- (long) sesrc->
- car ^ (long) sesrc->cdr, sesrc,
- &se_list_hash_rules);
- if (sp->binding) {
- sesrc = (struct rx_se_list *) sp->binding;
- break;
- }
- *new_se_list = *sesrc;
- sp->binding = (void *) new_se_list;
- *sepos = new_se_list;
- ++new_se_list;
- }
- *sepos = sesrc;
- for (destpos = &ec->destset, destlst = eclose->destset;
- destlst;
- destpos = &(*destpos)->cdr, destlst = destlst->cdr) {
- sp = rx_hash_find(&rx->set_list_memo,
- ((((long) destlst->car) >> 8)
- ^ (long) destlst->cdr),
- destlst, &nfa_set_hash_rules);
- if (sp->binding) {
- destlst = (struct rx_nfa_state_set *) sp->binding;
- break;
- }
- *new_nfa_set = *destlst;
- new_nfa_set->car = state_base + destlst->car->id;
- sp->binding = (void *) new_nfa_set;
- *destpos = new_nfa_set;
- ++new_nfa_set;
- }
- *destpos = destlst;
- eclose = eclose->next;
- }
- }
- }
- rx_free_hash_table(&rx->se_list_memo, se_memo_freer,
- &se_list_hash_rules);
- bzero(&rx->se_list_memo, sizeof(rx->se_list_memo));
- rx_free_hash_table(&rx->set_list_memo, nfa_set_freer,
- &nfa_set_hash_rules);
- bzero(&rx->set_list_memo, sizeof(rx->set_list_memo));
-
- rx_free_nfa(rx);
- rx->nfa_states = (struct rx_nfa_state *) *mem;
- return 1;
-}
-
-
-/* The functions in the next several pages define the lazy-NFA-conversion used
- * by matchers. The input to this construction is an NFA such as
- * is built by compactify_nfa (rx.c). The output is the superNFA.
- */
-
-/* Match engines can use arbitrary values for opcodes. So, the parse tree
- * is built using instructions names (enum rx_opcode), but the superstate
- * nfa is populated with mystery opcodes (void *).
- *
- * For convenience, here is an id table. The opcodes are == to their inxs
- *
- * The lables in re_search_2 would make good values for instructions.
- */
-
-void *rx_id_instruction_table[rx_num_instructions] = {
- (void *) rx_backtrack_point,
- (void *) rx_do_side_effects,
- (void *) rx_cache_miss,
- (void *) rx_next_char,
- (void *) rx_backtrack,
- (void *) rx_error_inx
-};
-
-
-
-/* Memory mgt. for superstate graphs. */
-
-#ifdef __STDC__
-static char *rx_cache_malloc(struct rx_cache *cache, int bytes)
-#else
-static char *rx_cache_malloc(cache, bytes)
-struct rx_cache *cache;
-int bytes;
-#endif
-{
- while (cache->bytes_left < bytes) {
- if (cache->memory_pos)
- cache->memory_pos = cache->memory_pos->next;
- if (!cache->memory_pos) {
- cache->morecore(cache);
- if (!cache->memory_pos)
- return 0;
- }
- cache->bytes_left = cache->memory_pos->bytes;
- cache->memory_addr = ((char *) cache->memory_pos
-
- + sizeof(struct rx_blocklist));
- }
- cache->bytes_left -= bytes;
- {
- char *addr = cache->memory_addr;
-
- cache->memory_addr += bytes;
- return addr;
- }
-}
-
-#ifdef __STDC__
-static void
-rx_cache_free(struct rx_cache *cache,
- struct rx_freelist **freelist, char *mem)
-#else
-static void rx_cache_free(cache, freelist, mem)
-struct rx_cache *cache;
-struct rx_freelist **freelist;
-char *mem;
-#endif
-{
- struct rx_freelist *it = (struct rx_freelist *) mem;
-
- it->next = *freelist;
- *freelist = it;
-}
-
-/* The partially instantiated superstate graph has a transition
- * table at every node. There is one entry for every character.
- * This fills in the transition for a set.
- */
-#ifdef __STDC__
-static void
-install_transition(struct rx_superstate *super,
- struct rx_inx *answer, rx_Bitset trcset)
-#else
-static void install_transition(super, answer, trcset)
-struct rx_superstate *super;
-struct rx_inx *answer;
-rx_Bitset trcset;
-#endif
-{
- struct rx_inx *transitions = super->transitions;
- int chr;
-
- for (chr = 0; chr < 256;)
- if (!*trcset) {
- ++trcset;
- chr += 32;
- } else {
- RX_subset sub = *trcset;
- RX_subset mask = 1;
- int bound = chr + 32;
-
- while (chr < bound) {
- if (sub & mask)
- transitions[chr] = *answer;
- ++chr;
- mask <<= 1;
- }
- ++trcset;
- }
-}
-
-#ifdef __STDC__
-static int qlen(struct rx_superstate *q)
-#else
-static int qlen(q)
-struct rx_superstate *q;
-#endif
-{
- int count = 1;
- struct rx_superstate *it;
-
- if (!q)
- return 0;
- for (it = q->next_recyclable; it != q; it = it->next_recyclable)
- ++count;
- return count;
-}
-
-#ifdef __STDC__
-static void check_cache(struct rx_cache *cache)
-#else
-static void check_cache(cache)
-struct rx_cache *cache;
-#endif
-{
- struct rx_cache *you_fucked_up = 0;
- int total = cache->superstates;
- int semi = cache->semifree_superstates;
-
- if (semi != qlen(cache->semifree_superstate))
- check_cache(you_fucked_up);
- if ((total - semi) != qlen(cache->lru_superstate))
- check_cache(you_fucked_up);
-}
-
-/* When a superstate is old and neglected, it can enter a
- * semi-free state. A semi-free state is slated to die.
- * Incoming transitions to a semi-free state are re-written
- * to cause an (interpreted) fault when they are taken.
- * The fault handler revives the semi-free state, patches
- * incoming transitions back to normal, and continues.
- *
- * The idea is basicly to free in two stages, aborting
- * between the two if the state turns out to be useful again.
- * When a free is aborted, the rescued superstate is placed
- * in the most-favored slot to maximize the time until it
- * is next semi-freed.
- */
-
-#ifdef __STDC__
-static void semifree_superstate(struct rx_cache *cache)
-#else
-static void semifree_superstate(cache)
-struct rx_cache *cache;
-#endif
-{
- int disqualified = cache->semifree_superstates;
-
- if (disqualified == cache->superstates)
- return;
- while (cache->lru_superstate->locks) {
- cache->lru_superstate = cache->lru_superstate->next_recyclable;
- ++disqualified;
- if (disqualified == cache->superstates)
- return;
- }
- {
- struct rx_superstate *it = cache->lru_superstate;
-
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->lru_superstate = (it == it->next_recyclable
- ? 0 : it->next_recyclable);
- if (!cache->semifree_superstate) {
- cache->semifree_superstate = it;
- it->next_recyclable = it;
- it->prev_recyclable = it;
- } else {
- it->prev_recyclable =
- cache->semifree_superstate->prev_recyclable;
- it->next_recyclable = cache->semifree_superstate;
- it->prev_recyclable->next_recyclable = it;
- it->next_recyclable->prev_recyclable = it;
- }
- {
- struct rx_distinct_future *df;
-
- it->is_semifree = 1;
- ++cache->semifree_superstates;
- df = it->transition_refs;
- if (df) {
- df->prev_same_dest->next_same_dest = 0;
- for (df = it->transition_refs; df; df = df->next_same_dest) {
- df->future_frame.inx =
- cache->instruction_table[rx_cache_miss];
- df->future_frame.data = 0;
- df->future_frame.data_2 = (void *) df;
- /* If there are any NEXT-CHAR instruction frames that
- * refer to this state, we convert them to CACHE-MISS frames.
- */
- if (!df->effects
- && (df->edge->options->next_same_super_edge[0]
- == df->edge->options))
- install_transition(df->present, &df->future_frame,
- df->edge->cset);
- }
- df = it->transition_refs;
- df->prev_same_dest->next_same_dest = df;
- }
- }
- }
-}
-
-#ifdef __STDC__
-static void
-refresh_semifree_superstate(struct rx_cache *cache,
- struct rx_superstate *super)
-#else
-static void refresh_semifree_superstate(cache, super)
-struct rx_cache *cache;
-struct rx_superstate *super;
-#endif
-{
- struct rx_distinct_future *df;
-
- if (super->transition_refs) {
- super->transition_refs->prev_same_dest->next_same_dest = 0;
- for (df = super->transition_refs; df; df = df->next_same_dest) {
- df->future_frame.inx = cache->instruction_table[rx_next_char];
- df->future_frame.data = (void *) super->transitions;
- /* CACHE-MISS instruction frames that refer to this state,
- * must be converted to NEXT-CHAR frames.
- */
- if (!df->effects && (df->edge->options->next_same_super_edge[0]
- == df->edge->options))
- install_transition(df->present, &df->future_frame,
- df->edge->cset);
- }
- super->transition_refs->prev_same_dest->next_same_dest
- = super->transition_refs;
- }
- if (cache->semifree_superstate == super)
- cache->semifree_superstate = (super->prev_recyclable == super
- ? 0 : super->prev_recyclable);
- super->next_recyclable->prev_recyclable = super->prev_recyclable;
- super->prev_recyclable->next_recyclable = super->next_recyclable;
-
- if (!cache->lru_superstate)
- (cache->lru_superstate
- = super->next_recyclable = super->prev_recyclable = super);
- else {
- super->next_recyclable = cache->lru_superstate;
- super->prev_recyclable = cache->lru_superstate->prev_recyclable;
- super->next_recyclable->prev_recyclable = super;
- super->prev_recyclable->next_recyclable = super;
- }
- super->is_semifree = 0;
- --cache->semifree_superstates;
-}
-
-#ifdef __STDC__
-static void
-rx_refresh_this_superstate(struct rx_cache *cache,
- struct rx_superstate *superstate)
-#else
-static void rx_refresh_this_superstate(cache, superstate)
-struct rx_cache *cache;
-struct rx_superstate *superstate;
-#endif
-{
- if (superstate->is_semifree)
- refresh_semifree_superstate(cache, superstate);
- else if (cache->lru_superstate == superstate)
- cache->lru_superstate = superstate->next_recyclable;
- else if (superstate != cache->lru_superstate->prev_recyclable) {
- superstate->next_recyclable->prev_recyclable
- = superstate->prev_recyclable;
- superstate->prev_recyclable->next_recyclable
- = superstate->next_recyclable;
- superstate->next_recyclable = cache->lru_superstate;
- superstate->prev_recyclable =
- cache->lru_superstate->prev_recyclable;
- superstate->next_recyclable->prev_recyclable = superstate;
- superstate->prev_recyclable->next_recyclable = superstate;
- }
-}
-
-#ifdef __STDC__
-static void
-release_superset_low(struct rx_cache *cache, struct rx_superset *set)
-#else
-static void release_superset_low(cache, set)
-struct rx_cache *cache;
-struct rx_superset *set;
-#endif
-{
- if (!--set->refs) {
- if (set->cdr)
- release_superset_low(cache, set->cdr);
-
- set->starts_for = 0;
-
- rx_hash_free
- (rx_hash_find
- (&cache->superset_table,
- (unsigned long) set->car ^ set->
- id ^ (unsigned long) set->cdr, (void *) set,
- &cache->superset_hash_rules), &cache->superset_hash_rules);
- rx_cache_free(cache, &cache->free_supersets, (char *) set);
- }
-}
-
-#ifdef __STDC__
-RX_DECL void rx_release_superset(struct rx *rx, struct rx_superset *set)
-#else
-RX_DECL void rx_release_superset(rx, set)
-struct rx *rx;
-struct rx_superset *set;
-#endif
-{
- release_superset_low(rx->cache, set);
-}
-
-/* This tries to add a new superstate to the superstate freelist.
- * It might, as a result, free some edge pieces or hash tables.
- * If nothing can be freed because too many locks are being held, fail.
- */
-
-#ifdef __STDC__
-static int rx_really_free_superstate(struct rx_cache *cache)
-#else
-static int rx_really_free_superstate(cache)
-struct rx_cache *cache;
-#endif
-{
- int locked_superstates = 0;
- struct rx_superstate *it;
-
- if (!cache->superstates)
- return 0;
-
- {
- /* This is a total guess. The idea is that we should expect as
- * many misses as we've recently experienced. I.e., cache->misses
- * should be the same as cache->semifree_superstates.
- */
- while ((cache->hits + cache->misses) > cache->superstates_allowed) {
- cache->hits >>= 1;
- cache->misses >>= 1;
- }
- if (((cache->hits + cache->misses) * cache->semifree_superstates)
- < (cache->superstates * cache->misses)) {
- semifree_superstate(cache);
- semifree_superstate(cache);
- }
- }
-
- while (cache->semifree_superstate && cache->semifree_superstate->locks) {
- refresh_semifree_superstate(cache, cache->semifree_superstate);
- ++locked_superstates;
- if (locked_superstates == cache->superstates)
- return 0;
- }
-
- if (cache->semifree_superstate) {
- it = cache->semifree_superstate;
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->semifree_superstate = ((it == it->next_recyclable)
- ? 0 : it->next_recyclable);
- --cache->semifree_superstates;
- } else {
- while (cache->lru_superstate->locks) {
- cache->lru_superstate = cache->lru_superstate->next_recyclable;
- ++locked_superstates;
- if (locked_superstates == cache->superstates)
- return 0;
- }
- it = cache->lru_superstate;
- it->next_recyclable->prev_recyclable = it->prev_recyclable;
- it->prev_recyclable->next_recyclable = it->next_recyclable;
- cache->lru_superstate = ((it == it->next_recyclable)
- ? 0 : it->next_recyclable);
- }
-
- if (it->transition_refs) {
- struct rx_distinct_future *df;
-
- for (df = it->transition_refs,
- df->prev_same_dest->next_same_dest = 0;
- df; df = df->next_same_dest) {
- df->future_frame.inx = cache->instruction_table[rx_cache_miss];
- df->future_frame.data = 0;
- df->future_frame.data_2 = (void *) df;
- df->future = 0;
- }
- it->transition_refs->prev_same_dest->next_same_dest =
- it->transition_refs;
- }
- {
- struct rx_super_edge *tc = it->edges;
-
- while (tc) {
- struct rx_distinct_future *df;
- struct rx_super_edge *tct = tc->next;
-
- df = tc->options;
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- while (df) {
- struct rx_distinct_future *dft = df;
-
- df = df->next_same_super_edge[0];
-
-
- if (dft->future && dft->future->transition_refs == dft) {
- dft->future->transition_refs = dft->next_same_dest;
- if (dft->future->transition_refs == dft)
- dft->future->transition_refs = 0;
- }
- dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
- dft->prev_same_dest->next_same_dest = dft->next_same_dest;
- rx_cache_free(cache, &cache->free_discernable_futures,
- (char *) dft);
- }
- rx_cache_free(cache, &cache->free_transition_classes,
- (char *) tc);
- tc = tct;
- }
- }
-
- if (it->contents->superstate == it)
- it->contents->superstate = 0;
- release_superset_low(cache, it->contents);
- rx_cache_free(cache, &cache->free_superstates, (char *) it);
- --cache->superstates;
- return 1;
-}
-
-#ifdef __STDC__
-static char *rx_cache_get(struct rx_cache *cache,
- struct rx_freelist **freelist)
-#else
-static char *rx_cache_get(cache, freelist)
-struct rx_cache *cache;
-struct rx_freelist **freelist;
-#endif
-{
- while (!*freelist && rx_really_free_superstate(cache));
- if (!*freelist)
- return 0;
- {
- struct rx_freelist *it = *freelist;
-
- *freelist = it->next;
- return (char *) it;
- }
-}
-
-#ifdef __STDC__
-static char *rx_cache_malloc_or_get(struct rx_cache *cache,
- struct rx_freelist **freelist,
- int bytes)
-#else
-static char *rx_cache_malloc_or_get(cache, freelist, bytes)
-struct rx_cache *cache;
-struct rx_freelist **freelist;
-int bytes;
-#endif
-{
- if (!*freelist) {
- char *answer = rx_cache_malloc(cache, bytes);
-
- if (answer)
- return answer;
- }
-
- return rx_cache_get(cache, freelist);
-}
-
-#ifdef __STDC__
-static char *rx_cache_get_superstate(struct rx_cache *cache)
-#else
-static char *rx_cache_get_superstate(cache)
-struct rx_cache *cache;
-#endif
-{
- char *answer;
- int bytes = (sizeof(struct rx_superstate)
- + cache->local_cset_size * sizeof(struct rx_inx));
-
- if (!cache->free_superstates
- && (cache->superstates < cache->superstates_allowed)) {
- answer = rx_cache_malloc(cache, bytes);
- if (answer) {
- ++cache->superstates;
- return answer;
- }
- }
- answer = rx_cache_get(cache, &cache->free_superstates);
- if (!answer) {
- answer = rx_cache_malloc(cache, bytes);
- if (answer)
- ++cache->superstates_allowed;
- }
- ++cache->superstates;
- return answer;
-}
-
-
-
-#ifdef __STDC__
-static int supersetcmp(void *va, void *vb)
-#else
-static int supersetcmp(va, vb)
-void *va;
-void *vb;
-#endif
-{
- struct rx_superset *a = (struct rx_superset *) va;
- struct rx_superset *b = (struct rx_superset *) vb;
-
- return ((a == b)
- || (a && b && (a->car == b->car) && (a->cdr == b->cdr)));
-}
-
-#ifdef __STDC__
-static struct rx_hash_item *superset_allocator(struct rx_hash_rules *rules,
- void *val)
-#else
-static struct rx_hash_item *superset_allocator(rules, val)
-struct rx_hash_rules *rules;
-void *val;
-#endif
-{
- struct rx_cache *cache = ((struct rx_cache *)
- ((char *) rules
- -
-
- (unsigned
- long) (&((struct rx_cache *)
- 0)->superset_hash_rules)));
- struct rx_superset *template = (struct rx_superset *) val;
- struct rx_superset *newset
- = ((struct rx_superset *) rx_cache_malloc_or_get(cache,
- &cache->free_supersets,
- sizeof
-
- (*template)));
- if (!newset)
- return 0;
- newset->refs = 0;
- newset->car = template->car;
- newset->id = template->car->id;
- newset->cdr = template->cdr;
- newset->superstate = 0;
- rx_protect_superset(rx, template->cdr);
- newset->hash_item.data = (void *) newset;
- newset->hash_item.binding = 0;
- return &newset->hash_item;
-}
-
-#ifdef __STDC__
-static struct rx_hash *super_hash_allocator(struct rx_hash_rules *rules)
-#else
-static struct rx_hash *super_hash_allocator(rules)
-struct rx_hash_rules *rules;
-#endif
-{
- struct rx_cache *cache = ((struct rx_cache *)
- ((char *) rules
- -
-
- (unsigned
- long) (&((struct rx_cache *)
- 0)->superset_hash_rules)));
- return ((struct rx_hash *)
- rx_cache_malloc_or_get(cache, &cache->free_hash,
-
- sizeof(struct rx_hash)));
-}
-
-
-#ifdef __STDC__
-static void
-super_hash_liberator(struct rx_hash *hash, struct rx_hash_rules *rules)
-#else
-static void super_hash_liberator(hash, rules)
-struct rx_hash *hash;
-struct rx_hash_rules *rules;
-#endif
-{
- struct rx_cache *cache = ((struct rx_cache *)
-
- (char *) rules -
- (long) (&
-
- ((struct rx_cache *)
- 0)->superset_hash_rules));
- rx_cache_free(cache, &cache->free_hash, (char *) hash);
-}
-
-#ifdef __STDC__
-static void
-superset_hash_item_liberator(struct rx_hash_item *it,
- struct rx_hash_rules *rules)
-#else
-static void superset_hash_item_liberator(it, rules) /* Well, it does ya know. */
-struct rx_hash_item *it;
-struct rx_hash_rules *rules;
-#endif
-{
-}
-
-int rx_cache_bound = 128;
-static int rx_default_cache_got = 0;
-
-#ifdef __STDC__
-static int bytes_for_cache_size(int supers, int cset_size)
-#else
-static int bytes_for_cache_size(supers, cset_size)
-int supers;
-int cset_size;
-#endif
-{
- /* What the hell is this? !!! */
- return (int)
- ((float) supers * ((1.03 * (float) (rx_sizeof_bitset(cset_size)
- +
- sizeof(struct rx_super_edge)))
- +
- (1.80 *
- (float) sizeof(struct rx_possible_future)) +
- (float) (sizeof(struct rx_superstate)
- + cset_size * sizeof(struct rx_inx))));
-}
-
-#ifdef __STDC__
-static void rx_morecore(struct rx_cache *cache)
-#else
-static void rx_morecore(cache)
-struct rx_cache *cache;
-#endif
-{
- if (rx_default_cache_got >= rx_cache_bound)
- return;
-
- rx_default_cache_got += 16;
- cache->superstates_allowed = rx_cache_bound;
- {
- struct rx_blocklist **pos = &cache->memory;
- int size = bytes_for_cache_size(16, cache->local_cset_size);
-
- while (*pos)
- pos = &(*pos)->next;
- *pos = ((struct rx_blocklist *)
- malloc(size + sizeof(struct rx_blocklist)));
-
- if (!*pos)
- return;
-
- (*pos)->next = 0;
- (*pos)->bytes = size;
- cache->memory_pos = *pos;
- cache->memory_addr = (char *) *pos + sizeof(**pos);
- cache->bytes_left = size;
- }
-}
-
-static struct rx_cache default_cache = {
- {
- supersetcmp,
- super_hash_allocator,
- super_hash_liberator,
- superset_allocator,
- superset_hash_item_liberator,
- },
- 0,
- 0,
- 0,
- 0,
- rx_morecore,
-
- 0,
- 0,
- 0,
- 0,
- 0,
-
- 0,
- 0,
-
- 0,
-
- 0,
- 0,
- 0,
- 0,
- 128,
-
- 256,
- rx_id_instruction_table,
-
- {
- 0,
- 0,
- {0},
- {0},
- {0}
- }
-};
-
-/* This adds an element to a superstate set. These sets are lists, such
- * that lists with == elements are ==. The empty set is returned by
- * superset_cons (rx, 0, 0) and is NOT equivelent to
- * (struct rx_superset)0.
- */
-
-#ifdef __STDC__
-RX_DECL struct rx_superset *rx_superset_cons(struct rx *rx,
- struct rx_nfa_state *car,
- struct rx_superset *cdr)
-#else
-RX_DECL struct rx_superset *rx_superset_cons(rx, car, cdr)
-struct rx *rx;
-struct rx_nfa_state *car;
-struct rx_superset *cdr;
-#endif
-{
- struct rx_cache *cache = rx->cache;
-
- if (!car && !cdr) {
- if (!cache->empty_superset) {
- cache->empty_superset = ((struct rx_superset *)
- rx_cache_malloc_or_get(cache,
- &cache->free_supersets,
-
- sizeof(struct
- rx_superset)));
- if (!cache->empty_superset)
- return 0;
- bzero(cache->empty_superset, sizeof(struct rx_superset));
-
- cache->empty_superset->refs = 1000;
- }
- return cache->empty_superset;
- }
- {
- struct rx_superset template;
- struct rx_hash_item *hit;
-
- template.car = car;
- template.cdr = cdr;
- template.id = car->id;
- /* While hash_store will protect cdr itself it might first allocate hash
- tables and stuff which might cause it to be garbage collected before
- it's protected -- [gsstark:19961026.2155EST] */
- rx_protect_superset(rx, cdr);
- hit = rx_hash_store(&cache->superset_table,
- (unsigned long) car ^ car->id ^ (unsigned long)
- cdr, (void *) &template,
- &cache->superset_hash_rules);
- rx_release_superset(rx, cdr);
- return (hit ? (struct rx_superset *) hit->data : 0);
- }
-}
-
-/* This computes a union of two NFA state sets. The sets do not have the
- * same representation though. One is a RX_SUPERSET structure (part
- * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
- */
-
-#ifdef __STDC__
-RX_DECL struct rx_superset *rx_superstate_eclosure_union
- (struct rx *rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
-#else
-RX_DECL struct rx_superset *rx_superstate_eclosure_union(rx, set, ecl)
-struct rx *rx;
-struct rx_superset *set;
-struct rx_nfa_state_set *ecl;
-#endif
-{
- if (!ecl)
- return set;
-
- if (!set->car)
- return rx_superset_cons(rx, ecl->car,
- rx_superstate_eclosure_union(rx, set,
- ecl->cdr));
- if (set->car == ecl->car)
- return rx_superstate_eclosure_union(rx, set, ecl->cdr);
-
- {
- struct rx_superset *tail;
- struct rx_nfa_state *first;
-
- if (set->car > ecl->car) {
- tail = rx_superstate_eclosure_union(rx, set->cdr, ecl);
- first = set->car;
- } else {
- tail = rx_superstate_eclosure_union(rx, set, ecl->cdr);
- first = ecl->car;
- }
- if (!tail)
- return 0;
- else {
- struct rx_superset *answer;
-
- answer = rx_superset_cons(rx, first, tail);
- if (!answer) {
- rx_protect_superset(rx, tail);
- rx_release_superset(rx, tail);
- return 0;
- } else
- return answer;
- }
- }
-}
-
-
-
-
-/*
- * This makes sure that a list of rx_distinct_futures contains
- * a future for each possible set of side effects in the eclosure
- * of a given state. This is some of the work of filling in a
- * superstate transition.
- */
-
-#ifdef __STDC__
-static struct rx_distinct_future *include_futures(struct rx *rx, struct rx_distinct_future
- *df, struct rx_nfa_state
- *state, struct rx_superstate
- *superstate)
-#else
-static struct rx_distinct_future *include_futures(rx, df, state,
- superstate)
-struct rx *rx;
-struct rx_distinct_future *df;
-struct rx_nfa_state *state;
-struct rx_superstate *superstate;
-#endif
-{
- struct rx_possible_future *future;
- struct rx_cache *cache = rx->cache;
-
- for (future = state->futures; future; future = future->next) {
- struct rx_distinct_future *dfp;
- struct rx_distinct_future *insert_before = 0;
-
- if (df)
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
- if (dfp->effects == future->effects)
- break;
- else {
- int order =
-
- rx->se_list_cmp(rx, dfp->effects, future->effects);
- if (order > 0) {
- insert_before = dfp;
- dfp = 0;
- break;
- }
- }
- if (df)
- df->next_same_super_edge[1]->next_same_super_edge[0] = df;
- if (!dfp) {
- dfp = ((struct rx_distinct_future *)
- rx_cache_malloc_or_get(cache,
- &cache->free_discernable_futures,
-
- sizeof(struct
- rx_distinct_future)));
- if (!dfp)
- return 0;
- if (!df) {
- df = insert_before = dfp;
- df->next_same_super_edge[0] = df->next_same_super_edge[1] =
- df;
- } else if (!insert_before)
- insert_before = df;
- else if (insert_before == df)
- df = dfp;
-
- dfp->next_same_super_edge[0] = insert_before;
- dfp->next_same_super_edge[1]
- = insert_before->next_same_super_edge[1];
- dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
- dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
- dfp->next_same_dest = dfp->prev_same_dest = dfp;
- dfp->future = 0;
- dfp->present = superstate;
- dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
- dfp->future_frame.data = 0;
- dfp->future_frame.data_2 = (void *) dfp;
- dfp->side_effects_frame.inx
- = rx->instruction_table[rx_do_side_effects];
- dfp->side_effects_frame.data = 0;
- dfp->side_effects_frame.data_2 = (void *) dfp;
- dfp->effects = future->effects;
- }
- }
- return df;
-}
-
-
-/* This constructs a new superstate from its state set. The only
- * complexity here is memory management.
- */
-#ifdef __STDC__
-RX_DECL struct rx_superstate *rx_superstate(struct rx *rx,
- struct rx_superset *set)
-#else
-RX_DECL struct rx_superstate *rx_superstate(rx, set)
-struct rx *rx;
-struct rx_superset *set;
-#endif
-{
- struct rx_cache *cache = rx->cache;
- struct rx_superstate *superstate = 0;
-
- /* Does the superstate already exist in the cache? */
- if (set->superstate) {
- if (set->superstate->rx_id != rx->rx_id) {
- /* Aha. It is in the cache, but belongs to a superstate
- * that refers to an NFA that no longer exists.
- * (We know it no longer exists because it was evidently
- * stored in the same region of memory as the current nfa
- * yet it has a different id.)
- */
- superstate = set->superstate;
- if (!superstate->is_semifree) {
- if (cache->lru_superstate == superstate) {
- cache->lru_superstate = superstate->next_recyclable;
- if (cache->lru_superstate == superstate)
- cache->lru_superstate = 0;
- }
- {
- superstate->next_recyclable->prev_recyclable
- = superstate->prev_recyclable;
- superstate->prev_recyclable->next_recyclable
- = superstate->next_recyclable;
- if (!cache->semifree_superstate) {
- (cache->semifree_superstate
- = superstate->next_recyclable
- = superstate->prev_recyclable = superstate);
- } else {
- superstate->next_recyclable =
- cache->semifree_superstate;
- superstate->prev_recyclable =
- cache->semifree_superstate->prev_recyclable;
- superstate->next_recyclable->prev_recyclable =
- superstate;
- superstate->prev_recyclable->next_recyclable =
- superstate;
- cache->semifree_superstate = superstate;
- }
- ++cache->semifree_superstates;
- }
- }
- set->superstate = 0;
- goto handle_cache_miss;
- }
- ++cache->hits;
- superstate = set->superstate;
-
- rx_refresh_this_superstate(cache, superstate);
- return superstate;
- }
-
- handle_cache_miss:
-
- /* This point reached only for cache misses. */
- ++cache->misses;
-#if RX_DEBUG
- if (rx_debug_trace > 1) {
- struct rx_superset *setp = set;
-
- fprintf(stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
- while (setp) {
- fprintf(stderr, "%d ", setp->id);
- setp = setp->cdr;
- }
- fprintf(stderr, "(%d)\n", set);
- }
-#endif
- superstate = (struct rx_superstate *) rx_cache_get_superstate(cache);
- if (!superstate)
- return 0;
-
- if (!cache->lru_superstate)
- (cache->lru_superstate
- = superstate->next_recyclable
- = superstate->prev_recyclable = superstate);
- else {
- superstate->next_recyclable = cache->lru_superstate;
- superstate->prev_recyclable =
- cache->lru_superstate->prev_recyclable;
- (superstate->prev_recyclable->next_recyclable =
- superstate->next_recyclable->prev_recyclable = superstate);
- }
- superstate->rx_id = rx->rx_id;
- superstate->transition_refs = 0;
- superstate->locks = 0;
- superstate->is_semifree = 0;
- set->superstate = superstate;
- superstate->contents = set;
- rx_protect_superset(rx, set);
- superstate->edges = 0;
- {
- int x;
-
- /* None of the transitions from this superstate are known yet. */
- for (x = 0; x < rx->local_cset_size; ++x) { /* &&&&& 3.8 % */
- struct rx_inx *ifr = &superstate->transitions[x];
-
- ifr->inx = rx->instruction_table[rx_cache_miss];
- ifr->data = ifr->data_2 = 0;
- }
- }
- return superstate;
-}
-
-
-/* This computes the destination set of one edge of the superstate NFA.
- * Note that a RX_DISTINCT_FUTURE is a superstate edge.
- * Returns 0 on an allocation failure.
- */
-
-#ifdef __STDC__
-static int solve_destination(struct rx *rx, struct rx_distinct_future *df)
-#else
-static int solve_destination(rx, df)
-struct rx *rx;
-struct rx_distinct_future *df;
-#endif
-{
- struct rx_super_edge *tc = df->edge;
- struct rx_superset *nfa_state;
- struct rx_superset *nil_set = rx_superset_cons(rx, 0, 0);
- struct rx_superset *solution = nil_set;
- struct rx_superstate *dest;
-
- rx_protect_superset(rx, solution);
- /* Iterate over all NFA states in the state set of this superstate. */
- for (nfa_state = df->present->contents;
- nfa_state->car; nfa_state = nfa_state->cdr) {
- struct rx_nfa_edge *e;
-
- /* Iterate over all edges of each NFA state. */
- for (e = nfa_state->car->edges; e; e = e->next)
- /* If we find an edge that is labeled with
- * the characters we are solving for.....
- */
- if (rx_bitset_is_subset(rx->local_cset_size,
- tc->cset, e->params.cset)) {
- struct rx_nfa_state *n = e->dest;
- struct rx_possible_future *pf;
-
- /* ....search the partial epsilon closures of the destination
- * of that edge for a path that involves the same set of
- * side effects we are solving for.
- * If we find such a RX_POSSIBLE_FUTURE, we add members to the
- * stateset we are computing.
- */
- for (pf = n->futures; pf; pf = pf->next)
- if (pf->effects == df->effects) {
- struct rx_superset *old_sol;
-
- old_sol = solution;
- solution =
- rx_superstate_eclosure_union(rx, solution,
- pf->destset);
- if (!solution)
- return 0;
- rx_protect_superset(rx, solution);
- rx_release_superset(rx, old_sol);
- }
- }
- }
- /* It is possible that the RX_DISTINCT_FUTURE we are working on has
- * the empty set of NFA states as its definition. In that case, this
- * is a failure point.
- */
- if (solution == nil_set) {
- df->future_frame.inx = (void *) rx_backtrack;
- df->future_frame.data = 0;
- df->future_frame.data_2 = 0;
- return 1;
- }
- dest = rx_superstate(rx, solution);
- rx_release_superset(rx, solution);
- if (!dest)
- return 0;
-
- {
- struct rx_distinct_future *dft;
-
- dft = df;
- df->prev_same_dest->next_same_dest = 0;
- while (dft) {
- dft->future = dest;
- dft->future_frame.inx = rx->instruction_table[rx_next_char];
- dft->future_frame.data = (void *) dest->transitions;
- dft = dft->next_same_dest;
- }
- df->prev_same_dest->next_same_dest = df;
- }
- if (!dest->transition_refs)
- dest->transition_refs = df;
- else {
- struct rx_distinct_future *dft =
-
- dest->transition_refs->next_same_dest;
- dest->transition_refs->next_same_dest = df->next_same_dest;
- df->next_same_dest->prev_same_dest = dest->transition_refs;
- df->next_same_dest = dft;
- dft->prev_same_dest = df;
- }
- return 1;
-}
-
-
-/* This takes a superstate and a character, and computes some edges
- * from the superstate NFA. In particular, this computes all edges
- * that lead from SUPERSTATE given CHR. This function also
- * computes the set of characters that share this edge set.
- * This returns 0 on allocation error.
- * The character set and list of edges are returned through
- * the paramters CSETOUT and DFOUT.
-} */
-
-#ifdef __STDC__
-static int
-compute_super_edge(struct rx *rx, struct rx_distinct_future **dfout,
- rx_Bitset csetout, struct rx_superstate *superstate,
- unsigned char chr)
-#else
-static int compute_super_edge(rx, dfout, csetout, superstate, chr)
-struct rx *rx;
-struct rx_distinct_future **dfout;
-rx_Bitset csetout;
-struct rx_superstate *superstate;
-unsigned char chr;
-#endif
-{
- struct rx_superset *stateset = superstate->contents;
-
- /* To compute the set of characters that share edges with CHR,
- * we start with the full character set, and subtract.
- */
- rx_bitset_universe(rx->local_cset_size, csetout);
- *dfout = 0;
-
- /* Iterate over the NFA states in the superstate state-set. */
- while (stateset->car) {
- struct rx_nfa_edge *e;
-
- for (e = stateset->car->edges; e; e = e->next)
- if (RX_bitset_member(e->params.cset, chr)) {
- /* If we find an NFA edge that applies, we make sure there
- * are corresponding edges in the superstate NFA.
- */
- {
- struct rx_distinct_future *saved;
-
- saved = *dfout;
- *dfout =
- include_futures(rx, *dfout, e->dest, superstate);
- if (!*dfout) {
- struct rx_distinct_future *df;
-
- df = saved;
- if (df)
- df->
- next_same_super_edge
- [1]->next_same_super_edge[0] = 0;
- while (df) {
- struct rx_distinct_future *dft;
-
- dft = df;
- df = df->next_same_super_edge[0];
-
- if (dft->future
- && dft->future->transition_refs == dft) {
- dft->future->transition_refs =
- dft->next_same_dest;
- if (dft->future->transition_refs == dft)
- dft->future->transition_refs = 0;
- }
- dft->next_same_dest->prev_same_dest =
- dft->prev_same_dest;
- dft->prev_same_dest->next_same_dest =
- dft->next_same_dest;
- rx_cache_free(rx->cache,
- &rx->
- cache->free_discernable_futures,
- (char *) dft);
- }
- return 0;
- }
- }
- /* We also trim the character set a bit. */
- rx_bitset_intersection(rx->local_cset_size,
- csetout, e->params.cset);
- } else
- /* An edge that doesn't apply at least tells us some characters
- * that don't share the same edge set as CHR.
- */
- rx_bitset_difference(rx->local_cset_size, csetout,
- e->params.cset);
- stateset = stateset->cdr;
- }
- return 1;
-}
-
-
-/* This is a constructor for RX_SUPER_EDGE structures. These are
- * wrappers for lists of superstate NFA edges that share character sets labels.
- * If a transition class contains more than one rx_distinct_future (superstate
- * edge), then it represents a non-determinism in the superstate NFA.
- */
-
-
-#ifdef __STDC__
-static struct rx_super_edge *rx_super_edge(struct rx *rx,
- struct rx_superstate *super,
- rx_Bitset cset,
- struct rx_distinct_future *df)
-#else
-static struct rx_super_edge *rx_super_edge(rx, super, cset, df)
-struct rx *rx;
-struct rx_superstate *super;
-rx_Bitset cset;
-struct rx_distinct_future *df;
-#endif
-{
- struct rx_super_edge *tc =
- (struct rx_super_edge *) rx_cache_malloc_or_get
- (rx->cache, &rx->cache->free_transition_classes,
- sizeof(struct rx_super_edge) +
-
- rx_sizeof_bitset(rx->local_cset_size));
-
- if (!tc)
- return 0;
- tc->next = super->edges;
- super->edges = tc;
- tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
- tc->rx_backtrack_frame.data = 0;
- tc->rx_backtrack_frame.data_2 = (void *) tc;
- tc->options = df;
- tc->cset = (rx_Bitset) ((char *) tc + sizeof(*tc));
- rx_bitset_assign(rx->local_cset_size, tc->cset, cset);
- if (df) {
- struct rx_distinct_future *dfp = df;
-
- df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
- while (dfp) {
- dfp->edge = tc;
- dfp = dfp->next_same_super_edge[0];
- }
- df->next_same_super_edge[1]->next_same_super_edge[0] = df;
- }
- return tc;
-}
-
-
-/* There are three kinds of cache miss. The first occurs when a
- * transition is taken that has never been computed during the
- * lifetime of the source superstate. That cache miss is handled by
- * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
- * occurs when the destination superstate of a transition doesn't
- * exist. SOLVE_DESTINATION is used to construct the destination superstate.
- * Finally, the third kind of cache miss occurs when the destination
- * superstate of a transition is in a `semi-free state'. That case is
- * handled by UNFREE_SUPERSTATE.
- *
- * The function of HANDLE_CACHE_MISS is to figure out which of these
- * cases applies.
- */
-
-
-#ifdef __STDC__
-static void
-install_partial_transition(struct rx_superstate *super,
- struct rx_inx *answer,
- RX_subset set, int offset)
-#else
-static void install_partial_transition(super, answer, set, offset)
-struct rx_superstate *super;
-struct rx_inx *answer;
-RX_subset set;
-int offset;
-#endif
-{
- int start = offset;
- int end = start + 32;
- RX_subset pos = 1;
- struct rx_inx *transitions = super->transitions;
-
- while (start < end) {
- if (set & pos)
- transitions[start] = *answer;
- pos <<= 1;
- ++start;
- }
-}
-
-#ifdef __STDC__
-RX_DECL struct rx_inx *rx_handle_cache_miss
- (struct rx *rx, struct rx_superstate *super, unsigned char chr,
- void *data)
-#else
-RX_DECL struct rx_inx *rx_handle_cache_miss(rx, super, chr, data)
-struct rx *rx;
-struct rx_superstate *super;
-unsigned char chr;
-void *data;
-#endif
-{
- int offset = chr / RX_subset_bits;
- struct rx_distinct_future *df = data;
-
- if (!df) { /* must be the shared_cache_miss_frame */
- /* Perhaps this is just a transition waiting to be filled. */
- struct rx_super_edge *tc;
- RX_subset mask = rx_subset_singletons[chr % RX_subset_bits];
-
- for (tc = super->edges; tc; tc = tc->next)
- if (tc->cset[offset] & mask) {
- struct rx_inx *answer;
-
- df = tc->options;
- answer =
- ((tc->options->next_same_super_edge[0] !=
- tc->options) ? &tc->
- rx_backtrack_frame : (df->effects ?
- &df->side_effects_frame :
- &df->future_frame));
- install_partial_transition(super, answer, tc->cset[offset],
- offset * 32);
- return answer;
- }
- /* Otherwise, it's a flushed or newly encountered edge. */
- {
- char cset_space[1024]; /* this limit is far from unreasonable */
- rx_Bitset trcset;
- struct rx_inx *answer;
-
- if (rx_sizeof_bitset(rx->local_cset_size) > sizeof(cset_space))
- return 0; /* If the arbitrary limit is hit, always fail */
- /* cleanly. */
- trcset = (rx_Bitset) cset_space;
- rx_lock_superstate(rx, super);
- if (!compute_super_edge(rx, &df, trcset, super, chr)) {
- rx_unlock_superstate(rx, super);
- return 0;
- }
- if (!df) { /* We just computed the fail transition. */
- static struct rx_inx
- shared_fail_frame = { 0, 0, (void *) rx_backtrack, 0 };
-
- answer = &shared_fail_frame;
- } else {
- tc = rx_super_edge(rx, super, trcset, df);
- if (!tc) {
- rx_unlock_superstate(rx, super);
- return 0;
- }
- answer =
- ((tc->options->next_same_super_edge[0] !=
- tc->options) ? &tc->
- rx_backtrack_frame : (df->effects ?
- &df->side_effects_frame :
- &df->future_frame));
- }
- install_partial_transition(super, answer,
- trcset[offset], offset * 32);
- rx_unlock_superstate(rx, super);
- return answer;
- }
- } else if (df->future) { /* A cache miss on an edge with a future? Must be
- * a semi-free destination. */
- if (df->future->is_semifree)
- refresh_semifree_superstate(rx->cache, df->future);
- return &df->future_frame;
- } else
- /* no future superstate on an existing edge */
- {
- rx_lock_superstate(rx, super);
- if (!solve_destination(rx, df)) {
- rx_unlock_superstate(rx, super);
- return 0;
- }
- if (!df->effects
- && (df->edge->options->next_same_super_edge[0] ==
- df->edge->options)) install_partial_transition(super,
- &df->future_frame,
- df->
- edge->cset
- [offset],
- offset *
- 32);
- rx_unlock_superstate(rx, super);
- return &df->future_frame;
- }
-}
-
-
-
-
-/* The rest of the code provides a regex.c compatable interface. */
-
-
-__const__ char *re_error_msg[] = {
- 0, /* REG_NOUT */
- "No match", /* REG_NOMATCH */
- "Invalid regular expression", /* REG_BADPAT */
- "Invalid collation character", /* REG_ECOLLATE */
- "Invalid character class name", /* REG_ECTYPE */
- "Trailing backslash", /* REG_EESCAPE */
- "Invalid back reference", /* REG_ESUBREG */
- "Unmatched [ or [^", /* REG_EBRACK */
- "Unmatched ( or \\(", /* REG_EPAREN */
- "Unmatched \\{", /* REG_EBRACE */
- "Invalid content of \\{\\}", /* REG_BADBR */
- "Invalid range end", /* REG_ERANGE */
- "Memory exhausted", /* REG_ESPACE */
- "Invalid preceding regular expression", /* REG_BADRPT */
- "Premature end of regular expression", /* REG_EEND */
- "Regular expression too big", /* REG_ESIZE */
- "Unmatched ) or \\)", /* REG_ERPAREN */
-};
-
-
-
-/*
- * Macros used while compiling patterns.
- *
- * By convention, PEND points just past the end of the uncompiled pattern,
- * P points to the read position in the pattern. `translate' is the name
- * of the translation table (`TRANSLATE' is the name of a macro that looks
- * things up in `translate').
- */
-
-
-/*
- * Fetch the next character in the uncompiled pattern---translating it
- * if necessary. *Also cast from a signed character in the constant
- * string passed to us by the user to an unsigned char that we can use
- * as an array index (in, e.g., `translate').
- */
-#define PATFETCH(c) \
- do {if (p == pend) return REG_EEND; \
- c = (unsigned char) *p++; \
- c = translate[c]; \
- } while (0)
-
-/*
- * Fetch the next character in the uncompiled pattern, with no
- * translation.
- */
-#define PATFETCH_RAW(c) \
- do {if (p == pend) return REG_EEND; \
- c = (unsigned char) *p++; \
- } while (0)
-
-/* Go backwards one character in the pattern. */
-#define PATUNFETCH p--
-
-
-#define TRANSLATE(d) translate[(unsigned char) (d)]
-
-typedef unsigned regnum_t;
-
-/* Since offsets can go either forwards or backwards, this type needs to
- * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
- */
-typedef int pattern_offset_t;
-
-typedef struct {
- struct rexp_node **top_expression; /* was begalt */
- struct rexp_node **last_expression; /* was laststart */
- pattern_offset_t inner_group_offset;
- regnum_t regnum;
-} compile_stack_elt_t;
-typedef struct {
- compile_stack_elt_t *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
-} compile_stack_type;
-
-static boolean group_in_compile_stack(compile_stack_type, regnum_t);
-static reg_errcode_t
-compile_range(struct re_pattern_buffer *, rx_Bitset,
- __const__ char **, __const__ char *,
- unsigned char *, reg_syntax_t, rx_Bitset, char *);
-static void find_backrefs(char *, struct rexp_node *,
-
- struct re_se_params *);
-static int compute_fastset(struct re_pattern_buffer *, struct rexp_node *);
-static int is_anchored(struct rexp_node *, rx_side_effect);
-static struct rexp_node
-*remove_unecessary_side_effects
-
- (struct rx *, char *, struct rexp_node *, struct re_se_params *);
-static int pointless_if_repeated(struct rexp_node *,
-
- struct re_se_params *);
-static int registers_on_stack(struct re_pattern_buffer *,
- struct rexp_node *,
-
- int, struct re_se_params *);
-static int has_any_se(struct rx *, struct rexp_node *);
-static int has_non_idempotent_epsilon_path
-
- (struct rx *, struct rexp_node *, struct re_se_params *);
-static int begins_with_complex_se(struct rx *, struct rexp_node *);
-static void speed_up_alt(struct rx *, struct rexp_node *, int);
-RX_DECL reg_errcode_t
-
-rx_compile(__const__ char *, int, reg_syntax_t,
- struct re_pattern_buffer *);
-RX_DECL void rx_blow_up_fastmap(struct re_pattern_buffer *);
-static __inline__ enum rx_get_burst_return
-re_search_2_get_burst(struct rx_string_position *, void *, int);
-static __inline__ enum rx_back_check_return
-re_search_2_back_check(struct rx_string_position *, int,
- int, unsigned char *, void *, int);
-static __inline__ int
-re_search_2_fetch_char(struct rx_string_position *, int, void *, int);
-
-
-#define INIT_COMPILE_STACK_SIZE 32
-
-#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
-#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
-
-/* The next available element. */
-#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
-
-
-/* Set the bit for character C in a list. */
-#define SET_LIST_BIT(c) \
- (b[((unsigned char) (c)) / CHARBITS] \
- |= 1 << (((unsigned char) c) % CHARBITS))
-
-/* Get the next unsigned number in the uncompiled pattern. */
-#define GET_UNSIGNED_NUMBER(num) \
- { if (p != pend) \
- { \
- PATFETCH (c); \
- while (isdigit (c)) \
- { \
- if (num < 0) \
- num = 0; \
- num = num * 10 + c - '0'; \
- if (p == pend) \
- break; \
- PATFETCH (c); \
- } \
- } \
- }
-
-#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
-
-#define IS_CHAR_CLASS(string) \
- (!strcmp (string, "alpha") || !strcmp (string, "upper") \
- || !strcmp (string, "lower") || !strcmp (string, "digit") \
- || !strcmp (string, "alnum") || !strcmp (string, "xdigit") \
- || !strcmp (string, "space") || !strcmp (string, "print") \
- || !strcmp (string, "punct") || !strcmp (string, "graph") \
- || !strcmp (string, "cntrl") || !strcmp (string, "blank"))
-
-
-/* These predicates are used in regex_compile. */
-
-/* P points to just after a ^ in PATTERN. Return true if that ^ comes
- * after an alternative or a begin-subexpression. We assume there is at
- * least one character before the ^.
- */
-
-#ifdef __STDC__
-static boolean
-at_begline_loc_p(__const__ char *pattern, __const__ char *p,
- reg_syntax_t syntax)
-#else
-static boolean at_begline_loc_p(pattern, p, syntax)
-__const__ char *pattern;
-__const__ char *p;
-reg_syntax_t syntax;
-#endif
-{
- __const__ char *prev = p - 2;
- boolean prev_prev_backslash = ((prev > pattern) && (prev[-1] == '\\'));
-
- return ( /* After a subexpression? */
- ((*prev == '(') && ((syntax & RE_NO_BK_PARENS) || prev_prev_backslash))
- ||
- /* After an alternative? */
- ((*prev == '|') && ((syntax & RE_NO_BK_VBAR) || prev_prev_backslash))
- );
-}
-
-/* The dual of at_begline_loc_p. This one is for $. We assume there is
- * at least one character after the $, i.e., `P < PEND'.
- */
-
-#ifdef __STDC__
-static boolean
-at_endline_loc_p(__const__ char *p, __const__ char *pend, int syntax)
-#else
-static boolean at_endline_loc_p(p, pend, syntax)
-__const__ char *p;
-__const__ char *pend;
-int syntax;
-#endif
-{
- __const__ char *next = p;
- boolean next_backslash = (*next == '\\');
- __const__ char *next_next = (p + 1 < pend) ? (p + 1) : 0;
-
- return (
- /* Before a subexpression? */
- ((syntax & RE_NO_BK_PARENS)
- ? (*next == ')')
- : (next_backslash && next_next && (*next_next == ')')))
- ||
- /* Before an alternative? */
- ((syntax & RE_NO_BK_VBAR)
- ? (*next == '|')
- : (next_backslash && next_next && (*next_next == '|')))
- );
-}
-
-
-unsigned char rx_id_translation[256] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
- 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
- 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
- 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
-
- 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
- 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
- 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
- 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
- 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
- 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
- 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
-
- 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
- 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
- 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
- 250, 251, 252, 253, 254, 255
-};
-
-/* The compiler keeps an inverted translation table.
- * This looks up/inititalize elements.
- * VALID is an array of booleans that validate CACHE.
- */
-
-#ifdef __STDC__
-static rx_Bitset
-inverse_translation(struct re_pattern_buffer *rxb,
- char *valid, rx_Bitset cache,
- unsigned char *translate, int c)
-#else
-static rx_Bitset inverse_translation(rxb, valid, cache, translate, c)
-struct re_pattern_buffer *rxb;
-char *valid;
-rx_Bitset cache;
-unsigned char *translate;
-int c;
-#endif
-{
- rx_Bitset cs
-
- = cache + c * rx_bitset_numb_subsets(rxb->rx.local_cset_size);
-
- if (!valid[c]) {
- int x;
- int c_tr = TRANSLATE(c);
-
- rx_bitset_null(rxb->rx.local_cset_size, cs);
- for (x = 0; x < 256; ++x) /* &&&& 13.37 */
- if (TRANSLATE(x) == c_tr)
- RX_bitset_enjoin(cs, x);
- valid[c] = 1;
- }
- return cs;
-}
-
-
-
-
-/* More subroutine declarations and macros for regex_compile. */
-
-/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
- false if it's not. */
-
-#ifdef __STDC__
-static boolean
-group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum)
-#else
-static boolean group_in_compile_stack(compile_stack, regnum)
-compile_stack_type compile_stack;
-regnum_t regnum;
-#endif
-{
- int this_element;
-
- for (this_element = compile_stack.avail - 1;
- this_element >= 0; this_element--)
- if (compile_stack.stack[this_element].regnum == regnum)
- return true;
-
- return false;
-}
-
-
-/*
- * Read the ending character of a range (in a bracket expression) from the
- * uncompiled pattern *P_PTR (which ends at PEND). We assume the
- * starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
- * Then we set the translation of all bits between the starting and
- * ending characters (inclusive) in the compiled pattern B.
- *
- * Return an error code.
- *
- * We use these short variable names so we can use the same macros as
- * `regex_compile' itself.
- */
-
-#ifdef __STDC__
-static reg_errcode_t
-compile_range(struct re_pattern_buffer *rxb, rx_Bitset cs,
- __const__ char **p_ptr, __const__ char *pend,
- unsigned char *translate, reg_syntax_t syntax,
- rx_Bitset inv_tr, char *valid_inv_tr)
-#else
-static reg_errcode_t
-compile_range(rxb, cs, p_ptr, pend, translate, syntax, inv_tr,
- valid_inv_tr)
-struct re_pattern_buffer *rxb;
-rx_Bitset cs;
-__const__ char **p_ptr;
-__const__ char *pend;
-unsigned char *translate;
-reg_syntax_t syntax;
-rx_Bitset inv_tr;
-char *valid_inv_tr;
-#endif
-{
- unsigned this_char;
-
- __const__ char *p = *p_ptr;
-
- unsigned char range_end;
- unsigned char range_start = TRANSLATE(p[-2]);
-
- if (p == pend)
- return REG_ERANGE;
-
- PATFETCH(range_end);
-
- (*p_ptr)++;
-
- if (range_start > range_end)
- return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
-
- for (this_char = range_start; this_char <= range_end; this_char++) {
- rx_Bitset it =
- inverse_translation(rxb, valid_inv_tr, inv_tr, translate,
-
- this_char);
-
- rx_bitset_union(rxb->rx.local_cset_size, cs, it);
- }
-
- return REG_NOERROR;
-}
-
-
-/* This searches a regexp for backreference side effects.
- * It fills in the array OUT with 1 at the index of every register pair
- * referenced by a backreference.
- *
- * This is used to help optimize patterns for searching. The information is
- * useful because, if the caller doesn't want register values, backreferenced
- * registers are the only registers for which we need rx_backtrack.
- */
-
-#ifdef __STDC__
-static void
-find_backrefs(char *out, struct rexp_node *rexp,
- struct re_se_params *params)
-#else
-static void find_backrefs(out, rexp, params)
-char *out;
-struct rexp_node *rexp;
-struct re_se_params *params;
-#endif
-{
- if (rexp)
- switch (rexp->type) {
- case r_cset:
- case r_data:
- return;
- case r_alternate:
- case r_concat:
- case r_opt:
- case r_star:
- case r_2phase_star:
- find_backrefs(out, rexp->params.pair.left, params);
- find_backrefs(out, rexp->params.pair.right, params);
- return;
- case r_side_effect:
- if (((long) rexp->params.side_effect >= 0)
- && (params[(long) rexp->params.side_effect].se ==
- re_se_backref))
- out[params[(long) rexp->params.side_effect].op1] = 1;
- return;
- }
-}
-
-
-
-/* Returns 0 unless the pattern can match the empty string. */
-
-#ifdef __STDC__
-static int
-compute_fastset(struct re_pattern_buffer *rxb, struct rexp_node *rexp)
-#else
-static int compute_fastset(rxb, rexp)
-struct re_pattern_buffer *rxb;
-struct rexp_node *rexp;
-#endif
-{
- if (!rexp)
- return 1;
- switch (rexp->type) {
- case r_data:
- return 1;
- case r_cset:
- {
- rx_bitset_union(rxb->rx.local_cset_size,
- rxb->fastset, rexp->params.cset);
- }
- return 0;
- case r_concat:
- return (compute_fastset(rxb, rexp->params.pair.left)
- && compute_fastset(rxb, rexp->params.pair.right));
- case r_2phase_star:
- compute_fastset(rxb, rexp->params.pair.left);
- /* compute_fastset (rxb, rexp->params.pair.right); nope... */
- return 1;
- case r_alternate:
- return !!(compute_fastset(rxb, rexp->params.pair.left)
- + compute_fastset(rxb, rexp->params.pair.right));
- case r_opt:
- case r_star:
- compute_fastset(rxb, rexp->params.pair.left);
- return 1;
- case r_side_effect:
- return 1;
- }
-
- /* this should never happen */
- return 0;
-}
-
-
-/* returns
- * 1 -- yes, definately anchored by the given side effect.
- * 2 -- maybe anchored, maybe the empty string.
- * 0 -- definately not anchored
- * There is simply no other possibility.
- */
-
-#ifdef __STDC__
-static int is_anchored(struct rexp_node *rexp, rx_side_effect se)
-#else
-static int is_anchored(rexp, se)
-struct rexp_node *rexp;
-rx_side_effect se;
-#endif
-{
- if (!rexp)
- return 2;
- switch (rexp->type) {
- case r_cset:
- case r_data:
- return 0;
- case r_concat:
- case r_2phase_star:
- {
- int l = is_anchored(rexp->params.pair.left, se);
-
- return (l == 2 ? is_anchored(rexp->params.pair.right, se) : l);
- }
- case r_alternate:
- {
- int l = is_anchored(rexp->params.pair.left, se);
- int r = l ? is_anchored(rexp->params.pair.right, se) : 0;
-
- if (l == r)
- return l;
- else if ((l == 0) || (r == 0))
- return 0;
- else
- return 2;
- }
- case r_opt:
- case r_star:
- return is_anchored(rexp->params.pair.left, se) ? 2 : 0;
-
- case r_side_effect:
- return ((rexp->params.side_effect == se)
- ? 1 : 2);
- }
-
- /* this should never happen */
- return 0;
-}
-
-
-/* This removes register assignments that aren't required by backreferencing.
- * This can speed up explore_future, especially if it eliminates
- * non-determinism in the superstate NFA.
- *
- * NEEDED is an array of characters, presumably filled in by FIND_BACKREFS.
- * The non-zero elements of the array indicate which register assignments
- * can NOT be removed from the expression.
- */
-
-#ifdef __STDC__
-static struct rexp_node *remove_unecessary_side_effects(struct rx *rx,
- char *needed,
- struct rexp_node
- *rexp,
- struct re_se_params
- *params)
-#else
-static struct rexp_node *remove_unecessary_side_effects(rx, needed, rexp,
- params)
-struct rx *rx;
-char *needed;
-struct rexp_node *rexp;
-struct re_se_params *params;
-#endif
-{
- struct rexp_node *l;
- struct rexp_node *r;
-
- if (!rexp)
- return 0;
- else
- switch (rexp->type) {
- case r_cset:
- case r_data:
- return rexp;
- case r_alternate:
- case r_concat:
- case r_2phase_star:
- l = remove_unecessary_side_effects(rx, needed,
- rexp->params.pair.left,
- params);
- r =
- remove_unecessary_side_effects(rx, needed,
- rexp->params.pair.right,
- params);
- if ((l && r) || (rexp->type != r_concat)) {
- rexp->params.pair.left = l;
- rexp->params.pair.right = r;
- return rexp;
- } else {
- rexp->params.pair.left = rexp->params.pair.right = 0;
- rx_free_rexp(rx, rexp);
- return l ? l : r;
- }
- case r_opt:
- case r_star:
- l = remove_unecessary_side_effects(rx, needed,
- rexp->params.pair.left,
- params);
- if (l) {
- rexp->params.pair.left = l;
- return rexp;
- } else {
- rexp->params.pair.left = 0;
- rx_free_rexp(rx, rexp);
- return 0;
- }
- case r_side_effect:
- {
- int se = (long) rexp->params.side_effect;
-
- if ((se >= 0)
- && (((enum re_side_effects) params[se].se == re_se_lparen)
- || ((enum re_side_effects) params[se].se ==
- re_se_rparen)) && (params[se].op1 > 0)
- && (!needed[params[se].op1])) {
- rx_free_rexp(rx, rexp);
- return 0;
- } else
- return rexp;
- }
- }
-
- /* this should never happen */
- return 0;
-}
-
-
-
-#ifdef __STDC__
-static int
-pointless_if_repeated(struct rexp_node *node, struct re_se_params *params)
-#else
-static int pointless_if_repeated(node, params)
-struct rexp_node *node;
-struct re_se_params *params;
-#endif
-{
- if (!node)
- return 1;
- switch (node->type) {
- case r_cset:
- return 0;
- case r_alternate:
- case r_concat:
- case r_2phase_star:
- return (pointless_if_repeated(node->params.pair.left, params)
- && pointless_if_repeated(node->params.pair.right, params));
- case r_opt:
- case r_star:
- return pointless_if_repeated(node->params.pair.left, params);
- case r_side_effect:
- switch (((long) node->params.side_effect < 0)
- ? (enum re_side_effects) node->params.side_effect
- : (enum re_side_effects) params[(long) node->
- params.side_effect].se) {
- case re_se_try:
- case re_se_at_dot:
- case re_se_begbuf:
- case re_se_hat:
- case re_se_wordbeg:
- case re_se_wordbound:
- case re_se_notwordbound:
- case re_se_wordend:
- case re_se_endbuf:
- case re_se_dollar:
- case re_se_fail:
- case re_se_win:
- return 1;
- case re_se_lparen:
- case re_se_rparen:
- case re_se_iter:
- case re_se_end_iter:
- case re_se_syntax:
- case re_se_not_syntax:
- case re_se_backref:
- return 0;
- }
- case r_data:
- default:
- return 0;
- }
-}
-
-
-
-#ifdef __STDC__
-static int
-registers_on_stack(struct re_pattern_buffer *rxb,
- struct rexp_node *rexp, int in_danger,
- struct re_se_params *params)
-#else
-static int registers_on_stack(rxb, rexp, in_danger, params)
-struct re_pattern_buffer *rxb;
-struct rexp_node *rexp;
-int in_danger;
-struct re_se_params *params;
-#endif
-{
- if (!rexp)
- return 0;
- else
- switch (rexp->type) {
- case r_cset:
- case r_data:
- return 0;
- case r_alternate:
- case r_concat:
- return (registers_on_stack(rxb, rexp->params.pair.left,
- in_danger, params)
- || (registers_on_stack
- (rxb, rexp->params.pair.right,
- in_danger, params)));
- case r_opt:
- return registers_on_stack(rxb, rexp->params.pair.left, 0,
- params);
- case r_star:
- return registers_on_stack(rxb, rexp->params.pair.left, 1,
- params);
- case r_2phase_star:
- return
- (registers_on_stack(rxb, rexp->params.pair.left, 1, params)
- || registers_on_stack(rxb, rexp->params.pair.right, 1,
- params));
- case r_side_effect:
- {
- int se = (long) rexp->params.side_effect;
-
- if (in_danger && (se >= 0)
- && (params[se].op1 > 0)
- && (((enum re_side_effects) params[se].se == re_se_lparen)
- || ((enum re_side_effects) params[se].se ==
- re_se_rparen))) return 1;
- else
- return 0;
- }
- }
-
- /* this should never happen */
- return 0;
-}
-
-
-
-static char idempotent_complex_se[] = {
-#define RX_WANT_SE_DEFS 1
-#undef RX_DEF_SE
-#undef RX_DEF_CPLX_SE
-#define RX_DEF_SE(IDEM, NAME, VALUE)
-#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) IDEM,
-#include <regex.h>
-#undef RX_DEF_SE
-#undef RX_DEF_CPLX_SE
-#undef RX_WANT_SE_DEFS
- 23
-};
-
-static char idempotent_se[] = {
- 13,
-#define RX_WANT_SE_DEFS 1
-#undef RX_DEF_SE
-#undef RX_DEF_CPLX_SE
-#define RX_DEF_SE(IDEM, NAME, VALUE) IDEM,
-#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)
-#include <regex.h>
-#undef RX_DEF_SE
-#undef RX_DEF_CPLX_SE
-#undef RX_WANT_SE_DEFS
- 42
-};
-
-
-
-#ifdef __STDC__
-static int has_any_se(struct rx *rx, struct rexp_node *rexp)
-#else
-static int has_any_se(rx, rexp)
-struct rx *rx;
-struct rexp_node *rexp;
-#endif
-{
- if (!rexp)
- return 0;
-
- switch (rexp->type) {
- case r_cset:
- case r_data:
- return 0;
-
- case r_side_effect:
- return 1;
-
- case r_2phase_star:
- case r_concat:
- case r_alternate:
- return (has_any_se(rx, rexp->params.pair.left)
- || has_any_se(rx, rexp->params.pair.right));
-
- case r_opt:
- case r_star:
- return has_any_se(rx, rexp->params.pair.left);
- }
-
- /* this should never happen */
- return 0;
-}
-
-
-
-/* This must be called AFTER `convert_hard_loops' for a given REXP. */
-#ifdef __STDC__
-static int
-has_non_idempotent_epsilon_path(struct rx *rx,
- struct rexp_node *rexp,
- struct re_se_params *params)
-#else
-static int has_non_idempotent_epsilon_path(rx, rexp, params)
-struct rx *rx;
-struct rexp_node *rexp;
-struct re_se_params *params;
-#endif
-{
- if (!rexp)
- return 0;
-
- switch (rexp->type) {
- case r_cset:
- case r_data:
- case r_star:
- return 0;
-
- case r_side_effect:
- return
- !((long) rexp->params.side_effect > 0
- ?
- idempotent_complex_se[params
- [(long) rexp->params.
- side_effect].se] :
- idempotent_se[-(long) rexp->params.side_effect]);
-
- case r_alternate:
- return
- (has_non_idempotent_epsilon_path(rx,
- rexp->params.pair.left,
- params)
- || has_non_idempotent_epsilon_path(rx,
- rexp->params.pair.right,
- params));
-
- case r_2phase_star:
- case r_concat:
- return
- (has_non_idempotent_epsilon_path(rx,
- rexp->params.pair.left,
- params)
- && has_non_idempotent_epsilon_path(rx,
- rexp->params.pair.right,
- params));
-
- case r_opt:
- return has_non_idempotent_epsilon_path(rx,
- rexp->params.pair.left,
- params);
- }
-
- /* this should never happen */
- return 0;
-}
-
-
-
-/* This computes rougly what it's name suggests. It can (and does) go wrong
- * in the direction of returning spurious 0 without causing disasters.
- */
-#ifdef __STDC__
-static int begins_with_complex_se(struct rx *rx, struct rexp_node *rexp)
-#else
-static int begins_with_complex_se(rx, rexp)
-struct rx *rx;
-struct rexp_node *rexp;
-#endif
-{
- if (!rexp)
- return 0;
-
- switch (rexp->type) {
- case r_cset:
- case r_data:
- return 0;
-
- case r_side_effect:
- return ((long) rexp->params.side_effect >= 0);
-
- case r_alternate:
- return (begins_with_complex_se(rx, rexp->params.pair.left)
- && begins_with_complex_se(rx, rexp->params.pair.right));
-
-
- case r_concat:
- return has_any_se(rx, rexp->params.pair.left);
- case r_opt:
- case r_star:
- case r_2phase_star:
- return 0;
- }
-
- /* this should never happen */
- return 0;
-}
-
-
-/* This destructively removes some of the re_se_tv side effects from
- * a rexp tree. In particular, during parsing re_se_tv was inserted on the
- * right half of every | to guarantee that posix path preference could be
- * honored. This function removes some which it can be determined aren't
- * needed.
- */
-
-#ifdef __STDC__
-static void
-speed_up_alt(struct rx *rx, struct rexp_node *rexp, int unposix)
-#else
-static void speed_up_alt(rx, rexp, unposix)
-struct rx *rx;
-struct rexp_node *rexp;
-int unposix;
-#endif
-{
- if (!rexp)
- return;
-
- switch (rexp->type) {
- case r_cset:
- case r_data:
- case r_side_effect:
- return;
-
- case r_opt:
- case r_star:
- speed_up_alt(rx, rexp->params.pair.left, unposix);
- return;
-
- case r_2phase_star:
- case r_concat:
- speed_up_alt(rx, rexp->params.pair.left, unposix);
- speed_up_alt(rx, rexp->params.pair.right, unposix);
- return;
-
- case r_alternate:
- /* the right child is guaranteed to be (concat re_se_tv <subexp>) */
-
- speed_up_alt(rx, rexp->params.pair.left, unposix);
- speed_up_alt(rx, rexp->params.pair.right->params.pair.right,
- unposix);
-
- if (unposix
- || (begins_with_complex_se
- (rx, rexp->params.pair.right->params.pair.right))
- || !(has_any_se(rx, rexp->params.pair.right->params.pair.right)
- || has_any_se(rx, rexp->params.pair.left))) {
- struct rexp_node *conc = rexp->params.pair.right;
-
- rexp->params.pair.right = conc->params.pair.right;
- conc->params.pair.right = 0;
- rx_free_rexp(rx, conc);
- }
- }
-}
-
-
-
-
-
-/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
- Returns one of error codes defined in `regex.h', or zero for success.
-
- Assumes the `allocated' (and perhaps `buffer') and `translate'
- fields are set in BUFP on entry.
-
- If it succeeds, results are put in BUFP (if it returns an error, the
- contents of BUFP are undefined):
- `buffer' is the compiled pattern;
- `syntax' is set to SYNTAX;
- `used' is set to the length of the compiled pattern;
- `fastmap_accurate' is set to zero;
- `re_nsub' is set to the number of groups in PATTERN;
- `not_bol' and `not_eol' are set to zero.
-
- The `fastmap' and `newline_anchor' fields are neither
- examined nor set. */
-
-
-#ifdef __STDC__
-RX_DECL reg_errcode_t
-rx_compile(__const__ char *pattern, int size,
- reg_syntax_t syntax, struct re_pattern_buffer *rxb)
-#else
-RX_DECL reg_errcode_t rx_compile(pattern, size, syntax, rxb)
-__const__ char *pattern;
-int size;
-reg_syntax_t syntax;
-struct re_pattern_buffer *rxb;
-#endif
-{
- RX_subset
- inverse_translate[CHAR_SET_SIZE *
- rx_bitset_numb_subsets(CHAR_SET_SIZE)];
- char validate_inv_tr[CHAR_SET_SIZE *
-
- rx_bitset_numb_subsets(CHAR_SET_SIZE)];
-
- /* We fetch characters from PATTERN here. Even though PATTERN is
- `char *' (i.e., signed), we declare these variables as unsigned, so
- they can be reliably used as array indices. */
- register unsigned char c, c1;
-
- /* A random tempory spot in PATTERN. */
- __const__ char *p1;
-
- /* Keeps track of unclosed groups. */
- compile_stack_type compile_stack;
-
- /* Points to the current (ending) position in the pattern. */
- __const__ char *p = pattern;
- __const__ char *pend = pattern + size;
-
- /* How to translate the characters in the pattern. */
- unsigned char *translate = (rxb->translate
-
- ? rxb->translate : rx_id_translation);
-
- /* When parsing is done, this will hold the expression tree. */
- struct rexp_node *rexp = 0;
-
- /* In the midst of compilation, this holds onto the regexp
- * first parst while rexp goes on to aquire additional constructs.
- */
- struct rexp_node *orig_rexp = 0;
- struct rexp_node *fewer_side_effects = 0;
-
- /* This and top_expression are saved on the compile stack. */
- struct rexp_node **top_expression = &rexp;
- struct rexp_node **last_expression = top_expression;
-
- /* Parameter to `goto append_node' */
- struct rexp_node *append;
-
- /* Counts open-groups as they are encountered. This is the index of the
- * innermost group being compiled.
- */
- regnum_t regnum = 0;
-
- /* Place in the uncompiled pattern (i.e., the {) to
- * which to go back if the interval is invalid.
- */
- __const__ char *beg_interval;
-
- struct re_se_params *params = 0;
- int paramc = 0; /* How many complex side effects so far? */
-
- rx_side_effect side; /* param to `goto add_side_effect' */
-
- bzero(validate_inv_tr, sizeof(validate_inv_tr));
-
- rxb->rx.instruction_table = rx_id_instruction_table;
-
-
- /* Initialize the compile stack. */
- compile_stack.stack = ((compile_stack_elt_t *)
- malloc((INIT_COMPILE_STACK_SIZE) *
- sizeof(compile_stack_elt_t)));
- if (compile_stack.stack == 0)
- return REG_ESPACE;
-
- compile_stack.size = INIT_COMPILE_STACK_SIZE;
- compile_stack.avail = 0;
-
- /* Initialize the pattern buffer. */
- rxb->rx.cache = &default_cache;
- rxb->syntax = syntax;
- rxb->fastmap_accurate = 0;
- rxb->not_bol = rxb->not_eol = 0;
- rxb->least_subs = 0;
-
- /* Always count groups, whether or not rxb->no_sub is set.
- * The whole pattern is implicitly group 0, so counting begins
- * with 1.
- */
- rxb->re_nsub = 0;
-
-#if !defined (emacs) && !defined (SYNTAX_TABLE)
- /* Initialize the syntax table. */
- init_syntax_once();
-#endif
-
- /* Loop through the uncompiled pattern until we're at the end. */
- while (p != pend) {
- PATFETCH(c);
-
- switch (c) {
- case '^':
- {
- if ( /* If at start of pattern, it's an operator. */
- p == pattern + 1
- /* If context independent, it's an operator. */
- || syntax & RE_CONTEXT_INDEP_ANCHORS
- /* Otherwise, depends on what's come before. */
- || at_begline_loc_p(pattern, p, syntax)) {
- struct rexp_node *n = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) re_se_hat);
-
- if (!n)
- return REG_ESPACE;
- append = n;
- goto append_node;
- } else
- goto normal_char;
- }
- break;
-
-
- case '$':
- {
- if ( /* If at end of pattern, it's an operator. */
- p == pend
- /* If context independent, it's an operator. */
- || syntax & RE_CONTEXT_INDEP_ANCHORS
- /* Otherwise, depends on what's next. */
- || at_endline_loc_p(p, pend, syntax)) {
- struct rexp_node *n = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) re_se_dollar);
-
- if (!n)
- return REG_ESPACE;
- append = n;
- goto append_node;
- } else
- goto normal_char;
- }
- break;
-
-
- case '+':
- case '?':
- if ((syntax & RE_BK_PLUS_QM)
- || (syntax & RE_LIMITED_OPS))
- goto normal_char;
-
- handle_plus:
- case '*':
- /* If there is no previous pattern... */
- if (pointless_if_repeated(*last_expression, params)) {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
- else if (!(syntax & RE_CONTEXT_INDEP_OPS))
- goto normal_char;
- }
-
- {
- /* 1 means zero (many) matches is allowed. */
- char zero_times_ok = 0, many_times_ok = 0;
-
- /* If there is a sequence of repetition chars, collapse it
- down to just one (the right one). We can't combine
- interval operators with these because of, e.g., `a{2}*',
- which should only match an even number of `a's. */
-
- for (;;) {
- zero_times_ok |= c != '+';
- many_times_ok |= c != '?';
-
- if (p == pend)
- break;
-
- PATFETCH(c);
-
- if (c == '*' || (!(syntax & RE_BK_PLUS_QM)
- && (c == '+' || c == '?')));
-
- else if (syntax & RE_BK_PLUS_QM && c == '\\') {
- if (p == pend)
- return REG_EESCAPE;
-
- PATFETCH(c1);
- if (!(c1 == '+' || c1 == '?')) {
- PATUNFETCH;
- PATUNFETCH;
- break;
- }
-
- c = c1;
- } else {
- PATUNFETCH;
- break;
- }
-
- /* If we get here, we found another repeat character. */
- }
-
- /* Star, etc. applied to an empty pattern is equivalent
- to an empty pattern. */
- if (!last_expression)
- break;
-
- /* Now we know whether or not zero matches is allowed
- * and also whether or not two or more matches is allowed.
- */
-
- {
- struct rexp_node *inner_exp = *last_expression;
- int need_sync = 0;
-
- if (many_times_ok
- && has_non_idempotent_epsilon_path(&rxb->rx,
- inner_exp,
- params)) {
- struct rexp_node *pusher =
- rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) re_se_pushpos);
- struct rexp_node *checker
- = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) re_se_chkpos);
- struct rexp_node *pushback
- = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) re_se_pushback);
- rx_Bitset cs = rx_cset(&rxb->rx);
- struct rexp_node *lit_t;
- struct rexp_node *fake_state;
- struct rexp_node *phase2;
- struct rexp_node *popper;
- struct rexp_node *star;
- struct rexp_node *a;
- struct rexp_node *whole_thing;
-
- if (!cs)
- return REG_ESPACE;
- lit_t = rx_mk_r_cset(&rxb->rx, cs);
- fake_state =
- rx_mk_r_concat(&rxb->rx, pushback, lit_t);
- phase2 =
- rx_mk_r_concat(&rxb->rx, checker, fake_state);
- popper =
- rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect)
- re_se_poppos);
- star =
- rx_mk_r_2phase_star(&rxb->rx, inner_exp,
- phase2);
- a = rx_mk_r_concat(&rxb->rx, pusher, star);
- whole_thing = rx_mk_r_concat(&rxb->rx, a, popper);
-
- if (!
- (pusher && star && pushback && lit_t
- && fake_state && lit_t && phase2 && checker
- && popper && a && whole_thing))
- return REG_ESPACE;
- RX_bitset_enjoin(cs, 't');
- *last_expression = whole_thing;
- } else {
- struct rexp_node *star =
- (many_times_ok ? rx_mk_r_star : rx_mk_r_opt)
- (&rxb->rx, *last_expression);
-
- if (!star)
- return REG_ESPACE;
- *last_expression = star;
- need_sync = has_any_se(&rxb->rx, *last_expression);
- }
- if (!zero_times_ok) {
- struct rexp_node *concat
- = rx_mk_r_concat(&rxb->rx, inner_exp,
- rx_copy_rexp(&rxb->rx,
- *last_expression));
-
- if (!concat)
- return REG_ESPACE;
- *last_expression = concat;
- }
- if (need_sync) {
- int sync_se = paramc;
-
- params = (params ? ((struct re_se_params *)
- realloc(params,
- sizeof(*params) * (1 +
- paramc)))
- : ((struct re_se_params *)
- malloc(sizeof(*params))));
- if (!params)
- return REG_ESPACE;
- ++paramc;
- params[sync_se].se = re_se_tv;
- side = (rx_side_effect) sync_se;
- goto add_side_effect;
- }
- }
- /* The old regex.c used to optimize `.*\n'.
- * Maybe rx should too?
- */
- }
- break;
-
-
- case '.':
- {
- rx_Bitset cs = rx_cset(&rxb->rx);
- struct rexp_node *n = rx_mk_r_cset(&rxb->rx, cs);
-
- if (!(cs && n))
- return REG_ESPACE;
-
- rx_bitset_universe(rxb->rx.local_cset_size, cs);
- if (!(rxb->syntax & RE_DOT_NEWLINE))
- RX_bitset_remove(cs, '\n');
- if (!(rxb->syntax & RE_DOT_NOT_NULL))
- RX_bitset_remove(cs, 0);
-
- append = n;
- goto append_node;
- break;
- }
-
-
- case '[':
- if (p == pend)
- return REG_EBRACK;
- {
- boolean had_char_class = false;
- rx_Bitset cs = rx_cset(&rxb->rx);
- struct rexp_node *node = rx_mk_r_cset(&rxb->rx, cs);
- int is_inverted = *p == '^';
-
- if (!(node && cs))
- return REG_ESPACE;
-
- /* This branch of the switch is normally exited with
- *`goto append_node'
- */
- append = node;
-
- if (is_inverted)
- p++;
-
- /* Remember the first position in the bracket expression. */
- p1 = p;
-
- /* Read in characters and ranges, setting map bits. */
- for (;;) {
- if (p == pend)
- return REG_EBRACK;
-
- PATFETCH(c);
-
- /* \ might escape characters inside [...] and [^...]. */
- if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
- && c == '\\') {
- if (p == pend)
- return REG_EESCAPE;
-
- PATFETCH(c1);
- {
- rx_Bitset it = inverse_translation(rxb,
- validate_inv_tr,
- inverse_translate,
- translate,
- c1);
-
- rx_bitset_union(rxb->rx.local_cset_size, cs,
- it);
- }
- continue;
- }
-
- /* Could be the end of the bracket expression. If it's
- not (i.e., when the bracket expression is `[]' so
- far), the ']' character bit gets set way below. */
- if (c == ']' && p != p1 + 1)
- goto finalize_class_and_append;
-
- /* Look ahead to see if it's a range when the last thing
- was a character class. */
- if (had_char_class && c == '-' && *p != ']')
- return REG_ERANGE;
-
- /* Look ahead to see if it's a range when the last thing
- was a character: if this is a hyphen not at the
- beginning or the end of a list, then it's the range
- operator. */
- if (c == '-' && !(p - 2 >= pattern && p[-2] == '[')
- && !(p - 3 >= pattern && p[-3] == '['
- && p[-2] == '^') && *p != ']') {
- reg_errcode_t ret =
- compile_range(rxb, cs, &p, pend, translate,
- syntax,
- inverse_translate,
-
- validate_inv_tr);
-
- if (ret != REG_NOERROR)
- return ret;
- }
-
- else if (p[0] == '-' && p[1] != ']') { /* This handles ranges made up of characters only. */
- reg_errcode_t ret;
-
- /* Move past the `-'. */
- PATFETCH(c1);
-
- ret =
- compile_range(rxb, cs, &p, pend, translate,
- syntax, inverse_translate,
- validate_inv_tr);
- if (ret != REG_NOERROR)
- return ret;
- }
-
- /* See if we're at the beginning of a possible character
- class. */
-
- else if ((syntax & RE_CHAR_CLASSES)
- && (c == '[') && (*p == ':')) {
- char str[CHAR_CLASS_MAX_LENGTH + 1];
-
- PATFETCH(c);
- c1 = 0;
-
- /* If pattern is `[[:'. */
- if (p == pend)
- return REG_EBRACK;
-
- for (;;) {
- PATFETCH(c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH) break;
- str[c1++] = c;
- }
- str[c1] = '\0';
-
- /* If isn't a word bracketed by `[:' and:`]':
- undo the ending character, the letters, and leave
- the leading `:' and `[' (but set bits for them). */
- if (c == ':' && *p == ']') {
- int ch;
- boolean is_alnum = !strcmp(str, "alnum");
- boolean is_alpha = !strcmp(str, "alpha");
- boolean is_blank = !strcmp(str, "blank");
- boolean is_cntrl = !strcmp(str, "cntrl");
- boolean is_digit = !strcmp(str, "digit");
- boolean is_graph = !strcmp(str, "graph");
- boolean is_lower = !strcmp(str, "lower");
- boolean is_print = !strcmp(str, "print");
- boolean is_punct = !strcmp(str, "punct");
- boolean is_space = !strcmp(str, "space");
- boolean is_upper = !strcmp(str, "upper");
- boolean is_xdigit = !strcmp(str, "xdigit");
-
- if (!IS_CHAR_CLASS(str))
- return REG_ECTYPE;
-
- /* Throw away the ] at the end of the character
- class. */
- PATFETCH(c);
-
- if (p == pend)
- return REG_EBRACK;
-
- for (ch = 0; ch < 1 << CHARBITS; ch++) {
- if ((is_alnum && isalnum(ch))
- || (is_alpha && isalpha(ch))
- || (is_blank && isblank(ch))
- || (is_cntrl && iscntrl(ch))
- || (is_digit && isdigit(ch))
- || (is_graph && isgraph(ch))
- || (is_lower && islower(ch))
- || (is_print && isprint(ch))
- || (is_punct && ispunct(ch))
- || (is_space && isspace(ch))
- || (is_upper && isupper(ch))
- || (is_xdigit && isxdigit(ch))) {
- rx_Bitset it = inverse_translation(rxb,
- validate_inv_tr,
- inverse_translate,
- translate,
- ch);
-
- rx_bitset_union(rxb->
- rx.local_cset_size, cs,
- it);
- }
- }
- had_char_class = true;
- } else {
- c1++;
- while (c1--)
- PATUNFETCH;
- {
- rx_Bitset it = inverse_translation(rxb,
- validate_inv_tr,
- inverse_translate,
- translate,
- '[');
-
- rx_bitset_union(rxb->rx.local_cset_size,
- cs, it);
- }
- {
- rx_Bitset it = inverse_translation(rxb,
- validate_inv_tr,
- inverse_translate,
- translate,
- ':');
-
- rx_bitset_union(rxb->rx.local_cset_size,
- cs, it);
- }
- had_char_class = false;
- }
- } else {
- had_char_class = false;
- {
- rx_Bitset it = inverse_translation(rxb,
- validate_inv_tr,
- inverse_translate,
- translate,
- c);
-
- rx_bitset_union(rxb->rx.local_cset_size, cs,
- it);
- }
- }
- }
-
- finalize_class_and_append:
- if (is_inverted) {
- rx_bitset_complement(rxb->rx.local_cset_size, cs);
- if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
- RX_bitset_remove(cs, '\n');
- }
- goto append_node;
- }
- break;
-
-
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_open;
- else
- goto normal_char;
-
-
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_close;
- else
- goto normal_char;
-
-
- case '\n':
- if (syntax & RE_NEWLINE_ALT)
- goto handle_alt;
- else
- goto normal_char;
-
-
- case '|':
- if (syntax & RE_NO_BK_VBAR)
- goto handle_alt;
- else
- goto normal_char;
-
-
- case '{':
- if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- goto handle_interval;
- else
- goto normal_char;
-
-
- case '\\':
- if (p == pend)
- return REG_EESCAPE;
-
- /* Do not translate the character after the \, so that we can
- distinguish, e.g., \B from \b, even if we normally would
- translate, e.g., B to b. */
- PATFETCH_RAW(c);
-
- switch (c) {
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto normal_backslash;
-
- handle_open:
- rxb->re_nsub++;
- regnum++;
- if (COMPILE_STACK_FULL) {
- ((compile_stack.stack) =
- (compile_stack_elt_t *) realloc(compile_stack.stack,
- (compile_stack.size <<
- 1) *
- sizeof
- (compile_stack_elt_t)));
- if (compile_stack.stack == 0)
- return REG_ESPACE;
-
- compile_stack.size <<= 1;
- }
-
- if (*last_expression) {
- struct rexp_node *concat
- = rx_mk_r_concat(&rxb->rx, *last_expression, 0);
-
- if (!concat)
- return REG_ESPACE;
- *last_expression = concat;
- last_expression = &concat->params.pair.right;
- }
-
- /*
- * These are the values to restore when we hit end of this
- * group.
- */
- COMPILE_STACK_TOP.top_expression = top_expression;
- COMPILE_STACK_TOP.last_expression = last_expression;
- COMPILE_STACK_TOP.regnum = regnum;
-
- compile_stack.avail++;
-
- top_expression = last_expression;
- break;
-
-
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- goto normal_backslash;
-
- handle_close:
- /* See similar code for backslashed left paren above. */
- if (COMPILE_STACK_EMPTY) {
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) {
- goto normal_char;
- } else {
- return REG_ERPAREN;
- }
- }
-
- /* Since we just checked for an empty stack above, this
- ``can't happen''. */
-
- {
- /* We don't just want to restore into `regnum', because
- later groups should continue to be numbered higher,
- as in `(ab)c(de)' -- the second group is #2. */
- regnum_t this_group_regnum;
- struct rexp_node **inner = top_expression;
-
- compile_stack.avail--;
- top_expression = COMPILE_STACK_TOP.top_expression;
- last_expression = COMPILE_STACK_TOP.last_expression;
- this_group_regnum = COMPILE_STACK_TOP.regnum;
- {
- int left_se = paramc;
- int right_se = paramc + 1;
-
- params = (params ? ((struct re_se_params *)
- realloc(params,
- (paramc +
- 2) *
- sizeof(params[0])))
- : ((struct re_se_params *)
- malloc(2 * sizeof(params[0]))));
- if (!params)
- return REG_ESPACE;
- paramc += 2;
-
- params[left_se].se = re_se_lparen;
- params[left_se].op1 = this_group_regnum;
- params[right_se].se = re_se_rparen;
- params[right_se].op1 = this_group_regnum;
- {
- struct rexp_node *left
- = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) left_se);
- struct rexp_node *right
- = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) right_se);
- struct rexp_node *c1
- = (*inner ? rx_mk_r_concat(&rxb->rx, left,
- *inner) : left);
- struct rexp_node *c2 =
- rx_mk_r_concat(&rxb->rx, c1, right);
-
- if (!(left && right && c1 && c2))
- return REG_ESPACE;
- *inner = c2;
- }
- }
- break;
- }
-
- case '|': /* `\|'. */
- if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR))
- goto normal_backslash;
- handle_alt:
- if (syntax & RE_LIMITED_OPS)
- goto normal_char;
-
- {
- struct rexp_node *alt
- = rx_mk_r_alternate(&rxb->rx, *top_expression, 0);
-
- if (!alt)
- return REG_ESPACE;
- *top_expression = alt;
- last_expression = &alt->params.pair.right;
- {
- int sync_se = paramc;
-
- params = (params ? ((struct re_se_params *)
- realloc(params,
- (paramc +
- 1) *
- sizeof(params[0])))
- : ((struct re_se_params *)
- malloc(sizeof(params[0]))));
- if (!params)
- return REG_ESPACE;
- ++paramc;
-
- params[sync_se].se = re_se_tv;
- {
- struct rexp_node *sync
- = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) sync_se);
- struct rexp_node *conc
- = rx_mk_r_concat(&rxb->rx, sync, 0);
-
- if (!sync || !conc)
- return REG_ESPACE;
-
- *last_expression = conc;
- last_expression = &conc->params.pair.right;
- }
- }
- }
- break;
-
-
- case '{':
- /* If \{ is a literal. */
- if (!(syntax & RE_INTERVALS)
- /* If we're at `\{' and it's not the open-interval
- operator. */
- || ((syntax & RE_INTERVALS)
- && (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern
- && p == pend))
- goto normal_backslash;
-
- handle_interval:
- {
- /* If got here, then the syntax allows intervals. */
-
- /* At least (most) this many matches must be made. */
- int lower_bound = -1, upper_bound = -1;
-
- beg_interval = p - 1;
-
- if (p == pend) {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_EBRACE;
- }
-
- GET_UNSIGNED_NUMBER(lower_bound);
-
- if (c == ',') {
- GET_UNSIGNED_NUMBER(upper_bound);
- if (upper_bound < 0)
- upper_bound = RE_DUP_MAX;
- } else
- /* Interval such as `{1}' => match exactly once. */
- upper_bound = lower_bound;
-
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound) {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_BADBR;
- }
-
- if (!(syntax & RE_NO_BK_BRACES)) {
- if (c != '\\')
- return REG_EBRACE;
- PATFETCH(c);
- }
-
- if (c != '}') {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_BADBR;
- }
-
- /* We just parsed a valid interval. */
-
- /* If it's invalid to have no preceding re. */
- if (pointless_if_repeated(*last_expression, params)) {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
- else if (!(syntax & RE_CONTEXT_INDEP_OPS))
- goto unfetch_interval;
- /* was: else laststart = b; */
- }
-
- /* If the upper bound is zero, don't want to iterate
- * at all.
- */
- if (upper_bound == 0) {
- if (*last_expression) {
- rx_free_rexp(&rxb->rx, *last_expression);
- *last_expression = 0;
- }
- } else
- /* Otherwise, we have a nontrivial interval. */
- {
- int iter_se = paramc;
- int end_se = paramc + 1;
-
- params = (params ? ((struct re_se_params *)
- realloc(params,
- sizeof(*params) * (2 +
- paramc)))
- : ((struct re_se_params *)
- malloc(2 * sizeof(*params))));
- if (!params)
- return REG_ESPACE;
- paramc += 2;
- params[iter_se].se = re_se_iter;
- params[iter_se].op1 = lower_bound;
- params[iter_se].op2 = upper_bound;
-
- params[end_se].se = re_se_end_iter;
- params[end_se].op1 = lower_bound;
- params[end_se].op2 = upper_bound;
- {
- struct rexp_node *push0
- = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) re_se_push0);
- struct rexp_node *start_one_iter
- = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) iter_se);
- struct rexp_node *phase1
- = rx_mk_r_concat(&rxb->rx, start_one_iter,
- *last_expression);
- struct rexp_node *pushback
- = rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect) re_se_pushback);
- rx_Bitset cs = rx_cset(&rxb->rx);
- struct rexp_node *lit_t;
- struct rexp_node *phase2;
- struct rexp_node *loop;
- struct rexp_node *push_n_loop;
- struct rexp_node *final_test;
- struct rexp_node *full_exp;
-
- if (!cs)
- return REG_ESPACE;
- lit_t = rx_mk_r_cset(&rxb->rx, cs);
- phase2 =
- rx_mk_r_concat(&rxb->rx, pushback, lit_t);
- loop =
- rx_mk_r_2phase_star(&rxb->rx, phase1,
- phase2);
- push_n_loop =
- rx_mk_r_concat(&rxb->rx, push0, loop);
- final_test =
- rx_mk_r_side_effect(&rxb->rx,
- (rx_side_effect)
- end_se);
- full_exp =
- rx_mk_r_concat(&rxb->rx, push_n_loop,
- final_test);
-
- if (!(push0 && start_one_iter && phase1
- && pushback && lit_t && phase2
- && loop && push_n_loop && final_test
- && full_exp)) return REG_ESPACE;
-
- RX_bitset_enjoin(cs, 't');
-
- *last_expression = full_exp;
- }
- }
- beg_interval = 0;
- }
- break;
-
- unfetch_interval:
- /* If an invalid interval, match the characters as literals. */
- p = beg_interval;
- beg_interval = 0;
-
- /* normal_char and normal_backslash need `c'. */
- PATFETCH(c);
-
- if (!(syntax & RE_NO_BK_BRACES)) {
- if (p > pattern && p[-1] == '\\')
- goto normal_backslash;
- }
- goto normal_char;
-
-#ifdef emacs
- /* There is no way to specify the before_dot and after_dot
- operators. rms says this is ok. --karl */
- case '=':
- side = (rx_side_effect) rx_se_at_dot;
- goto add_side_effect;
- break;
-
- case 's':
- case 'S':
- {
- rx_Bitset cs = rx_cset(&rxb->rx);
- struct rexp_node *set = rx_mk_r_cset(&rxb->rx, cs);
-
- if (!(cs && set))
- return REG_ESPACE;
- if (c == 'S')
- rx_bitset_universe(rxb->rx.local_cset_size, cs);
-
- PATFETCH(c);
- {
- int x;
- enum syntaxcode code = syntax_spec_code[c];
-
- for (x = 0; x < 256; ++x) {
-
- if (SYNTAX(x) == code) {
- rx_Bitset it =
- inverse_translation(rxb, validate_inv_tr,
- inverse_translate,
- translate, x);
-
- rx_bitset_xor(rxb->rx.local_cset_size, cs, it);
- }
- }
- }
- append = set;
- goto append_node;
- }
- break;
-#endif /* emacs */
-
-
- case 'w':
- case 'W':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- {
- rx_Bitset cs = rx_cset(&rxb->rx);
- struct rexp_node *n =
- (cs ? rx_mk_r_cset(&rxb->rx, cs) : 0);
-
- if (!(cs && n))
- return REG_ESPACE;
- if (c == 'W')
- rx_bitset_universe(rxb->rx.local_cset_size, cs);
- {
- int x;
-
- for (x = rxb->rx.local_cset_size - 1; x > 0; --x)
- if (SYNTAX(x) & Sword)
- RX_bitset_toggle(cs, x);
- }
- append = n;
- goto append_node;
- }
- break;
-
-/* With a little extra work, some of these side effects could be optimized
- * away (basicly by looking at what we already know about the surrounding
- * chars).
- */
- case '<':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- side = (rx_side_effect) re_se_wordbeg;
- goto add_side_effect;
- break;
-
- case '>':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- side = (rx_side_effect) re_se_wordend;
- goto add_side_effect;
- break;
-
- case 'b':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- side = (rx_side_effect) re_se_wordbound;
- goto add_side_effect;
- break;
-
- case 'B':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- side = (rx_side_effect) re_se_notwordbound;
- goto add_side_effect;
- break;
-
- case '`':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- side = (rx_side_effect) re_se_begbuf;
- goto add_side_effect;
- break;
-
- case '\'':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- side = (rx_side_effect) re_se_endbuf;
- goto add_side_effect;
- break;
-
- add_side_effect:
- {
- struct rexp_node *se
-
- = rx_mk_r_side_effect(&rxb->rx, side);
- if (!se)
- return REG_ESPACE;
- append = se;
- goto append_node;
- }
- break;
-
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (syntax & RE_NO_BK_REFS)
- goto normal_char;
-
- c1 = c - '0';
-
- if (c1 > regnum)
- return REG_ESUBREG;
-
- /* Can't back reference to a subexpression if inside of it. */
- if (group_in_compile_stack(compile_stack, c1))
- return REG_ESUBREG;
-
- {
- int backref_se = paramc;
-
- params = (params ? ((struct re_se_params *)
- realloc(params,
- sizeof(*params) * (1 +
- paramc)))
- : ((struct re_se_params *)
- malloc(sizeof(*params))));
- if (!params)
- return REG_ESPACE;
- ++paramc;
- params[backref_se].se = re_se_backref;
- params[backref_se].op1 = c1;
- side = (rx_side_effect) backref_se;
- goto add_side_effect;
- }
- break;
-
- case '+':
- case '?':
- if (syntax & RE_BK_PLUS_QM)
- goto handle_plus;
- else
- goto normal_backslash;
-
- default:
- normal_backslash:
- /* You might think it would be useful for \ to mean
- not to translate; but if we don't translate it
- it will never match anything. */
- c = TRANSLATE(c);
- goto normal_char;
- }
- break;
-
-
- default:
- /* Expects the character in `c'. */
- normal_char:
- {
- rx_Bitset cs = rx_cset(&rxb->rx);
- struct rexp_node *match = rx_mk_r_cset(&rxb->rx, cs);
- rx_Bitset it;
-
- if (!(cs && match))
- return REG_ESPACE;
- it = inverse_translation(rxb, validate_inv_tr,
- inverse_translate, translate, c);
- rx_bitset_union(CHAR_SET_SIZE, cs, it);
- append = match;
-
- append_node:
- /* This genericly appends the rexp APPEND to *LAST_EXPRESSION
- * and then parses the next character normally.
- */
- if (*last_expression) {
- struct rexp_node *concat
- = rx_mk_r_concat(&rxb->rx, *last_expression, append);
-
- if (!concat)
- return REG_ESPACE;
- *last_expression = concat;
- last_expression = &concat->params.pair.right;
- } else
- *last_expression = append;
- }
- } /* switch (c) */
- } /* while p != pend */
-
-
- {
- int win_se = paramc;
-
- params = (params ? ((struct re_se_params *)
- realloc(params,
- sizeof(*params) * (1 + paramc)))
- : ((struct re_se_params *)
- malloc(sizeof(*params))));
- if (!params)
- return REG_ESPACE;
- ++paramc;
- params[win_se].se = re_se_win;
- {
- struct rexp_node *se
- = rx_mk_r_side_effect(&rxb->rx, (rx_side_effect) win_se);
- struct rexp_node *concat = rx_mk_r_concat(&rxb->rx, rexp, se);
-
- if (!(se && concat))
- return REG_ESPACE;
- rexp = concat;
- }
- }
-
-
- /* Through the pattern now. */
-
- if (!COMPILE_STACK_EMPTY)
- return REG_EPAREN;
-
- free(compile_stack.stack);
-
- orig_rexp = rexp;
-#ifdef RX_DEBUG
- if (rx_debug_compile) {
- dbug_rxb = rxb;
- fputs("\n\nCompiling ", stdout);
- fwrite(pattern, 1, size, stdout);
- fputs(":\n", stdout);
- rxb->se_params = params;
- print_rexp(&rxb->rx, orig_rexp, 2, re_seprint, stdout);
- }
-#endif
- {
- rx_Bitset cs = rx_cset(&rxb->rx);
- rx_Bitset cs2 = rx_cset(&rxb->rx);
- char *se_map = (char *) alloca(paramc);
- struct rexp_node *new_rexp = 0;
-
-
- bzero(se_map, paramc);
- find_backrefs(se_map, rexp, params);
- fewer_side_effects =
- remove_unecessary_side_effects(&rxb->rx, se_map,
- rx_copy_rexp(&rxb->rx, rexp),
- params);
-
- speed_up_alt(&rxb->rx, rexp, 0);
- speed_up_alt(&rxb->rx, fewer_side_effects, 1);
-
- {
- char *syntax_parens = rxb->syntax_parens;
-
- if (syntax_parens == (char *) 0x1)
- rexp = remove_unecessary_side_effects
- (&rxb->rx, se_map, rexp, params);
- else if (syntax_parens) {
- int x;
-
- for (x = 0; x < paramc; ++x)
- if (((params[x].se == re_se_lparen)
- || (params[x].se == re_se_rparen))
- && (!syntax_parens[params[x].op1]))
- se_map[x] = 1;
- rexp = remove_unecessary_side_effects
- (&rxb->rx, se_map, rexp, params);
- }
- }
-
- /* At least one more optimization would be nice to have here but i ran out
- * of time. The idea would be to delay side effects.
- * For examle, `(abc)' is the same thing as `abc()' except that the
- * left paren is offset by 3 (which we know at compile time).
- * (In this comment, write that second pattern `abc(:3:)'
- * where `(:3:' is a syntactic unit.)
- *
- * Trickier: `(abc|defg)' is the same as `(abc(:3:|defg(:4:))'
- * (The paren nesting may be hard to follow -- that's an alternation
- * of `abc(:3:' and `defg(:4:' inside (purely syntactic) parens
- * followed by the closing paren from the original expression.)
- *
- * Neither the expression tree representation nor the the nfa make
- * this very easy to write. :(
- */
-
- /* What we compile is different than what the parser returns.
- * Suppose the parser returns expression R.
- * Let R' be R with unnecessary register assignments removed
- * (see REMOVE_UNECESSARY_SIDE_EFFECTS, above).
- *
- * What we will compile is the expression:
- *
- * m{try}R{win}\|s{try}R'{win}
- *
- * {try} and {win} denote side effect epsilons (see EXPLORE_FUTURE).
- *
- * When trying a match, we insert an `m' at the beginning of the
- * string if the user wants registers to be filled, `s' if not.
- */
- new_rexp =
- rx_mk_r_alternate
- (&rxb->rx,
- rx_mk_r_concat(&rxb->rx, rx_mk_r_cset(&rxb->rx, cs2), rexp),
- rx_mk_r_concat(&rxb->rx,
- rx_mk_r_cset(&rxb->rx, cs),
- fewer_side_effects));
-
- if (!(new_rexp && cs && cs2))
- return REG_ESPACE;
- RX_bitset_enjoin(cs2, '\0'); /* prefixed to the rexp used for matching. */
- RX_bitset_enjoin(cs, '\1'); /* prefixed to the rexp used for searching. */
- rexp = new_rexp;
- }
-
-#ifdef RX_DEBUG
- if (rx_debug_compile) {
- fputs("\n...which is compiled as:\n", stdout);
- print_rexp(&rxb->rx, rexp, 2, re_seprint, stdout);
- }
-#endif
- {
- struct rx_nfa_state *start = 0;
- struct rx_nfa_state *end = 0;
-
- if (!rx_build_nfa(&rxb->rx, rexp, &start, &end))
- return REG_ESPACE; /* */
- else {
- void *mem = (void *) rxb->buffer;
- unsigned long size = rxb->allocated;
- int start_id;
- char *perm_mem;
- int iterator_size = paramc * sizeof(params[0]);
-
- end->is_final = 1;
- start->is_start = 1;
- rx_name_nfa_states(&rxb->rx);
- start_id = start->id;
-#ifdef RX_DEBUG
- if (rx_debug_compile) {
- fputs("...giving the NFA: \n", stdout);
- dbug_rxb = rxb;
- print_nfa(&rxb->rx, rxb->rx.nfa_states, re_seprint,
- stdout);
- }
-#endif
- if (!rx_eclose_nfa(&rxb->rx))
- return REG_ESPACE;
- else {
- rx_delete_epsilon_transitions(&rxb->rx);
-
- /* For compatability reasons, we need to shove the
- * compiled nfa into one chunk of malloced memory.
- */
- rxb->rx.reserved = (sizeof(params[0]) * paramc
- +
- rx_sizeof_bitset(rxb->
- rx.local_cset_size));
-#ifdef RX_DEBUG
- if (rx_debug_compile) {
- dbug_rxb = rxb;
- fputs("...which cooks down (uncompactified) to: \n",
- stdout);
- print_nfa(&rxb->rx, rxb->rx.nfa_states, re_seprint,
- stdout);
- }
-#endif
- if (!rx_compactify_nfa(&rxb->rx, &mem, &size))
- return REG_ESPACE;
- rxb->buffer = mem;
- rxb->allocated = size;
- rxb->rx.buffer = mem;
- rxb->rx.allocated = size;
- perm_mem = ((char *) rxb->rx.buffer
- + rxb->rx.allocated - rxb->rx.reserved);
- rxb->se_params = ((struct re_se_params *) perm_mem);
- bcopy(params, rxb->se_params, iterator_size);
- perm_mem += iterator_size;
- rxb->fastset = (rx_Bitset) perm_mem;
- rxb->start = rx_id_to_nfa_state(&rxb->rx, start_id);
- }
- rx_bitset_null(rxb->rx.local_cset_size, rxb->fastset);
- rxb->can_match_empty = compute_fastset(rxb, orig_rexp);
- rxb->match_regs_on_stack =
- registers_on_stack(rxb, orig_rexp, 0, params);
- rxb->search_regs_on_stack =
- registers_on_stack(rxb, fewer_side_effects, 0, params);
- if (rxb->can_match_empty)
- rx_bitset_universe(rxb->rx.local_cset_size, rxb->fastset);
- rxb->is_anchored =
- is_anchored(orig_rexp, (rx_side_effect) re_se_hat);
- rxb->begbuf_only =
- is_anchored(orig_rexp, (rx_side_effect) re_se_begbuf);
- }
- rx_free_rexp(&rxb->rx, rexp);
- if (params)
- free(params);
-#ifdef RX_DEBUG
- if (rx_debug_compile) {
- dbug_rxb = rxb;
- fputs("...which cooks down to: \n", stdout);
- print_nfa(&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
- }
-#endif
- }
- return REG_NOERROR;
-}
-
-
-
-/* This table gives an error message for each of the error codes listed
- in regex.h. Obviously the order here has to be same as there. */
-
-__const__ char *rx_error_msg[] = { 0, /* REG_NOERROR */
- "No match", /* REG_NOMATCH */
- "Invalid regular expression", /* REG_BADPAT */
- "Invalid collation character", /* REG_ECOLLATE */
- "Invalid character class name", /* REG_ECTYPE */
- "Trailing backslash", /* REG_EESCAPE */
- "Invalid back reference", /* REG_ESUBREG */
- "Unmatched [ or [^", /* REG_EBRACK */
- "Unmatched ( or \\(", /* REG_EPAREN */
- "Unmatched \\{", /* REG_EBRACE */
- "Invalid content of \\{\\}", /* REG_BADBR */
- "Invalid range end", /* REG_ERANGE */
- "Memory exhausted", /* REG_ESPACE */
- "Invalid preceding regular expression", /* REG_BADRPT */
- "Premature end of regular expression", /* REG_EEND */
- "Regular expression too big", /* REG_ESIZE */
- "Unmatched ) or \\)", /* REG_ERPAREN */
-};
-
-
-
-
-char rx_slowmap[256] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-};
-
-#ifdef __STDC__
-RX_DECL void rx_blow_up_fastmap(struct re_pattern_buffer *rxb)
-#else
-RX_DECL void rx_blow_up_fastmap(rxb)
-struct re_pattern_buffer *rxb;
-#endif
-{
- int x;
-
- for (x = 0; x < 256; ++x) /* &&&& 3.6 % */
- rxb->fastmap[x] = !!RX_bitset_member(rxb->fastset, x);
- rxb->fastmap_accurate = 1;
-}
-
-
-
-
-#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
-#define RE_SEARCH_2_FN inner_re_search_2
-#define RE_S2_QUAL static
-#else
-#define RE_SEARCH_2_FN re_search_2
-#define RE_S2_QUAL
-#endif
-
-struct re_search_2_closure {
- __const__ char *string1;
- int size1;
- __const__ char *string2;
- int size2;
-};
-
-RE_S2_QUAL int
-RE_SEARCH_2_FN(struct re_pattern_buffer *,
- __const__ char *,
- int, __const__ char *, int, int,
-
- int, struct re_registers *, int);
-int re_rx_search(struct re_pattern_buffer *, int,
- int, int, int, rx_get_burst_fn,
- rx_back_check_fn, rx_fetch_char_fn,
- void *, struct re_registers *,
-
- struct rx_search_state *, struct rx_search_state *);
-#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
-int re_search_2(struct re_pattern_buffer *,
- __const__ char *, int,
- __const__ char *, int,
-
- int, int, struct re_registers *, int);
-#endif
-int re_search(struct re_pattern_buffer *,
-
- __const__ char *, int, int, int, struct re_registers *);
-int re_match_2(struct re_pattern_buffer *,
- __const__ char *, int,
- __const__ char *, int, int, struct re_registers *, int);
-int re_match(struct re_pattern_buffer *,
-
- __const__ char *, int, int, struct re_registers *);
-reg_syntax_t re_set_syntax(reg_syntax_t);
-void re_set_registers(struct re_pattern_buffer *,
- struct re_registers *, unsigned,
- regoff_t *, regoff_t *);
-static int cplx_se_sublist_len(struct rx_se_list *);
-static int posix_se_list_order(struct rx *, struct rx_se_list *,
-
- struct rx_se_list *);
-__const__ char
-*re_compile_pattern(__const__ char *, int, struct re_pattern_buffer *);
-int re_compile_fastmap(struct re_pattern_buffer *);
-char *re_comp(__const__ char *);
-int re_exec(__const__ char *);
-int regcomp(regex_t *, __const__ char *, int);
-int regexec(__const__ regex_t *,
- __const__ char *, size_t, regmatch_t pmatch[], int);
-size_t regerror(int, __const__ regex_t *, char *, size_t);
-
-#ifdef __STDC__
-static __inline__ enum rx_get_burst_return
-re_search_2_get_burst(struct rx_string_position *pos,
- void *vclosure, int stop)
-#else
-static __inline__ enum rx_get_burst_return
-re_search_2_get_burst(pos, vclosure, stop)
-struct rx_string_position *pos;
-void *vclosure;
-int stop;
-#endif
-{
- struct re_search_2_closure *closure;
-
- closure = (struct re_search_2_closure *) vclosure;
- if (!closure->string2) {
- int inset;
-
- inset = pos->pos - pos->string;
- if ((inset < -1) || (inset > closure->size1))
- return rx_get_burst_no_more;
- else {
- pos->pos =
- (__const__ unsigned char *) closure->string1 + inset;
- pos->string = (__const__ unsigned char *) closure->string1;
- pos->size = closure->size1;
- pos->end = ((__const__ unsigned char *)
- MIN(closure->string1 + closure->size1,
- closure->string1 + stop));
- pos->offset = 0;
- return ((pos->pos < pos->end)
- ? rx_get_burst_ok : rx_get_burst_no_more);
- }
- } else if (!closure->string1) {
- int inset;
-
- inset = pos->pos - pos->string;
- pos->pos = (__const__ unsigned char *) closure->string2 + inset;
- pos->string = (__const__ unsigned char *) closure->string2;
- pos->size = closure->size2;
- pos->end = ((__const__ unsigned char *)
- MIN(closure->string2 + closure->size2,
- closure->string2 + stop));
- pos->offset = 0;
- return ((pos->pos < pos->end)
- ? rx_get_burst_ok : rx_get_burst_no_more);
- } else {
- int inset;
-
- inset = pos->pos - pos->string + pos->offset;
- if (inset < closure->size1) {
- pos->pos =
- (__const__ unsigned char *) closure->string1 + inset;
- pos->string = (__const__ unsigned char *) closure->string1;
- pos->size = closure->size1;
- pos->end = ((__const__ unsigned char *)
- MIN(closure->string1 + closure->size1,
- closure->string1 + stop));
- pos->offset = 0;
- return rx_get_burst_ok;
- } else {
- pos->pos = ((__const__ unsigned char *)
- closure->string2 + inset - closure->size1);
- pos->string = (__const__ unsigned char *) closure->string2;
- pos->size = closure->size2;
- pos->end = ((__const__ unsigned char *)
- MIN(closure->string2 + closure->size2,
- closure->string2 + stop - closure->size1));
- pos->offset = closure->size1;
- return ((pos->pos < pos->end)
- ? rx_get_burst_ok : rx_get_burst_no_more);
- }
- }
-}
-
-
-#ifdef __STDC__
-static __inline__ enum rx_back_check_return
-re_search_2_back_check(struct rx_string_position *pos,
- int lparen, int rparen, unsigned char *translate,
- void *vclosure, int stop)
-#else
-static __inline__ enum rx_back_check_return
-re_search_2_back_check(pos, lparen, rparen, translate, vclosure, stop)
-struct rx_string_position *pos;
-int lparen;
-int rparen;
-unsigned char *translate;
-void *vclosure;
-int stop;
-#endif
-{
- struct rx_string_position there;
- struct rx_string_position past;
-
- there = *pos;
- there.pos = there.string + lparen - there.offset;
- re_search_2_get_burst(&there, vclosure, stop);
-
- past = *pos;
- past.pos = past.string + rparen - there.offset;
- re_search_2_get_burst(&past, vclosure, stop);
-
- ++pos->pos;
- re_search_2_get_burst(pos, vclosure, stop);
-
- while ((there.pos != past.pos)
- && (pos->pos != pos->end))
- if (TRANSLATE(*there.pos) != TRANSLATE(*pos->pos))
- return rx_back_check_fail;
- else {
- ++there.pos;
- ++pos->pos;
- if (there.pos == there.end)
- re_search_2_get_burst(&there, vclosure, stop);
- if (pos->pos == pos->end)
- re_search_2_get_burst(pos, vclosure, stop);
- }
-
- if (there.pos != past.pos)
- return rx_back_check_fail;
- --pos->pos;
- re_search_2_get_burst(pos, vclosure, stop);
- return rx_back_check_pass;
-}
-
-#ifdef __STDC__
-static __inline__ int
-re_search_2_fetch_char(struct rx_string_position *pos, int offset,
- void *app_closure, int stop)
-#else
-static __inline__ int
-re_search_2_fetch_char(pos, offset, app_closure, stop)
-struct rx_string_position *pos;
-int offset;
-void *app_closure;
-int stop;
-#endif
-{
- struct re_search_2_closure *closure;
-
- closure = (struct re_search_2_closure *) app_closure;
- if (offset == 0) {
- if (pos->pos >= pos->string)
- return *pos->pos;
- else {
- if (
- (pos->string ==
- (__const__ unsigned char *) closure->string2)
- && (closure->string1) && (closure->size1))
- return closure->string1[closure->size1 - 1];
- else
- return 0; /* sure, why not. */
- }
- }
- if (pos->pos == pos->end)
- return *closure->string2;
- else
-#if 0
- return pos->pos[1];
-#else
- return pos->pos[offset]; /* FIXME */
-#endif
-}
-
-#ifdef __STDC__
-RE_S2_QUAL int
-RE_SEARCH_2_FN(struct re_pattern_buffer *rxb,
- __const__ char *string1, int size1,
- __const__ char *string2, int size2,
- int startpos, int range,
- struct re_registers *regs, int stop)
-#else
-RE_S2_QUAL int
-RE_SEARCH_2_FN(rxb,
- string1, size1, string2, size2, startpos, range, regs, stop)
-struct re_pattern_buffer *rxb;
-__const__ char *string1;
-int size1;
-__const__ char *string2;
-int size2;
-int startpos;
-int range;
-struct re_registers *regs;
-int stop;
-#endif
-{
- int answer;
- struct re_search_2_closure closure;
-
- closure.string1 = string1;
- closure.size1 = size1;
- closure.string2 = string2;
- closure.size2 = size2;
- answer = rx_search(rxb, startpos, range, stop, size1 + size2,
- re_search_2_get_burst,
- re_search_2_back_check,
- re_search_2_fetch_char,
- (void *) &closure, regs, 0, 0);
- switch (answer) {
- case rx_search_continuation:
- abort();
- case rx_search_error:
- return -2;
- case rx_search_soft_fail:
- case rx_search_fail:
- return -1;
- default:
- return answer;
- }
-}
-
-/* Export rx_search to callers outside this file. */
-
-#ifdef __STDC__
-int
-re_rx_search(struct re_pattern_buffer *rxb, int startpos, int range,
- int stop, int total_size, rx_get_burst_fn get_burst,
- rx_back_check_fn back_check, rx_fetch_char_fn fetch_char,
- void *app_closure, struct re_registers *regs,
- struct rx_search_state *resume_state,
- struct rx_search_state *save_state)
-#else
-int
-re_rx_search(rxb, startpos, range, stop, total_size,
- get_burst, back_check, fetch_char,
- app_closure, regs, resume_state, save_state)
-struct re_pattern_buffer *rxb;
-int startpos;
-int range;
-int stop;
-int total_size;
-rx_get_burst_fn get_burst;
-rx_back_check_fn back_check;
-rx_fetch_char_fn fetch_char;
-void *app_closure;
-struct re_registers *regs;
-struct rx_search_state *resume_state;
-struct rx_search_state *save_state;
-#endif
-{
- return rx_search(rxb, startpos, range, stop, total_size,
- get_burst, back_check, fetch_char, app_closure,
- regs, resume_state, save_state);
-}
-
-#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
-/* re_search_2 for builds where neither REGEX_MALLOC nor __GNUC__ is
-   defined.  The real work is done by inner_re_search_2; afterwards
-   alloca(0) is called before the result is returned.
-   NOTE(review): presumably the alloca(0) lets an emulated alloca
-   reclaim storage used during the search -- confirm against the
-   alloca implementation in use. */
-#ifdef __STDC__
-int
-re_search_2(struct re_pattern_buffer *rxb,
-            __const__ char *string1, int size1,
-            __const__ char *string2, int size2,
-            int startpos, int range,
-            struct re_registers *regs, int stop)
-#else
-int
-re_search_2(rxb, string1, size1, string2, size2,
-            startpos, range, regs, stop)
-struct re_pattern_buffer *rxb;
-__const__ char *string1;
-__const__ char *string2;
-int size1, size2;
-int startpos, range, stop;
-struct re_registers *regs;
-#endif
-{
-    int result = inner_re_search_2(rxb, string1, size1, string2, size2,
-                                   startpos, range, regs, stop);
-
-    alloca(0);
-    return result;
-}
-#endif
-
-
-/* Single-string form of re_search_2.  STRING is passed as the second
-   of the two strings (the first one is empty), and SIZE is also used
-   as the stop position, so the caller cannot choose where matching
-   stops.  The result of re_search_2 is returned unchanged.
- */
-
-#ifdef __STDC__
-int
-re_search(struct re_pattern_buffer *rxb,
-          __const__ char *string, int size,
-          int startpos, int range,
-          struct re_registers *regs)
-#else
-int re_search(rxb, string, size, startpos, range, regs)
-struct re_pattern_buffer *rxb;
-__const__ char *string;
-int size, startpos, range;
-struct re_registers *regs;
-#endif
-{
-    int where;
-
-    where = re_search_2(rxb, 0, 0, string, size,
-                        startpos, range, regs, size);
-    return where;
-}
-
-/* Match the pattern in RXB at position POS of the text formed by
-   STRING1 followed by STRING2, looking no further than STOP.
-
-   The work is delegated to re_search_2 with a range of 1 and with the
-   buffer's fastmap temporarily cleared.  When REGS is null, a private
-   one-register set is substituted and `regs_allocated' is switched to
-   REGS_FIXED for the duration of the call; both fields are restored
-   afterwards.
-
-   Returns the negative value from re_search_2 on failure, otherwise
-   end[0] - start[0] of the register set that was used. */
-
-#ifdef __STDC__
-int
-re_match_2(struct re_pattern_buffer *rxb,
-           __const__ char *string1, int size1,
-           __const__ char *string2, int size2,
-           int pos, struct re_registers *regs, int stop)
-#else
-int re_match_2(rxb, string1, size1, string2, size2, pos, regs, stop)
-struct re_pattern_buffer *rxb;
-__const__ char *string1;
-__const__ char *string2;
-int size1, size2;
-int pos, stop;
-struct re_registers *regs;
-#endif
-{
-    struct re_registers scratch;
-    regoff_t scratch_start;
-    regoff_t scratch_end;
-    struct re_registers *active;
-    int using_scratch = (regs == 0);
-    int saved_alloc = rxb->regs_allocated;
-    char *saved_fastmap = rxb->fastmap;
-    int found;
-
-    if (using_scratch) {
-        /* Caller wants no register data: use a one-slot set on the stack. */
-        scratch.start = &scratch_start;
-        scratch.end = &scratch_end;
-        scratch.num_regs = 1;
-        active = &scratch;
-        rxb->regs_allocated = REGS_FIXED;
-    } else {
-        active = regs;
-    }
-
-    /* Search with the fastmap disabled, then put it back. */
-    rxb->fastmap = NULL;
-    found = re_search_2(rxb, string1, size1, string2, size2,
-                        pos, 1, active, stop);
-    rxb->fastmap = saved_fastmap;
-
-    if (using_scratch)
-        rxb->regs_allocated = saved_alloc;
-
-    if (found >= 0)
-        found = active->end[0] - active->start[0];
-    return found;
-}
-
-/* Single-string form of re_match_2: STRING is supplied as the first
-   string, the second string is empty, and SIZE is used as the stop
-   position. */
-
-#ifdef __STDC__
-int
-re_match(struct re_pattern_buffer *rxb,
-         __const__ char *string, int size,
-         int pos, struct re_registers *regs)
-#else
-int re_match(rxb, string, size, pos, regs)
-struct re_pattern_buffer *rxb;
-__const__ char *string;
-int size, pos;
-struct re_registers *regs;
-#endif
-{
-    int matched;
-
-    matched = re_match_2(rxb, string, size, 0, 0, pos, regs, size);
-    return matched;
-}
-
-
-
-/* The regexp syntax currently in effect for compilation.  It is
-   updated by `re_set_syntax' but may also be assigned directly: each
-   pattern buffer records its own syntax, so the value can change
-   between compilations. */
-reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
-
-
-/* Select the syntax used when compiling regexps, for compatibility
-   with utilities that historically used different, incompatible
-   syntaxes.
-
-   SYNTAX is a bit mask built from the bits defined in regex.h.
-   The previously active syntax is returned. */
-
-#ifdef __STDC__
-reg_syntax_t re_set_syntax(reg_syntax_t syntax)
-#else
-reg_syntax_t re_set_syntax(syntax)
-reg_syntax_t syntax;
-#endif
-{
-    reg_syntax_t previous;
-
-    previous = re_syntax_options;
-    re_syntax_options = syntax;
-
-    return previous;
-}
-
-
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
-   ENDS.  Subsequent matches using BUFP and REGS will use this memory
-   for recording register information.  STARTS and ENDS must be
-   allocated using the malloc library routine, and must each be at
-   least NUM_REGS * sizeof (regoff_t) bytes long.
-
-   If NUM_REGS == 0, REGS is emptied (zero registers, null START and
-   END pointers) and BUFP is marked REGS_UNALLOCATED, so subsequent
-   matches should allocate their own register data.
-
-   Unless this function is called, the first search or match using
-   BUFP will allocate its own register data, without freeing the old
-   data. */
-
-#ifdef __STDC__
-void
-re_set_registers(struct re_pattern_buffer *bufp,
-                 struct re_registers *regs,
-                 unsigned num_regs, regoff_t * starts, regoff_t * ends)
-#else
-void re_set_registers(bufp, regs, num_regs, starts, ends)
-struct re_pattern_buffer *bufp;
-struct re_registers *regs;
-unsigned num_regs;
-regoff_t *starts;
-regoff_t *ends;
-#endif
-{
-    if (num_regs) {
-        /* Adopt the caller's arrays. */
-        bufp->regs_allocated = REGS_REALLOCATE;
-        regs->num_regs = num_regs;
-        regs->start = starts;
-        regs->end = ends;
-    } else {
-        bufp->regs_allocated = REGS_UNALLOCATED;
-        regs->num_regs = 0;
-        /* Null pointers: cast to the pointer type, not to the integer
-           type regoff_t. */
-        regs->start = (regoff_t *) 0;
-        regs->end = (regoff_t *) 0;
-    }
-}
-
-
-
-
-/* Count the elements of LIST whose `car', viewed as a long, is not
-   negative.  Elements with a negative `car' are skipped. */
-#ifdef __STDC__
-static int cplx_se_sublist_len(struct rx_se_list *list)
-#else
-static int cplx_se_sublist_len(list)
-struct rx_se_list *list;
-#endif
-{
-    struct rx_se_list *node;
-    int count = 0;
-
-    for (node = list; node; node = node->cdr) {
-        if ((long) node->car >= 0)
-            count++;
-    }
-    return count;
-}
-
-
-/* Ordering function installed as rx->se_list_cmp.
-
-   Only elements whose `car' is non-negative (as a long) take part.
-   If neither list has such elements the list pointers themselves are
-   compared.  A list with no such elements sorts before one that has
-   some.  Otherwise the participating elements of each list are copied
-   in reverse order into a temporary vector and the two vectors are
-   compared position by position; the first difference decides.
-
-   Returns -1, 0 or 1.  The RX argument is not used. */
-
-#ifdef __STDC__
-static int
-posix_se_list_order(struct rx *rx,
-                    struct rx_se_list *a, struct rx_se_list *b)
-#else
-static int posix_se_list_order(rx, a, b)
-struct rx *rx;
-struct rx_se_list *a;
-struct rx_se_list *b;
-#endif
-{
-    int a_len = cplx_se_sublist_len(a);
-    int b_len = cplx_se_sublist_len(b);
-
-    if (!a_len || !b_len) {
-        if (a_len)
-            return 1;           /* only B is empty */
-        if (b_len)
-            return -1;          /* only A is empty */
-        /* Both empty: fall back to comparing the list pointers. */
-        if (a == b)
-            return 0;
-        return (a < b) ? -1 : 1;
-    }
-
-    {
-        rx_side_effect *a_vec = ((rx_side_effect *)
-                                 alloca(sizeof(rx_side_effect) * (a_len + 1)));
-        rx_side_effect *b_vec = ((rx_side_effect *)
-                                 alloca(sizeof(rx_side_effect) * (b_len + 1)));
-        struct rx_se_list *node;
-        int i;
-
-        /* Fill A's vector back to front, skipping negative entries. */
-        node = a;
-        for (i = a_len; i-- > 0;) {
-            while ((long) node->car < 0)
-                node = node->cdr;
-            a_vec[i] = node->car;
-            node = node->cdr;
-        }
-        a_vec[a_len] = (rx_side_effect) -2;
-
-        /* Same for B. */
-        node = b;
-        for (i = b_len; i-- > 0;) {
-            while ((long) node->car < 0)
-                node = node->cdr;
-            b_vec[i] = node->car;
-            node = node->cdr;
-        }
-        b_vec[b_len] = (rx_side_effect) -1;
-
-        /* The two terminators differ (-2 vs. -1), so this scan always
-           stops at or before the end of the shorter vector. */
-        for (i = 0; a_vec[i] == b_vec[i]; ++i)
-            ;
-
-        if ((unsigned *) (a_vec[i]) < (unsigned *) (b_vec[i]))
-            return -1;
-        return 1;
-    }
-}
-
-
-
-
-/* re_compile_pattern is the GNU regular expression compiler entry
-   point: it compiles PATTERN (of length LENGTH) into RXB using the
-   syntax currently held in re_syntax_options.
-
-   The return value is the rx_error_msg entry for the code returned by
-   rx_compile; per the original documentation that is 0 for a valid
-   pattern and an error string otherwise.
-
-   Assumes the `allocated' (and perhaps `buffer') and `translate'
-   fields are set in RXB on entry.
-
-   rx_compile does the actual compilation.
-
-   NOTE(review): unlike re_comp and regcomp in this file, this
-   function does not reset rxb->rx.start before compiling -- confirm
-   whether that is intentional. */
-
-#ifdef __STDC__
-__const__ char *re_compile_pattern(__const__ char *pattern,
-                                   int length,
-                                   struct re_pattern_buffer *rxb)
-#else
-__const__ char *re_compile_pattern(pattern, length, rxb)
-__const__ char *pattern;
-int length;
-struct re_pattern_buffer *rxb;
-#endif
-{
-    reg_errcode_t status;
-
-    /* GNU callers expect at least RE_NREGS registers to be set (with at
-       least one extra holding -1), so start with no register storage. */
-    rxb->regs_allocated = REGS_UNALLOCATED;
-
-    /* GNU callers ask for register information by passing a non-null
-       REGS to re_match and friends, not through no_sub. */
-    rxb->no_sub = 0;
-
-    /* Anchors match at newlines. */
-    rxb->newline_anchor = 1;
-
-    /* Reset the pattern-level bookkeeping. */
-    rxb->re_nsub = 0;
-    rxb->start = 0;
-    rxb->se_params = 0;
-
-    /* Reset the embedded rx state. */
-    rxb->rx.local_cset_size = 256;
-    rxb->rx.nodec = 0;
-    rxb->rx.epsnodec = 0;
-    rxb->rx.instruction_table = 0;
-    rxb->rx.nfa_states = 0;
-    rxb->rx.start_set = 0;
-    rxb->rx.se_list_cmp = posix_se_list_order;
-
-    status = rx_compile(pattern, length, re_syntax_options, rxb);
-    alloca(0);
-
-    return rx_error_msg[(int) status];
-}
-
-
-/* Build the fastmap for RXB by calling rx_blow_up_fastmap.
-   Always returns 0. */
-#ifdef __STDC__
-int
-re_compile_fastmap(struct re_pattern_buffer *rxb)
-#else
-int
-re_compile_fastmap(rxb)
-struct re_pattern_buffer *rxb;
-#endif
-{
-    int status = 0;
-
-    rx_blow_up_fastmap(rxb);
-
-    return status;
-}
-
-
-
-
-/* Entry points compatible with 4.2 BSD regex library. We don't define
- them if this is an Emacs or POSIX compilation. */
-
-#if (!defined (emacs) && !defined (_POSIX_SOURCE)) || defined(USE_BSD_REGEX)
-
-/* The BSD interface works on a single, implicit pattern buffer. */
-static struct re_pattern_buffer rx_comp_buf;
-
-/* Compile S into the implicit BSD pattern buffer.
-
-   A null or empty S compiles nothing: the result is 0 if a pattern
-   has already been compiled, otherwise the string
-   "No previous regular expression".
-
-   On first use a fastmap of (1 << CHARBITS) bytes is allocated; if
-   that fails "Memory exhausted" is returned.
-
-   Otherwise the result is the rx_error_msg entry for the code
-   returned by rx_compile. */
-#ifdef __STDC__
-char *re_comp(__const__ char *s)
-#else
-char *re_comp(s)
-__const__ char *s;
-#endif
-{
-    reg_errcode_t status;
-
-    if (s == 0 || s[0] == '\0')
-        return rx_comp_buf.buffer ? 0 : "No previous regular expression";
-
-    if (rx_comp_buf.fastmap == 0) {
-        char *map = (char *) malloc(1 << CHARBITS);
-
-        if (map == 0)
-            return "Memory exhausted";
-        rx_comp_buf.fastmap = map;
-    }
-
-    /* `re_exec' always passes NULL for the `regs' argument, so the
-       pattern buffer fields that only affect register handling do not
-       need to be initialized here. */
-
-    /* Anchors match at newlines. */
-    rx_comp_buf.newline_anchor = 1;
-
-    /* Reset the pattern-level bookkeeping. */
-    rx_comp_buf.re_nsub = 0;
-    rx_comp_buf.start = 0;
-    rx_comp_buf.se_params = 0;
-
-    /* Reset the embedded rx state. */
-    rx_comp_buf.rx.local_cset_size = 256;
-    rx_comp_buf.rx.nodec = 0;
-    rx_comp_buf.rx.epsnodec = 0;
-    rx_comp_buf.rx.instruction_table = 0;
-    rx_comp_buf.rx.nfa_states = 0;
-    rx_comp_buf.rx.start = 0;
-    rx_comp_buf.rx.start_set = 0;
-    rx_comp_buf.rx.se_list_cmp = posix_se_list_order;
-
-    status = rx_compile(s, strlen(s), re_syntax_options, &rx_comp_buf);
-    alloca(0);
-
-    /* The `__const__' qualifier is deliberately dropped here. */
-    return (char *) rx_error_msg[(int) status];
-}
-
-
-/* Search the NUL-terminated string S for the pattern last compiled by
-   re_comp.  Every start position from 0 up to the length of S is
-   offered to re_search and no register information is requested.
-   Returns 1 if re_search reports a non-negative position, else 0. */
-#ifdef __STDC__
-int re_exec(__const__ char *s)
-#else
-int re_exec(s)
-__const__ char *s;
-#endif
-{
-    __const__ int length = strlen(s);
-    int where;
-
-    where = re_search(&rx_comp_buf, s, length, 0, length,
-                      (struct re_registers *) 0);
-    return where >= 0;
-}
-#endif /* not emacs and not _POSIX_SOURCE */
-
-
-
-/* POSIX.2 functions. Don't define these for Emacs. */
-
-#if !defined(emacs)
-
-/* regcomp takes a regular expression as a string and compiles it.
-
-   PREG is a regex_t *.  We do not expect any fields to be initialized,
-   since POSIX says we shouldn't.  Thus, we set
-
-     `buffer' to the compiled pattern;
-     `used' to the length of the compiled pattern;
-     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
-       REG_EXTENDED bit in CFLAGS is set; otherwise, to
-       RE_SYNTAX_POSIX_BASIC;
-     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
-     `fastmap' to a freshly malloc'ed 256-byte buffer and
-       `fastmap_accurate' to zero;
-     `translate' to a malloc'ed 256-byte case-folding table if
-       REG_ICASE is set in CFLAGS, otherwise to zero;
-     `re_nsub' to the number of subexpressions in PATTERN.
-
-   PATTERN is the address of the pattern string.
-
-   CFLAGS is a series of bits which affect compilation.
-
-     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
-     use POSIX basic syntax.
-
-     If REG_NEWLINE is set, then . and [^...] don't match newline.
-     Also, regexec will try a match beginning after every newline.
-
-     If REG_ICASE is set, then we consider upper- and lowercase
-     versions of letters to be equivalent when matching.
-
-     If REG_NOSUB is set, then when PREG is passed to regexec, that
-     routine will report only success or failure, and nothing about the
-     registers.
-
-   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
-   the return codes and their meanings.)  REG_ESPACE is returned when
-   either table cannot be allocated; in that case nothing allocated
-   here is left behind and both `fastmap' and `translate' are zero.
-
-   NOTE(review): when rx_compile itself reports an error, `fastmap'
-   and `translate' stay allocated, so they are only released if the
-   caller still calls regfree -- confirm this is the intended
-   contract. */
-
-
-#ifdef __STDC__
-int regcomp(regex_t * preg, __const__ char *pattern, int cflags)
-#else
-int regcomp(preg, pattern, cflags)
-regex_t *preg;
-__const__ char *pattern;
-int cflags;
-#endif
-{
-    reg_errcode_t ret;
-    unsigned syntax
-        = (cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
-                                  : RE_SYNTAX_POSIX_BASIC;
-
-    /* rx_compile is expected to allocate the space for the compiled
-       pattern. */
-    preg->buffer = 0;
-    preg->allocated = 0;
-
-    /* Give `translate' a defined value before the first early return so
-       that PREG never carries an indeterminate pointer. */
-    preg->translate = 0;
-
-    preg->fastmap = malloc(256);
-    if (!preg->fastmap)
-        return (int) REG_ESPACE;
-    preg->fastmap_accurate = 0;
-
-    if (cflags & REG_ICASE) {
-        unsigned i;
-
-        preg->translate = (unsigned char *) malloc(256);
-        if (!preg->translate) {
-            /* Do not leak the fastmap allocated just above. */
-            free(preg->fastmap);
-            preg->fastmap = 0;
-            return (int) REG_ESPACE;
-        }
-
-        /* Map uppercase characters to corresponding lowercase ones. */
-        for (i = 0; i < CHAR_SET_SIZE; i++)
-            preg->translate[i] = isupper(i) ? tolower(i) : i;
-    }
-
-    /* If REG_NEWLINE is set, newlines are treated differently. */
-    if (cflags & REG_NEWLINE) {
-        /* REG_NEWLINE implies neither . nor [^...] match newline. */
-        syntax &= ~RE_DOT_NEWLINE;
-        syntax |= RE_HAT_LISTS_NOT_NEWLINE;
-        /* It also changes the matching behavior. */
-        preg->newline_anchor = 1;
-    } else
-        preg->newline_anchor = 0;
-
-    preg->no_sub = !!(cflags & REG_NOSUB);
-
-    preg->re_nsub = 0;
-    preg->start = 0;
-    preg->se_params = 0;
-    preg->syntax_parens = 0;
-    preg->rx.nodec = 0;
-    preg->rx.epsnodec = 0;
-    preg->rx.instruction_table = 0;
-    preg->rx.nfa_states = 0;
-    preg->rx.local_cset_size = 256;
-    preg->rx.start = 0;
-    preg->rx.se_list_cmp = posix_se_list_order;
-    preg->rx.start_set = 0;
-
-    /* POSIX says a null character in the pattern terminates it, so we
-       can use strlen here in compiling the pattern. */
-    ret = rx_compile(pattern, strlen(pattern), syntax, preg);
-    alloca(0);
-
-    /* POSIX doesn't distinguish between an unmatched open-group and an
-       unmatched close-group: both are REG_EPAREN. */
-    if (ret == REG_ERPAREN)
-        ret = REG_EPAREN;
-
-    return (int) ret;
-}
-
-
-/* regexec searches for a given pattern, specified by PREG, in the
-   string STRING.
-
-   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
-   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
-   least NMATCH elements, and we set them to the offsets of the
-   corresponding matched substrings.
-
-   EFLAGS specifies `execution flags' which affect matching: if
-   REG_NOTBOL is set, then ^ does not match at the beginning of the
-   string; if REG_NOTEOL is set, then $ does not match at the end.
-
-   PREG itself is not modified: the search runs on a private copy.
-
-   We return 0 if we find a match, REG_NOMATCH if not, and REG_ESPACE
-   if the temporary register arrays cannot be allocated. */
-
-#ifdef __STDC__
-int
-regexec(__const__ regex_t * preg, __const__ char *string,
-        size_t nmatch, regmatch_t pmatch[], int eflags)
-#else
-int regexec(preg, string, nmatch, pmatch, eflags)
-__const__ regex_t *preg;
-__const__ char *string;
-size_t nmatch;
-regmatch_t pmatch[];
-int eflags;
-#endif
-{
-    int ret;
-    struct re_registers regs;
-    regex_t private_preg;
-    int len = strlen(string);
-    boolean want_reg_info = !preg->no_sub && nmatch > 0;
-
-    private_preg = *preg;
-
-    private_preg.not_bol = !!(eflags & REG_NOTBOL);
-    private_preg.not_eol = !!(eflags & REG_NOTEOL);
-
-    /* The user has told us exactly how many registers to return
-     * information about, via `nmatch'.  We have to pass that on to the
-     * matching routines.
-     */
-    private_preg.regs_allocated = REGS_FIXED;
-
-    if (want_reg_info) {
-        regs.num_regs = nmatch;
-        regs.start = ((regoff_t *) malloc((nmatch) * sizeof(regoff_t)));
-        regs.end = ((regoff_t *) malloc((nmatch) * sizeof(regoff_t)));
-        if (regs.start == 0 || regs.end == 0) {
-            /* One of the two may have succeeded; release it, and report
-               the failure as out-of-memory rather than as "no match". */
-            if (regs.start != 0)
-                free(regs.start);
-            if (regs.end != 0)
-                free(regs.end);
-            return (int) REG_ESPACE;
-        }
-    }
-
-    /* Perform the searching operation. */
-    ret = re_search(&private_preg, string, len,
-                    /* start: */ 0,
-                    /* range: */ len,
-                    want_reg_info ? &regs : (struct re_registers *) 0);
-
-    /* Copy the register information to the POSIX structure. */
-    if (want_reg_info) {
-        if (ret >= 0) {
-            size_t r;   /* same type as nmatch, so the comparison cannot wrap */
-
-            for (r = 0; r < nmatch; r++) {
-                pmatch[r].rm_so = regs.start[r];
-                pmatch[r].rm_eo = regs.end[r];
-            }
-        }
-
-        /* If we needed the temporary register info, free the space now. */
-        free(regs.start);
-        free(regs.end);
-    }
-
-    /* We want zero return to mean success, unlike `re_search'. */
-    return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
-}
-
-
-/* Produce the message for ERRCODE, an error code returned from either
-   regcomp or regexec.
-
-   The text is looked up in rx_error_msg; a null entry is reported as
-   "Success".  If ERRBUF_SIZE is nonzero the message is copied into
-   ERRBUF, truncated if necessary and always NUL-terminated.  The
-   return value is the size needed for the whole message, including
-   the terminating NUL.  PREG is not used.
-
-   NOTE(review): ERRCODE is used as an index without a range check --
-   callers must pass a valid code. */
-
-#ifdef __STDC__
-size_t
-regerror(int errcode, __const__ regex_t * preg,
-         char *errbuf, size_t errbuf_size)
-#else
-size_t regerror(errcode, preg, errbuf, errbuf_size)
-int errcode;
-__const__ regex_t *preg;
-char *errbuf;
-size_t errbuf_size;
-#endif
-{
-    __const__ char *text = rx_error_msg[errcode];
-    size_t needed;
-
-    if (text == 0)
-        text = "Success";
-    needed = strlen(text) + 1;  /* Counts the terminating NUL. */
-
-    if (errbuf_size == 0)
-        return needed;
-
-    if (needed <= errbuf_size) {
-        strcpy(errbuf, text);
-    } else {
-        /* Message does not fit: copy what does and terminate it. */
-        strncpy(errbuf, text, errbuf_size - 1);
-        errbuf[errbuf_size - 1] = 0;
-    }
-
-    return needed;
-}
-
-
-/* Release the `buffer', `fastmap' and `translate' storage owned by
-   PREG, leaving each pointer null and `allocated' and
-   `fastmap_accurate' reset to zero. */
-
-#ifdef __STDC__
-void
-regfree(regex_t * preg)
-#else
-void
-regfree(preg)
-regex_t *preg;
-#endif
-{
-    /* Compiled pattern. */
-    if (preg->buffer)
-        free(preg->buffer);
-    preg->buffer = 0;
-    preg->allocated = 0;
-
-    /* Fastmap. */
-    if (preg->fastmap)
-        free(preg->fastmap);
-    preg->fastmap = 0;
-    preg->fastmap_accurate = 0;
-
-    /* Case-folding table. */
-    if (preg->translate)
-        free(preg->translate);
-    preg->translate = 0;
-}
-
-#endif /* not emacs */