Sync regex with glibc 2.2.4. I need to add an option to select

a minimalist replacement. Coming soon... -Erik
author: Eric Andersen <andersen@codepoet.org> 2001-11-24 13:20:18 +0000
committer: Eric Andersen <andersen@codepoet.org> 2001-11-24 13:20:18 +0000
commit: 322e234bd5ae2b05566491a6f1481bee8b1731c9 (patch)
tree: 17adf256c552bd3b43b28ca0e1b16d59ef52bee7
parent: dfb5fe2dee1b64c57c3df7fc4c0ecb7ad0450730 (diff)
1 files changed, 6942 insertions, 4300 deletions
diff --git a/libc/misc/regex/regex.c b/libc/misc/regex/regex.c
index 350535fa1..81298314b 100644
--- a/libc/misc/regex/regex.c
+++ b/libc/misc/regex/regex.c
@@ -2,35 +2,41 @@
    version 0.12.
    (Implements POSIX draft P1003.2/D11.2, except for some of the
    internationalization features.)
-   Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
 
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
 
 /* To exclude some unwanted junk.... */
 #undef _LIBC
+#undef emacs
 #define _REGEX_RE_COMP
+#include <features.h>
+#include <stdlib.h>
+#include <string.h>
+#define STDC_HEADERS
 
 /* AIX requires this to be the first thing in the file. */
 #if defined _AIX && !defined REGEX_MALLOC
-#pragma alloca
+  #pragma alloca
 #endif
 
 #undef	_GNU_SOURCE
 #define _GNU_SOURCE
-#define STDC_HEADERS
 
 #ifdef HAVE_CONFIG_H
 # include <config.h>
@@ -41,141 +47,161 @@
 #  define PARAMS(args) args
 # else
 #  define PARAMS(args) ()
-# endif							/* GCC.  */
-#endif							/* Not PARAMS.  */
+# endif  /* GCC.  */
+#endif  /* Not PARAMS.  */
 
-#if defined STDC_HEADERS && !defined emacs
-# include <stddef.h>
-#else
+#ifndef INSIDE_RECURSION
+
+# if defined STDC_HEADERS && !defined emacs
+#  include <stddef.h>
+# else
 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
-# include <sys/types.h>
-#endif
+#  include <sys/types.h>
+# endif
 
-#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
 
 /* For platform which support the ISO C amendement 1 functionality we
    support user defined character classes.  */
-#if defined _LIBC || WIDE_CHAR_SUPPORT
+# if defined _LIBC || WIDE_CHAR_SUPPORT
 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
-# include <wchar.h>
-# include <wctype.h>
-#endif
+#  include <wchar.h>
+#  include <wctype.h>
+# endif
 
-#ifdef _LIBC
+# ifdef _LIBC
 /* We have to keep the namespace clean.  */
-# define regfree(preg) __regfree (preg)
-# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
-# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
-# define regerror(errcode, preg, errbuf, errbuf_size) \
+#  define regfree(preg) __regfree (preg)
+#  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+#  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+#  define regerror(errcode, preg, errbuf, errbuf_size) \
 	__regerror(errcode, preg, errbuf, errbuf_size)
-# define re_set_registers(bu, re, nu, st, en) \
+#  define re_set_registers(bu, re, nu, st, en) \
 	__re_set_registers (bu, re, nu, st, en)
-# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+#  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
-# define re_match(bufp, string, size, pos, regs) \
+#  define re_match(bufp, string, size, pos, regs) \
 	__re_match (bufp, string, size, pos, regs)
-# define re_search(bufp, string, size, startpos, range, regs) \
+#  define re_search(bufp, string, size, startpos, range, regs) \
 	__re_search (bufp, string, size, startpos, range, regs)
-# define re_compile_pattern(pattern, length, bufp) \
+#  define re_compile_pattern(pattern, length, bufp) \
 	__re_compile_pattern (pattern, length, bufp)
-# define re_set_syntax(syntax) __re_set_syntax (syntax)
-# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+#  define re_set_syntax(syntax) __re_set_syntax (syntax)
+#  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
-# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+#  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
 
-#define btowc __btowc
-#endif
+#  define btowc __btowc
+
+/* We are also using some library internals.  */
+#  include <locale/localeinfo.h>
+#  include <locale/elem-hash.h>
+#  include <langinfo.h>
+#  include <locale/coll-lookup.h>
+# endif
 
 /* This is for other GNU distributions with internationalized messages.  */
-#if HAVE_LIBINTL_H || defined _LIBC
-# include <libintl.h>
-#else
-# define gettext(msgid) (msgid)
-#endif
+# if HAVE_LIBINTL_H || defined _LIBC
+#  include <libintl.h>
+#  ifdef _LIBC
+#   undef gettext
+#   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
+#  endif
+# else
+#  define gettext(msgid) (msgid)
+# endif
 
-#ifndef gettext_noop
+# ifndef gettext_noop
 /* This define is so xgettext can find the internationalizable
    strings.  */
-# define gettext_noop(String) String
-#endif
+#  define gettext_noop(String) String
+# endif
 
 /* The `emacs' switch turns on certain matching commands
    that make sense only in Emacs. */
-#ifdef emacs
+# ifdef emacs
 
-# include "lisp.h"
-# include "buffer.h"
-# include "syntax.h"
+#  include "lisp.h"
+#  include "buffer.h"
+#  include "syntax.h"
 
-#else							/* not emacs */
+# else  /* not emacs */
 
 /* If we are not linking with Emacs proper,
    we can't use the relocating allocator
    even if config.h says that we can.  */
-# undef REL_ALLOC
+#  undef REL_ALLOC
 
-# if defined STDC_HEADERS || defined _LIBC
-#  include <stdlib.h>
-# else
-char *malloc();
-char *realloc();
-# endif
+#  if defined STDC_HEADERS || defined _LIBC
+#   include <stdlib.h>
+#  else
+char *malloc ();
+char *realloc ();
+#  endif
 
 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
    If nothing else has been done, use the method below.  */
-# ifdef INHIBIT_STRING_HEADER
-#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
-#   if !defined bzero && !defined bcopy
-#    undef INHIBIT_STRING_HEADER
+#  ifdef INHIBIT_STRING_HEADER
+#   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
+#    if !defined bzero && !defined bcopy
+#     undef INHIBIT_STRING_HEADER
+#    endif
 #   endif
 #  endif
-# endif
 
 /* This is the normal way of making sure we have a bcopy and a bzero.
    This is used in most programs--a few other programs avoid this
    by defining INHIBIT_STRING_HEADER.  */
-# ifndef INHIBIT_STRING_HEADER
-#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
-#   include <string.h>
-#   ifndef bzero
-#    ifndef _LIBC
-#     define bzero(s, n)	(memset (s, '\0', n), (s))
-#    else
-#     define bzero(s, n)	__bzero (s, n)
+#  ifndef INHIBIT_STRING_HEADER
+#   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+#    include <string.h>
+#    ifndef bzero
+#     ifndef _LIBC
+#      define bzero(s, n)	(memset (s, '\0', n), (s))
+#     else
+#      define bzero(s, n)	__bzero (s, n)
+#     endif
+#    endif
+#   else
+#    include <strings.h>
+#    ifndef memcmp
+#     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
+#    endif
+#    ifndef memcpy
+#     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
 #    endif
-#   endif
-#  else
-#   include <strings.h>
-#   ifndef memcmp
-#    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
-#   endif
-#   ifndef memcpy
-#    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
 #   endif
 #  endif
-# endif
 
 /* Define the syntax stuff for \<, \>, etc.  */
 
 /* This must be nonzero for the wordchar and notwordchar pattern
    commands in re_match_2.  */
-# ifndef Sword
-#  define Sword 1
-# endif
+#  ifndef Sword
+#   define Sword 1
+#  endif
 
-# ifdef SWITCH_ENUM_BUG
-#  define SWITCH_ENUM_CAST(x) ((int)(x))
-# else
-#  define SWITCH_ENUM_CAST(x) (x)
+#  ifdef SWITCH_ENUM_BUG
+#   define SWITCH_ENUM_CAST(x) ((int)(x))
+#  else
+#   define SWITCH_ENUM_CAST(x) (x)
+#  endif
+
+# endif /* not emacs */
+
+# if defined _LIBC || HAVE_LIMITS_H
+#  include <limits.h>
 # endif
 
-#endif							/* not emacs */
+# ifndef MB_LEN_MAX
+#  define MB_LEN_MAX 1
+# endif
 
 /* Get the interface, including the syntax bits.  */
-#include <regex.h>
+# include <regex.h>
 
 /* isalpha etc. are used for the character classes.  */
-#include <ctype.h>
+# include <ctype.h>
 
 /* Jim Meyering writes:
 
@@ -189,94 +215,102 @@ char *realloc();
    eliminate the && through constant folding."
    Solaris defines some of these symbols so we must undefine them first.  */
 
-#undef ISASCII
-#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
-# define ISASCII(c) 1
-#else
-# define ISASCII(c) isascii(c)
-#endif
+# undef ISASCII
+# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+#  define ISASCII(c) 1
+# else
+#  define ISASCII(c) isascii(c)
+# endif
 
-#ifdef isblank
-# define ISBLANK(c) (ISASCII (c) && isblank (c))
-#else
-# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-#endif
-#ifdef isgraph
-# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
-#else
-# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
-#endif
+# ifdef isblank
+#  define ISBLANK(c) (ISASCII (c) && isblank (c))
+# else
+#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# endif
+# ifdef isgraph
+#  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# else
+#  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# endif
 
-#undef ISPRINT
-#define ISPRINT(c) (ISASCII (c) && isprint (c))
-#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
-#define ISALNUM(c) (ISASCII (c) && isalnum (c))
-#define ISALPHA(c) (ISASCII (c) && isalpha (c))
-#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
-#define ISLOWER(c) (ISASCII (c) && islower (c))
-#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
-#define ISSPACE(c) (ISASCII (c) && isspace (c))
-#define ISUPPER(c) (ISASCII (c) && isupper (c))
-#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
-
-#ifdef _tolower
-# define TOLOWER(c) _tolower(c)
-#else
-# define TOLOWER(c) tolower(c)
-#endif
+# undef ISPRINT
+# define ISPRINT(c) (ISASCII (c) && isprint (c))
+# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+# define ISALNUM(c) (ISASCII (c) && isalnum (c))
+# define ISALPHA(c) (ISASCII (c) && isalpha (c))
+# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+# define ISLOWER(c) (ISASCII (c) && islower (c))
+# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+# define ISSPACE(c) (ISASCII (c) && isspace (c))
+# define ISUPPER(c) (ISASCII (c) && isupper (c))
+# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+# ifdef _tolower
+#  define TOLOWER(c) _tolower(c)
+# else
+#  define TOLOWER(c) tolower(c)
+# endif
 
-#ifndef NULL
-# define NULL (void *)0
-#endif
+# ifndef NULL
+#  define NULL (void *)0
+# endif
 
 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
    since ours (we hope) works properly with all combinations of
    machines, compilers, `char' and `unsigned char' argument types.
    (Per Bothner suggested the basic approach.)  */
-#undef SIGN_EXTEND_CHAR
-#if __STDC__
-# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
-#else							/* not __STDC__ */
+# undef SIGN_EXTEND_CHAR
+# if __STDC__
+#  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+# else  /* not __STDC__ */
 /* As in Harbison and Steele.  */
-# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
-#endif
+#  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+# endif
 
-#ifndef emacs
+# ifndef emacs
 /* How many characters in the character set.  */
-# define CHAR_SET_SIZE 256
+#  define CHAR_SET_SIZE 256
 
-# ifdef SYNTAX_TABLE
+#  ifdef SYNTAX_TABLE
 
 extern char *re_syntax_table;
 
-# else							/* not SYNTAX_TABLE */
+#  else /* not SYNTAX_TABLE */
 
 static char re_syntax_table[CHAR_SET_SIZE];
 
-static void init_syntax_once()
+static void init_syntax_once PARAMS ((void));
+
+static void
+init_syntax_once ()
 {
-	register int c;
-	static int done = 0;
+   register int c;
+   static int done = 0;
 
-	if (done)
-		return;
-	bzero(re_syntax_table, sizeof re_syntax_table);
+   if (done)
+     return;
+   bzero (re_syntax_table, sizeof re_syntax_table);
 
-	for (c = 0; c < CHAR_SET_SIZE; ++c)
-		if (ISALNUM(c))
-			re_syntax_table[c] = Sword;
+   for (c = 0; c < CHAR_SET_SIZE; ++c)
+     if (ISALNUM (c))
+	re_syntax_table[c] = Sword;
 
-	re_syntax_table['_'] = Sword;
+   re_syntax_table['_'] = Sword;
 
-	done = 1;
+   done = 1;
 }
 
-# endif							/* not SYNTAX_TABLE */
+#  endif /* not SYNTAX_TABLE */
 
-# define SYNTAX(c) re_syntax_table[((c) & 0xFF)]
+#  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
 
-#endif							/* emacs */
+# endif /* emacs */
 
+/* Integer type for pointers.  */
+# if !defined _LIBC
+typedef unsigned long int uintptr_t;
+# endif
+
 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
    use `alloca' instead of `malloc'.  This is because using malloc in
    re_search* or re_match* could cause memory leaks when C-g is used in
@@ -287,674 +321,1016 @@ static void init_syntax_once()
    not functions -- `alloca'-allocated space disappears at the end of the
    function it is called in.  */
 
-#ifdef REGEX_MALLOC
+# ifdef REGEX_MALLOC
 
-# define REGEX_ALLOCATE malloc
-# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
-# define REGEX_FREE free
+#  define REGEX_ALLOCATE malloc
+#  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+#  define REGEX_FREE free
 
-#else							/* not REGEX_MALLOC  */
+# else /* not REGEX_MALLOC  */
 
 /* Emacs already defines alloca, sometimes.  */
-# ifndef alloca
+#  ifndef alloca
 
 /* Make alloca work the best possible way.  */
-#  ifdef __GNUC__
-#   define alloca __builtin_alloca
-#  else							/* not __GNUC__ */
-#   if HAVE_ALLOCA_H
-#    include <alloca.h>
-#   endif						/* HAVE_ALLOCA_H */
-#  endif						/* not __GNUC__ */
+#   ifdef __GNUC__
+#    define alloca __builtin_alloca
+#   else /* not __GNUC__ */
+#    if HAVE_ALLOCA_H
+#     include <alloca.h>
+#    endif /* HAVE_ALLOCA_H */
+#   endif /* not __GNUC__ */
 
-# endif							/* not alloca */
+#  endif /* not alloca */
 
-# define REGEX_ALLOCATE alloca
+#  define REGEX_ALLOCATE alloca
 
 /* Assumes a `char *destination' variable.  */
-# define REGEX_REALLOCATE(source, osize, nsize)				\
+#  define REGEX_REALLOCATE(source, osize, nsize)			\
   (destination = (char *) alloca (nsize),				\
    memcpy (destination, source, osize))
 
 /* No need to do anything to free, after alloca.  */
-# define REGEX_FREE(arg) ((void)0)	/* Do nothing!  But inhibit gcc warning.  */
+#  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
 
-#endif							/* not REGEX_MALLOC */
+# endif /* not REGEX_MALLOC */
 
 /* Define how to allocate the failure stack.  */
 
-#if defined REL_ALLOC && defined REGEX_MALLOC
+# if defined REL_ALLOC && defined REGEX_MALLOC
 
-# define REGEX_ALLOCATE_STACK(size)				\
+#  define REGEX_ALLOCATE_STACK(size)				\
   r_alloc (&failure_stack_ptr, (size))
-# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
+#  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
   r_re_alloc (&failure_stack_ptr, (nsize))
-# define REGEX_FREE_STACK(ptr)					\
+#  define REGEX_FREE_STACK(ptr)					\
   r_alloc_free (&failure_stack_ptr)
 
-#else							/* not using relocating allocator */
+# else /* not using relocating allocator */
 
-# ifdef REGEX_MALLOC
+#  ifdef REGEX_MALLOC
 
-#  define REGEX_ALLOCATE_STACK malloc
-#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
-#  define REGEX_FREE_STACK free
+#   define REGEX_ALLOCATE_STACK malloc
+#   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+#   define REGEX_FREE_STACK free
 
-# else							/* not REGEX_MALLOC */
+#  else /* not REGEX_MALLOC */
 
-#  define REGEX_ALLOCATE_STACK alloca
+#   define REGEX_ALLOCATE_STACK alloca
 
-#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
+#   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
    REGEX_REALLOCATE (source, osize, nsize)
 /* No need to explicitly free anything.  */
-#  define REGEX_FREE_STACK(arg)
+#   define REGEX_FREE_STACK(arg)
 
-# endif							/* not REGEX_MALLOC */
-#endif							/* not using relocating allocator */
+#  endif /* not REGEX_MALLOC */
+# endif /* not using relocating allocator */
 
 
 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
    `string1' or just past its end.  This works if PTR is NULL, which is
    a good thing.  */
-#define FIRST_STRING_P(ptr) 					\
+# define FIRST_STRING_P(ptr) 					\
   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
 
 /* (Re)Allocate N items of type T using malloc, or fail.  */
-#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
-#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
-#define RETALLOC_IF(addr, n, t) \
+# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+# define RETALLOC_IF(addr, n, t) \
   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
-#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
 
-#define BYTEWIDTH 8				/* In bits.  */
+# define BYTEWIDTH 8 /* In bits.  */
 
-#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
 
-#undef MAX
-#undef MIN
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
+# undef MAX
+# undef MIN
+# define MAX(a, b) ((a) > (b) ? (a) : (b))
+# define MIN(a, b) ((a) < (b) ? (a) : (b))
 
 typedef char boolean;
-
-#define false 0
-#define true 1
-
-static int re_match_2_internal PARAMS((struct re_pattern_buffer * bufp,
-									   const char *string1, int size1,
-									   const char *string2, int size2,
-									   int pos,
-									   struct re_registers * regs,
-
-									   int stop));
+# define false 0
+# define true 1
+
+static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
+                                                   reg_syntax_t syntax,
+                                                   struct re_pattern_buffer *bufp));
+
+static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
+					     const char *string1, int size1,
+					     const char *string2, int size2,
+					     int pos,
+					     struct re_registers *regs,
+					     int stop));
+static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
+				     const char *string1, int size1,
+				     const char *string2, int size2,
+				     int startpos, int range,
+				     struct re_registers *regs, int stop));
+static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
+
+#ifdef MBS_SUPPORT
+static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
+                                                   reg_syntax_t syntax,
+                                                   struct re_pattern_buffer *bufp));
+
+
+static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
+					    const char *cstring1, int csize1,
+					    const char *cstring2, int csize2,
+					    int pos,
+					    struct re_registers *regs,
+					    int stop,
+					    wchar_t *string1, int size1,
+					    wchar_t *string2, int size2,
+					    int *mbs_offset1, int *mbs_offset2));
+static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
+				    const char *string1, int size1,
+				    const char *string2, int size2,
+				    int startpos, int range,
+				    struct re_registers *regs, int stop));
+static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
+#endif
 
 /* These are the command codes that appear in compiled regular
    expressions.  Some opcodes are followed by argument bytes.  A
    command code can specify any interpretation whatsoever for its
    arguments.  Zero bytes may appear in the compiled regular expression.  */
 
-typedef enum {
-	no_op = 0,
-
-	/* Succeed right away--no more backtracking.  */
-	succeed,
-
-	/* Followed by one byte giving n, then by n literal bytes.  */
-	exactn,
-
-	/* Matches any (more or less) character.  */
-	anychar,
-
-	/* Matches any one char belonging to specified set.  First
-	   following byte is number of bitmap bytes.  Then come bytes
-	   for a bitmap saying which chars are in.  Bits in each byte
-	   are ordered low-bit-first.  A character is in the set if its
-	   bit is 1.  A character too large to have a bit in the map is
-	   automatically not in the set.  */
-	charset,
-
-	/* Same parameters as charset, but match any character that is
-	   not one of those specified.  */
-	charset_not,
-
-	/* Start remembering the text that is matched, for storing in a
-	   register.  Followed by one byte with the register number, in
-	   the range 0 to one less than the pattern buffer's re_nsub
-	   field.  Then followed by one byte with the number of groups
-	   inner to this one.  (This last has to be part of the
-	   start_memory only because we need it in the on_failure_jump
-	   of re_match_2.)  */
-	start_memory,
-
-	/* Stop remembering the text that is matched and store it in a
-	   memory register.  Followed by one byte with the register
-	   number, in the range 0 to one less than `re_nsub' in the
-	   pattern buffer, and one byte with the number of inner groups,
-	   just like `start_memory'.  (We need the number of inner
-	   groups here because we don't have any easy way of finding the
-	   corresponding start_memory when we're at a stop_memory.)  */
-	stop_memory,
-
-	/* Match a duplicate of something remembered. Followed by one
-	   byte containing the register number.  */
-	duplicate,
-
-	/* Fail unless at beginning of line.  */
-	begline,
+typedef enum
+{
+  no_op = 0,
 
-	/* Fail unless at end of line.  */
-	endline,
+  /* Succeed right away--no more backtracking.  */
+  succeed,
 
-	/* Succeeds if at beginning of buffer (if emacs) or at beginning
-	   of string to be matched (if not).  */
-	begbuf,
+        /* Followed by one byte giving n, then by n literal bytes.  */
+  exactn,
 
-	/* Analogously, for end of buffer/string.  */
-	endbuf,
+# ifdef MBS_SUPPORT
+	/* Same as exactn, but contains binary data.  */
+  exactn_bin,
+# endif
 
-	/* Followed by two byte relative address to which to jump.  */
-	jump,
+        /* Matches any (more or less) character.  */
+  anychar,
+
+        /* Matches any one char belonging to specified set.  First
+           following byte is number of bitmap bytes.  Then come bytes
+           for a bitmap saying which chars are in.  Bits in each byte
+           are ordered low-bit-first.  A character is in the set if its
+           bit is 1.  A character too large to have a bit in the map is
+           automatically not in the set.  */
+        /* ifdef MBS_SUPPORT, following element is length of character
+	   classes, length of collating symbols, length of equivalence
+	   classes, length of character ranges, and length of characters.
+	   Next, character class element, collating symbols elements,
+	   equivalence class elements, range elements, and character
+	   elements follow.
+	   See regex_compile function.  */
+  charset,
+
+        /* Same parameters as charset, but match any character that is
+           not one of those specified.  */
+  charset_not,
+
+        /* Start remembering the text that is matched, for storing in a
+           register.  Followed by one byte with the register number, in
+           the range 0 to one less than the pattern buffer's re_nsub
+           field.  Then followed by one byte with the number of groups
+           inner to this one.  (This last has to be part of the
+           start_memory only because we need it in the on_failure_jump
+           of re_match_2.)  */
+  start_memory,
+
+        /* Stop remembering the text that is matched and store it in a
+           memory register.  Followed by one byte with the register
+           number, in the range 0 to one less than `re_nsub' in the
+           pattern buffer, and one byte with the number of inner groups,
+           just like `start_memory'.  (We need the number of inner
+           groups here because we don't have any easy way of finding the
+           corresponding start_memory when we're at a stop_memory.)  */
+  stop_memory,
+
+        /* Match a duplicate of something remembered. Followed by one
+           byte containing the register number.  */
+  duplicate,
+
+        /* Fail unless at beginning of line.  */
+  begline,
+
+        /* Fail unless at end of line.  */
+  endline,
+
+        /* Succeeds if at beginning of buffer (if emacs) or at beginning
+           of string to be matched (if not).  */
+  begbuf,
+
+        /* Analogously, for end of buffer/string.  */
+  endbuf,
+
+        /* Followed by two byte relative address to which to jump.  */
+  jump,
 
 	/* Same as jump, but marks the end of an alternative.  */
-	jump_past_alt,
-
-	/* Followed by two-byte relative address of place to resume at
-	   in case of failure.  */
-	on_failure_jump,
-
-	/* Like on_failure_jump, but pushes a placeholder instead of the
-	   current string position when executed.  */
-	on_failure_keep_string_jump,
-
-	/* Throw away latest failure point and then jump to following
-	   two-byte relative address.  */
-	pop_failure_jump,
-
-	/* Change to pop_failure_jump if know won't have to backtrack to
-	   match; otherwise change to jump.  This is used to jump
-	   back to the beginning of a repeat.  If what follows this jump
-	   clearly won't match what the repeat does, such that we can be
-	   sure that there is no use backtracking out of repetitions
-	   already matched, then we change it to a pop_failure_jump.
-	   Followed by two-byte address.  */
-	maybe_pop_jump,
-
-	/* Jump to following two-byte address, and push a dummy failure
-	   point. This failure point will be thrown away if an attempt
-	   is made to use it for a failure.  A `+' construct makes this
-	   before the first repeat.  Also used as an intermediary kind
-	   of jump when compiling an alternative.  */
-	dummy_failure_jump,
+  jump_past_alt,
+
+        /* Followed by two-byte relative address of place to resume at
+           in case of failure.  */
+        /* ifdef MBS_SUPPORT, the size of address is 1.  */
+  on_failure_jump,
+
+        /* Like on_failure_jump, but pushes a placeholder instead of the
+           current string position when executed.  */
+  on_failure_keep_string_jump,
+
+        /* Throw away latest failure point and then jump to following
+           two-byte relative address.  */
+        /* ifdef MBS_SUPPORT, the size of address is 1.  */
+  pop_failure_jump,
+
+        /* Change to pop_failure_jump if know won't have to backtrack to
+           match; otherwise change to jump.  This is used to jump
+           back to the beginning of a repeat.  If what follows this jump
+           clearly won't match what the repeat does, such that we can be
+           sure that there is no use backtracking out of repetitions
+           already matched, then we change it to a pop_failure_jump.
+           Followed by two-byte address.  */
+        /* ifdef MBS_SUPPORT, the size of address is 1.  */
+  maybe_pop_jump,
+
+        /* Jump to following two-byte address, and push a dummy failure
+           point. This failure point will be thrown away if an attempt
+           is made to use it for a failure.  A `+' construct makes this
+           before the first repeat.  Also used as an intermediary kind
+           of jump when compiling an alternative.  */
+        /* ifdef MBS_SUPPORT, the size of address is 1.  */
+  dummy_failure_jump,
 
 	/* Push a dummy failure point and continue.  Used at the end of
 	   alternatives.  */
-	push_dummy_failure,
+  push_dummy_failure,
 
-	/* Followed by two-byte relative address and two-byte number n.
-	   After matching N times, jump to the address upon failure.  */
-	succeed_n,
+        /* Followed by two-byte relative address and two-byte number n.
+           After matching N times, jump to the address upon failure.  */
+        /* ifdef MBS_SUPPORT, the size of address is 1.  */
+  succeed_n,
 
-	/* Followed by two-byte relative address, and two-byte number n.
-	   Jump to the address N times, then fail.  */
-	jump_n,
+        /* Followed by two-byte relative address, and two-byte number n.
+           Jump to the address N times, then fail.  */
+        /* ifdef MBS_SUPPORT, the size of address is 1.  */
+  jump_n,
 
-	/* Set the following two-byte relative address to the
-	   subsequent two-byte number.  The address *includes* the two
-	   bytes of number.  */
-	set_number_at,
+        /* Set the following two-byte relative address to the
+           subsequent two-byte number.  The address *includes* the two
+           bytes of number.  */
+        /* ifdef MBS_SUPPORT, the size of address is 1.  */
+  set_number_at,
 
-	wordchar,					/* Matches any word-constituent character.  */
-	notwordchar,				/* Matches any char that is not a word-constituent.  */
+  wordchar,	/* Matches any word-constituent character.  */
+  notwordchar,	/* Matches any char that is not a word-constituent.  */
 
-	wordbeg,					/* Succeeds if at word beginning.  */
-	wordend,					/* Succeeds if at word end.  */
+  wordbeg,	/* Succeeds if at word beginning.  */
+  wordend,	/* Succeeds if at word end.  */
 
-	wordbound,					/* Succeeds if at a word boundary.  */
-	notwordbound				/* Succeeds if not at a word boundary.  */
-#ifdef emacs
-		, before_dot,			/* Succeeds if before point.  */
-	at_dot,						/* Succeeds if at point.  */
-	after_dot,					/* Succeeds if after point.  */
+  wordbound,	/* Succeeds if at a word boundary.  */
+  notwordbound	/* Succeeds if not at a word boundary.  */
+
+# ifdef emacs
+  ,before_dot,	/* Succeeds if before point.  */
+  at_dot,	/* Succeeds if at point.  */
+  after_dot,	/* Succeeds if after point.  */
 
 	/* Matches any character whose syntax is specified.  Followed by
-	   a byte which contains a syntax code, e.g., Sword.  */
-	syntaxspec,
+           a byte which contains a syntax code, e.g., Sword.  */
+  syntaxspec,
 
 	/* Matches any character whose syntax is not that specified.  */
-	notsyntaxspec
-#endif							/* emacs */
+  notsyntaxspec
+# endif /* emacs */
 } re_opcode_t;
+#endif /* not INSIDE_RECURSION */
 
+
+#ifdef BYTE
+# define CHAR_T char
+# define UCHAR_T unsigned char
+# define COMPILED_BUFFER_VAR bufp->buffer
+# define OFFSET_ADDRESS_SIZE 2
+# define PREFIX(name) byte_##name
+# define ARG_PREFIX(name) name
+# define PUT_CHAR(c) putchar (c)
+#else
+# ifdef WCHAR
+#  define CHAR_T wchar_t
+#  define UCHAR_T wchar_t
+#  define COMPILED_BUFFER_VAR wc_buffer
+#  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
+#  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
+#  define PREFIX(name) wcs_##name
+#  define ARG_PREFIX(name) c##name
+/* Should we use wide stream??  */
+#  define PUT_CHAR(c) printf ("%C", c);
+#  define TRUE 1
+#  define FALSE 0
+# else
+#  ifdef MBS_SUPPORT
+#   define WCHAR
+#   define INSIDE_RECURSION
+#   include "regex.c"
+#   undef INSIDE_RECURSION
+#  endif
+#  define BYTE
+#  define INSIDE_RECURSION
+#  include "regex.c"
+#  undef INSIDE_RECURSION
+# endif
+#endif
+
+#ifdef INSIDE_RECURSION
 /* Common operations on the compiled pattern.  */
 
 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
+/* In the WCHAR build, NUMBER is stored in a single element instead.  */
 
-#define STORE_NUMBER(destination, number)				\
+# ifdef WCHAR
+#  define STORE_NUMBER(destination, number)				\
+  do {									\
+    *(destination) = (UCHAR_T)(number);				\
+  } while (0)
+# else /* BYTE */
+#  define STORE_NUMBER(destination, number)				\
   do {									\
     (destination)[0] = (number) & 0377;					\
     (destination)[1] = (number) >> 8;					\
   } while (0)
+# endif /* WCHAR */
 
 /* Same as STORE_NUMBER, except increment DESTINATION to
    the byte after where the number is stored.  Therefore, DESTINATION
    must be an lvalue.  */
+/* DESTINATION advances by OFFSET_ADDRESS_SIZE (1 if WCHAR, 2 if BYTE).  */
 
-#define STORE_NUMBER_AND_INCR(destination, number)			\
+# define STORE_NUMBER_AND_INCR(destination, number)			\
   do {									\
     STORE_NUMBER (destination, number);					\
-    (destination) += 2;							\
+    (destination) += OFFSET_ADDRESS_SIZE;				\
   } while (0)
 
 /* Put into DESTINATION a number stored in two contiguous bytes starting
    at SOURCE.  */
+/* In the WCHAR build, the number is read from a single element instead.  */
 
-#define EXTRACT_NUMBER(destination, source)				\
+# ifdef WCHAR
+#  define EXTRACT_NUMBER(destination, source)				\
+  do {									\
+    (destination) = *(source);						\
+  } while (0)
+# else /* BYTE */
+#  define EXTRACT_NUMBER(destination, source)				\
   do {									\
     (destination) = *(source) & 0377;					\
     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
   } while (0)
+# endif
 
-#ifdef DEBUG
-static void extract_number _RE_ARGS((int *dest, unsigned char *source));
-static void extract_number(dest, source)
-int *dest;
-unsigned char *source;
+# ifdef DEBUG
+static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source));
+static void
+PREFIX(extract_number) (dest, source)
+    int *dest;
+    UCHAR_T *source;
 {
-	int temp = SIGN_EXTEND_CHAR(*(source + 1));
-
-	*dest = *source & 0377;
-	*dest += temp << 8;
+#  ifdef WCHAR
+  *dest = *source;
+#  else /* BYTE */
+  int temp = SIGN_EXTEND_CHAR (*(source + 1));
+  *dest = *source & 0377;
+  *dest += temp << 8;
+#  endif
 }
 
-# ifndef EXTRACT_MACROS			/* To debug the macros.  */
-#  undef EXTRACT_NUMBER
-#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
-# endif							/* not EXTRACT_MACROS */
+#  ifndef EXTRACT_MACROS /* To debug the macros.  */
+#   undef EXTRACT_NUMBER
+#   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
+#  endif /* not EXTRACT_MACROS */
 
-#endif							/* DEBUG */
+# endif /* DEBUG */
 
 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
    SOURCE must be an lvalue.  */
 
-#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
+# define EXTRACT_NUMBER_AND_INCR(destination, source)			\
   do {									\
     EXTRACT_NUMBER (destination, source);				\
-    (source) += 2;
author	Eric Andersen <andersen@codepoet.org>	2001-11-24 13:20:18 +0000
committer	Eric Andersen <andersen@codepoet.org>	2001-11-24 13:20:18 +0000
commit	322e234bd5ae2b05566491a6f1481bee8b1731c9 (patch)
tree	17adf256c552bd3b43b28ca0e1b16d59ef52bee7
parent	dfb5fe2dee1b64c57c3df7fc4c0ecb7ad0450730 (diff)