summaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
authorEric Andersen <andersen@codepoet.org>2001-11-24 13:20:18 +0000
committerEric Andersen <andersen@codepoet.org>2001-11-24 13:20:18 +0000
commit322e234bd5ae2b05566491a6f1481bee8b1731c9 (patch)
tree17adf256c552bd3b43b28ca0e1b16d59ef52bee7 /libc
parentdfb5fe2dee1b64c57c3df7fc4c0ecb7ad0450730 (diff)
Sync regex with glibc 2.2.4. I need to add an option to select
a minamalist replacement. Coming soon... -Erik
Diffstat (limited to 'libc')
-rw-r--r--libc/misc/regex/regex.c11242
1 files changed, 6942 insertions, 4300 deletions
diff --git a/libc/misc/regex/regex.c b/libc/misc/regex/regex.c
index 350535fa1..81298314b 100644
--- a/libc/misc/regex/regex.c
+++ b/libc/misc/regex/regex.c
@@ -2,35 +2,41 @@
version 0.12.
(Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
/* To exclude some unwanted junk.... */
#undef _LIBC
+#undef emacs
#define _REGEX_RE_COMP
+#include <features.h>
+#include <stdlib.h>
+#include <string.h>
+#define STDC_HEADERS
/* AIX requires this to be the first thing in the file. */
#if defined _AIX && !defined REGEX_MALLOC
-#pragma alloca
+ #pragma alloca
#endif
#undef _GNU_SOURCE
#define _GNU_SOURCE
-#define STDC_HEADERS
#ifdef HAVE_CONFIG_H
# include <config.h>
@@ -41,141 +47,161 @@
# define PARAMS(args) args
# else
# define PARAMS(args) ()
-# endif /* GCC. */
-#endif /* Not PARAMS. */
+# endif /* GCC. */
+#endif /* Not PARAMS. */
-#if defined STDC_HEADERS && !defined emacs
-# include <stddef.h>
-#else
+#ifndef INSIDE_RECURSION
+
+# if defined STDC_HEADERS && !defined emacs
+# include <stddef.h>
+# else
/* We need this for `regex.h', and perhaps for the Emacs include files. */
-# include <sys/types.h>
-#endif
+# include <sys/types.h>
+# endif
-#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
/* For platform which support the ISO C amendement 1 functionality we
support user defined character classes. */
-#if defined _LIBC || WIDE_CHAR_SUPPORT
+# if defined _LIBC || WIDE_CHAR_SUPPORT
/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
-# include <wchar.h>
-# include <wctype.h>
-#endif
+# include <wchar.h>
+# include <wctype.h>
+# endif
-#ifdef _LIBC
+# ifdef _LIBC
/* We have to keep the namespace clean. */
-# define regfree(preg) __regfree (preg)
-# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
-# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
-# define regerror(errcode, preg, errbuf, errbuf_size) \
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
__regerror(errcode, preg, errbuf, errbuf_size)
-# define re_set_registers(bu, re, nu, st, en) \
+# define re_set_registers(bu, re, nu, st, en) \
__re_set_registers (bu, re, nu, st, en)
-# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
-# define re_match(bufp, string, size, pos, regs) \
+# define re_match(bufp, string, size, pos, regs) \
__re_match (bufp, string, size, pos, regs)
-# define re_search(bufp, string, size, startpos, range, regs) \
+# define re_search(bufp, string, size, startpos, range, regs) \
__re_search (bufp, string, size, startpos, range, regs)
-# define re_compile_pattern(pattern, length, bufp) \
+# define re_compile_pattern(pattern, length, bufp) \
__re_compile_pattern (pattern, length, bufp)
-# define re_set_syntax(syntax) __re_set_syntax (syntax)
-# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
-# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
-#define btowc __btowc
-#endif
+# define btowc __btowc
+
+/* We are also using some library internals. */
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <langinfo.h>
+# include <locale/coll-lookup.h>
+# endif
/* This is for other GNU distributions with internationalized messages. */
-#if HAVE_LIBINTL_H || defined _LIBC
-# include <libintl.h>
-#else
-# define gettext(msgid) (msgid)
-#endif
+# if HAVE_LIBINTL_H || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+# undef gettext
+# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
+# endif
+# else
+# define gettext(msgid) (msgid)
+# endif
-#ifndef gettext_noop
+# ifndef gettext_noop
/* This define is so xgettext can find the internationalizable
strings. */
-# define gettext_noop(String) String
-#endif
+# define gettext_noop(String) String
+# endif
/* The `emacs' switch turns on certain matching commands
that make sense only in Emacs. */
-#ifdef emacs
+# ifdef emacs
-# include "lisp.h"
-# include "buffer.h"
-# include "syntax.h"
+# include "lisp.h"
+# include "buffer.h"
+# include "syntax.h"
-#else /* not emacs */
+# else /* not emacs */
/* If we are not linking with Emacs proper,
we can't use the relocating allocator
even if config.h says that we can. */
-# undef REL_ALLOC
+# undef REL_ALLOC
-# if defined STDC_HEADERS || defined _LIBC
-# include <stdlib.h>
-# else
-char *malloc();
-char *realloc();
-# endif
+# if defined STDC_HEADERS || defined _LIBC
+# include <stdlib.h>
+# else
+char *malloc ();
+char *realloc ();
+# endif
/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
If nothing else has been done, use the method below. */
-# ifdef INHIBIT_STRING_HEADER
-# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
-# if !defined bzero && !defined bcopy
-# undef INHIBIT_STRING_HEADER
+# ifdef INHIBIT_STRING_HEADER
+# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
+# if !defined bzero && !defined bcopy
+# undef INHIBIT_STRING_HEADER
+# endif
# endif
# endif
-# endif
/* This is the normal way of making sure we have a bcopy and a bzero.
This is used in most programs--a few other programs avoid this
by defining INHIBIT_STRING_HEADER. */
-# ifndef INHIBIT_STRING_HEADER
-# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
-# include <string.h>
-# ifndef bzero
-# ifndef _LIBC
-# define bzero(s, n) (memset (s, '\0', n), (s))
-# else
-# define bzero(s, n) __bzero (s, n)
+# ifndef INHIBIT_STRING_HEADER
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
+# endif
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
+# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
# endif
-# endif
-# else
-# include <strings.h>
-# ifndef memcmp
-# define memcmp(s1, s2, n) bcmp (s1, s2, n)
-# endif
-# ifndef memcpy
-# define memcpy(d, s, n) (bcopy (s, d, n), (d))
# endif
# endif
-# endif
/* Define the syntax stuff for \<, \>, etc. */
/* This must be nonzero for the wordchar and notwordchar pattern
commands in re_match_2. */
-# ifndef Sword
-# define Sword 1
-# endif
+# ifndef Sword
+# define Sword 1
+# endif
-# ifdef SWITCH_ENUM_BUG
-# define SWITCH_ENUM_CAST(x) ((int)(x))
-# else
-# define SWITCH_ENUM_CAST(x) (x)
+# ifdef SWITCH_ENUM_BUG
+# define SWITCH_ENUM_CAST(x) ((int)(x))
+# else
+# define SWITCH_ENUM_CAST(x) (x)
+# endif
+
+# endif /* not emacs */
+
+# if defined _LIBC || HAVE_LIMITS_H
+# include <limits.h>
# endif
-#endif /* not emacs */
+# ifndef MB_LEN_MAX
+# define MB_LEN_MAX 1
+# endif
/* Get the interface, including the syntax bits. */
-#include <regex.h>
+# include <regex.h>
/* isalpha etc. are used for the character classes. */
-#include <ctype.h>
+# include <ctype.h>
/* Jim Meyering writes:
@@ -189,94 +215,102 @@ char *realloc();
eliminate the && through constant folding."
Solaris defines some of these symbols so we must undefine them first. */
-#undef ISASCII
-#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
-# define ISASCII(c) 1
-#else
-# define ISASCII(c) isascii(c)
-#endif
+# undef ISASCII
+# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+# define ISASCII(c) 1
+# else
+# define ISASCII(c) isascii(c)
+# endif
-#ifdef isblank
-# define ISBLANK(c) (ISASCII (c) && isblank (c))
-#else
-# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-#endif
-#ifdef isgraph
-# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
-#else
-# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
-#endif
+# ifdef isblank
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
+# else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# endif
+# ifdef isgraph
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# else
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# endif
-#undef ISPRINT
-#define ISPRINT(c) (ISASCII (c) && isprint (c))
-#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
-#define ISALNUM(c) (ISASCII (c) && isalnum (c))
-#define ISALPHA(c) (ISASCII (c) && isalpha (c))
-#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
-#define ISLOWER(c) (ISASCII (c) && islower (c))
-#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
-#define ISSPACE(c) (ISASCII (c) && isspace (c))
-#define ISUPPER(c) (ISASCII (c) && isupper (c))
-#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
-
-#ifdef _tolower
-# define TOLOWER(c) _tolower(c)
-#else
-# define TOLOWER(c) tolower(c)
-#endif
+# undef ISPRINT
+# define ISPRINT(c) (ISASCII (c) && isprint (c))
+# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+# define ISALNUM(c) (ISASCII (c) && isalnum (c))
+# define ISALPHA(c) (ISASCII (c) && isalpha (c))
+# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+# define ISLOWER(c) (ISASCII (c) && islower (c))
+# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+# define ISSPACE(c) (ISASCII (c) && isspace (c))
+# define ISUPPER(c) (ISASCII (c) && isupper (c))
+# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+# ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+# else
+# define TOLOWER(c) tolower(c)
+# endif
-#ifndef NULL
-# define NULL (void *)0
-#endif
+# ifndef NULL
+# define NULL (void *)0
+# endif
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
since ours (we hope) works properly with all combinations of
machines, compilers, `char' and `unsigned char' argument types.
(Per Bothner suggested the basic approach.) */
-#undef SIGN_EXTEND_CHAR
-#if __STDC__
-# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
-#else /* not __STDC__ */
+# undef SIGN_EXTEND_CHAR
+# if __STDC__
+# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+# else /* not __STDC__ */
/* As in Harbison and Steele. */
-# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
-#endif
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+# endif
-#ifndef emacs
+# ifndef emacs
/* How many characters in the character set. */
-# define CHAR_SET_SIZE 256
+# define CHAR_SET_SIZE 256
-# ifdef SYNTAX_TABLE
+# ifdef SYNTAX_TABLE
extern char *re_syntax_table;
-# else /* not SYNTAX_TABLE */
+# else /* not SYNTAX_TABLE */
static char re_syntax_table[CHAR_SET_SIZE];
-static void init_syntax_once()
+static void init_syntax_once PARAMS ((void));
+
+static void
+init_syntax_once ()
{
- register int c;
- static int done = 0;
+ register int c;
+ static int done = 0;
- if (done)
- return;
- bzero(re_syntax_table, sizeof re_syntax_table);
+ if (done)
+ return;
+ bzero (re_syntax_table, sizeof re_syntax_table);
- for (c = 0; c < CHAR_SET_SIZE; ++c)
- if (ISALNUM(c))
- re_syntax_table[c] = Sword;
+ for (c = 0; c < CHAR_SET_SIZE; ++c)
+ if (ISALNUM (c))
+ re_syntax_table[c] = Sword;
- re_syntax_table['_'] = Sword;
+ re_syntax_table['_'] = Sword;
- done = 1;
+ done = 1;
}
-# endif /* not SYNTAX_TABLE */
+# endif /* not SYNTAX_TABLE */
-# define SYNTAX(c) re_syntax_table[((c) & 0xFF)]
+# define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
-#endif /* emacs */
+# endif /* emacs */
+/* Integer type for pointers. */
+# if !defined _LIBC
+typedef unsigned long int uintptr_t;
+# endif
+
/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
use `alloca' instead of `malloc'. This is because using malloc in
re_search* or re_match* could cause memory leaks when C-g is used in
@@ -287,674 +321,1016 @@ static void init_syntax_once()
not functions -- `alloca'-allocated space disappears at the end of the
function it is called in. */
-#ifdef REGEX_MALLOC
+# ifdef REGEX_MALLOC
-# define REGEX_ALLOCATE malloc
-# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
-# define REGEX_FREE free
+# define REGEX_ALLOCATE malloc
+# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE free
-#else /* not REGEX_MALLOC */
+# else /* not REGEX_MALLOC */
/* Emacs already defines alloca, sometimes. */
-# ifndef alloca
+# ifndef alloca
/* Make alloca work the best possible way. */
-# ifdef __GNUC__
-# define alloca __builtin_alloca
-# else /* not __GNUC__ */
-# if HAVE_ALLOCA_H
-# include <alloca.h>
-# endif /* HAVE_ALLOCA_H */
-# endif /* not __GNUC__ */
+# ifdef __GNUC__
+# define alloca __builtin_alloca
+# else /* not __GNUC__ */
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# endif /* HAVE_ALLOCA_H */
+# endif /* not __GNUC__ */
-# endif /* not alloca */
+# endif /* not alloca */
-# define REGEX_ALLOCATE alloca
+# define REGEX_ALLOCATE alloca
/* Assumes a `char *destination' variable. */
-# define REGEX_REALLOCATE(source, osize, nsize) \
+# define REGEX_REALLOCATE(source, osize, nsize) \
(destination = (char *) alloca (nsize), \
memcpy (destination, source, osize))
/* No need to do anything to free, after alloca. */
-# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
-#endif /* not REGEX_MALLOC */
+# endif /* not REGEX_MALLOC */
/* Define how to allocate the failure stack. */
-#if defined REL_ALLOC && defined REGEX_MALLOC
+# if defined REL_ALLOC && defined REGEX_MALLOC
-# define REGEX_ALLOCATE_STACK(size) \
+# define REGEX_ALLOCATE_STACK(size) \
r_alloc (&failure_stack_ptr, (size))
-# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
r_re_alloc (&failure_stack_ptr, (nsize))
-# define REGEX_FREE_STACK(ptr) \
+# define REGEX_FREE_STACK(ptr) \
r_alloc_free (&failure_stack_ptr)
-#else /* not using relocating allocator */
+# else /* not using relocating allocator */
-# ifdef REGEX_MALLOC
+# ifdef REGEX_MALLOC
-# define REGEX_ALLOCATE_STACK malloc
-# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
-# define REGEX_FREE_STACK free
+# define REGEX_ALLOCATE_STACK malloc
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE_STACK free
-# else /* not REGEX_MALLOC */
+# else /* not REGEX_MALLOC */
-# define REGEX_ALLOCATE_STACK alloca
+# define REGEX_ALLOCATE_STACK alloca
-# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything. */
-# define REGEX_FREE_STACK(arg)
+# define REGEX_FREE_STACK(arg)
-# endif /* not REGEX_MALLOC */
-#endif /* not using relocating allocator */
+# endif /* not REGEX_MALLOC */
+# endif /* not using relocating allocator */
/* True if `size1' is non-NULL and PTR is pointing anywhere inside
`string1' or just past its end. This works if PTR is NULL, which is
a good thing. */
-#define FIRST_STRING_P(ptr) \
+# define FIRST_STRING_P(ptr) \
(size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
/* (Re)Allocate N items of type T using malloc, or fail. */
-#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
-#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
-#define RETALLOC_IF(addr, n, t) \
+# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+# define RETALLOC_IF(addr, n, t) \
if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
-#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
-#define BYTEWIDTH 8 /* In bits. */
+# define BYTEWIDTH 8 /* In bits. */
-#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
-#undef MAX
-#undef MIN
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
+# undef MAX
+# undef MIN
+# define MAX(a, b) ((a) > (b) ? (a) : (b))
+# define MIN(a, b) ((a) < (b) ? (a) : (b))
typedef char boolean;
-
-#define false 0
-#define true 1
-
-static int re_match_2_internal PARAMS((struct re_pattern_buffer * bufp,
- const char *string1, int size1,
- const char *string2, int size2,
- int pos,
- struct re_registers * regs,
-
- int stop));
+# define false 0
+# define true 1
+
+static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp));
+
+static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int pos,
+ struct re_registers *regs,
+ int stop));
+static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs, int stop));
+static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
+
+#ifdef MBS_SUPPORT
+static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp));
+
+
+static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
+ const char *cstring1, int csize1,
+ const char *cstring2, int csize2,
+ int pos,
+ struct re_registers *regs,
+ int stop,
+ wchar_t *string1, int size1,
+ wchar_t *string2, int size2,
+ int *mbs_offset1, int *mbs_offset2));
+static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs, int stop));
+static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
+#endif
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
command code can specify any interpretation whatsoever for its
arguments. Zero bytes may appear in the compiled regular expression. */
-typedef enum {
- no_op = 0,
-
- /* Succeed right away--no more backtracking. */
- succeed,
-
- /* Followed by one byte giving n, then by n literal bytes. */
- exactn,
-
- /* Matches any (more or less) character. */
- anychar,
-
- /* Matches any one char belonging to specified set. First
- following byte is number of bitmap bytes. Then come bytes
- for a bitmap saying which chars are in. Bits in each byte
- are ordered low-bit-first. A character is in the set if its
- bit is 1. A character too large to have a bit in the map is
- automatically not in the set. */
- charset,
-
- /* Same parameters as charset, but match any character that is
- not one of those specified. */
- charset_not,
-
- /* Start remembering the text that is matched, for storing in a
- register. Followed by one byte with the register number, in
- the range 0 to one less than the pattern buffer's re_nsub
- field. Then followed by one byte with the number of groups
- inner to this one. (This last has to be part of the
- start_memory only because we need it in the on_failure_jump
- of re_match_2.) */
- start_memory,
-
- /* Stop remembering the text that is matched and store it in a
- memory register. Followed by one byte with the register
- number, in the range 0 to one less than `re_nsub' in the
- pattern buffer, and one byte with the number of inner groups,
- just like `start_memory'. (We need the number of inner
- groups here because we don't have any easy way of finding the
- corresponding start_memory when we're at a stop_memory.) */
- stop_memory,
-
- /* Match a duplicate of something remembered. Followed by one
- byte containing the register number. */
- duplicate,
-
- /* Fail unless at beginning of line. */
- begline,
+typedef enum
+{
+ no_op = 0,
- /* Fail unless at end of line. */
- endline,
+ /* Succeed right away--no more backtracking. */
+ succeed,
- /* Succeeds if at beginning of buffer (if emacs) or at beginning
- of string to be matched (if not). */
- begbuf,
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn,
- /* Analogously, for end of buffer/string. */
- endbuf,
+# ifdef MBS_SUPPORT
+ /* Same as exactn, but contains binary data. */
+ exactn_bin,
+# endif
- /* Followed by two byte relative address to which to jump. */
- jump,
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ /* ifdef MBS_SUPPORT, following element is length of character
+ classes, length of collating symbols, length of equivalence
+ classes, length of character ranges, and length of characters.
+ Next, character class element, collating symbols elements,
+ equivalence class elements, range elements, and character
+ elements follow.
+ See regex_compile function. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two byte relative address to which to jump. */
+ jump,
/* Same as jump, but marks the end of an alternative. */
- jump_past_alt,
-
- /* Followed by two-byte relative address of place to resume at
- in case of failure. */
- on_failure_jump,
-
- /* Like on_failure_jump, but pushes a placeholder instead of the
- current string position when executed. */
- on_failure_keep_string_jump,
-
- /* Throw away latest failure point and then jump to following
- two-byte relative address. */
- pop_failure_jump,
-
- /* Change to pop_failure_jump if know won't have to backtrack to
- match; otherwise change to jump. This is used to jump
- back to the beginning of a repeat. If what follows this jump
- clearly won't match what the repeat does, such that we can be
- sure that there is no use backtracking out of repetitions
- already matched, then we change it to a pop_failure_jump.
- Followed by two-byte address. */
- maybe_pop_jump,
-
- /* Jump to following two-byte address, and push a dummy failure
- point. This failure point will be thrown away if an attempt
- is made to use it for a failure. A `+' construct makes this
- before the first repeat. Also used as an intermediary kind
- of jump when compiling an alternative. */
- dummy_failure_jump,
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if know won't have to backtrack to
+ match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ dummy_failure_jump,
/* Push a dummy failure point and continue. Used at the end of
alternatives. */
- push_dummy_failure,
+ push_dummy_failure,
- /* Followed by two-byte relative address and two-byte number n.
- After matching N times, jump to the address upon failure. */
- succeed_n,
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ succeed_n,
- /* Followed by two-byte relative address, and two-byte number n.
- Jump to the address N times, then fail. */
- jump_n,
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ jump_n,
- /* Set the following two-byte relative address to the
- subsequent two-byte number. The address *includes* the two
- bytes of number. */
- set_number_at,
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ /* ifdef MBS_SUPPORT, the size of address is 1. */
+ set_number_at,
- wordchar, /* Matches any word-constituent character. */
- notwordchar, /* Matches any char that is not a word-constituent. */
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
- wordbeg, /* Succeeds if at word beginning. */
- wordend, /* Succeeds if at word end. */
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
- wordbound, /* Succeeds if at a word boundary. */
- notwordbound /* Succeeds if not at a word boundary. */
-#ifdef emacs
- , before_dot, /* Succeeds if before point. */
- at_dot, /* Succeeds if at point. */
- after_dot, /* Succeeds if after point. */
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+# ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
/* Matches any character whose syntax is specified. Followed by
- a byte which contains a syntax code, e.g., Sword. */
- syntaxspec,
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
/* Matches any character whose syntax is not that specified. */
- notsyntaxspec
-#endif /* emacs */
+ notsyntaxspec
+# endif /* emacs */
} re_opcode_t;
+#endif /* not INSIDE_RECURSION */
+
+#ifdef BYTE
+# define CHAR_T char
+# define UCHAR_T unsigned char
+# define COMPILED_BUFFER_VAR bufp->buffer
+# define OFFSET_ADDRESS_SIZE 2
+# define PREFIX(name) byte_##name
+# define ARG_PREFIX(name) name
+# define PUT_CHAR(c) putchar (c)
+#else
+# ifdef WCHAR
+# define CHAR_T wchar_t
+# define UCHAR_T wchar_t
+# define COMPILED_BUFFER_VAR wc_buffer
+# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
+# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
+# define PREFIX(name) wcs_##name
+# define ARG_PREFIX(name) c##name
+/* Should we use wide stream?? */
+# define PUT_CHAR(c) printf ("%C", c);
+# define TRUE 1
+# define FALSE 0
+# else
+# ifdef MBS_SUPPORT
+# define WCHAR
+# define INSIDE_RECURSION
+# include "regex.c"
+# undef INSIDE_RECURSION
+# endif
+# define BYTE
+# define INSIDE_RECURSION
+# include "regex.c"
+# undef INSIDE_RECURSION
+# endif
+#endif
+
+#ifdef INSIDE_RECURSION
/* Common operations on the compiled pattern. */
/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
-#define STORE_NUMBER(destination, number) \
+# ifdef WCHAR
+# define STORE_NUMBER(destination, number) \
+ do { \
+ *(destination) = (UCHAR_T)(number); \
+ } while (0)
+# else /* BYTE */
+# define STORE_NUMBER(destination, number) \
do { \
(destination)[0] = (number) & 0377; \
(destination)[1] = (number) >> 8; \
} while (0)
+# endif /* WCHAR */
/* Same as STORE_NUMBER, except increment DESTINATION to
the byte after where the number is stored. Therefore, DESTINATION
must be an lvalue. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
-#define STORE_NUMBER_AND_INCR(destination, number) \
+# define STORE_NUMBER_AND_INCR(destination, number) \
do { \
STORE_NUMBER (destination, number); \
- (destination) += 2; \
+ (destination) += OFFSET_ADDRESS_SIZE; \
} while (0)
/* Put into DESTINATION a number stored in two contiguous bytes starting
at SOURCE. */
+/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */
-#define EXTRACT_NUMBER(destination, source) \
+# ifdef WCHAR
+# define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source); \
+ } while (0)
+# else /* BYTE */
+# define EXTRACT_NUMBER(destination, source) \
do { \
(destination) = *(source) & 0377; \
(destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
} while (0)
+# endif
-#ifdef DEBUG
-static void extract_number _RE_ARGS((int *dest, unsigned char *source));
-static void extract_number(dest, source)
-int *dest;
-unsigned char *source;
+# ifdef DEBUG
+static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source));
+static void
+PREFIX(extract_number) (dest, source)
+ int *dest;
+ UCHAR_T *source;
{
- int temp = SIGN_EXTEND_CHAR(*(source + 1));
-
- *dest = *source & 0377;
- *dest += temp << 8;
+# ifdef WCHAR
+ *dest = *source;
+# else /* BYTE */
+ int temp = SIGN_EXTEND_CHAR (*(source + 1));
+ *dest = *source & 0377;
+ *dest += temp << 8;
+# endif
}
-# ifndef EXTRACT_MACROS /* To debug the macros. */
-# undef EXTRACT_NUMBER
-# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
-# endif /* not EXTRACT_MACROS */
+# ifndef EXTRACT_MACROS /* To debug the macros. */
+# undef EXTRACT_NUMBER
+# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
+# endif /* not EXTRACT_MACROS */
-#endif /* DEBUG */
+# endif /* DEBUG */
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
SOURCE must be an lvalue. */
-#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+# define EXTRACT_NUMBER_AND_INCR(destination, source) \
do { \