From f3a9360625bca4fb0b5fe9730d6e25431ac4704b Mon Sep 17 00:00:00 2001 From: Eric Andersen Date: Sun, 9 Jul 2000 06:39:19 +0000 Subject: Add in a bunch of junk. Busybox now compiles (except for mkfs.minix and fsck.minix). Of course, it doesn't link yet due to missing functions, but hey... At least it is now easy to see what isn't working. :-) -Erik --- libc/misc/regex/Makefile | 17 + libc/misc/regex/rx.c | 7522 ++++++++++++++++++++++++++++++++++++++++++++++ libc/stdlib/Makefile | 2 +- libc/stdlib/realpath.c | 168 ++ 4 files changed, 7708 insertions(+), 1 deletion(-) create mode 100644 libc/misc/regex/Makefile create mode 100644 libc/misc/regex/rx.c create mode 100644 libc/stdlib/realpath.c (limited to 'libc') diff --git a/libc/misc/regex/Makefile b/libc/misc/regex/Makefile new file mode 100644 index 000000000..c6c8d8e52 --- /dev/null +++ b/libc/misc/regex/Makefile @@ -0,0 +1,17 @@ +TOPDIR=../ +include $(TOPDIR)Rules.make + +LIBC=../libc.a + +OBJ=rx.o + +all: $(LIBC) + +$(LIBC): $(LIBC)($(OBJ)) + +$(LIBC)(rx.o): rx.c + $(CC) $(CFLAGS) -DL_$* $< -c -o $*.o + $(AR) $(ARFLAGS) $@ $*.o + +clean: + rm -f libc.a *.o core mon.out timer.t.h dMakefile dtr try timer diff --git a/libc/misc/regex/rx.c b/libc/misc/regex/rx.c new file mode 100644 index 000000000..8e85782f2 --- /dev/null +++ b/libc/misc/regex/rx.c @@ -0,0 +1,7522 @@ +/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc. + +This file is part of the librx library. + +Librx is free software; you can redistribute it and/or modify it under +the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +Librx is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU Library General Public +License along with this software; see the file COPYING.LIB. If not, +write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA +02139, USA. */ + +/* NOTE!!! AIX is so losing it requires this to be the first thing in the + * file. + * Do not put ANYTHING before it! + */ +#if !defined (__GNUC__) && defined (_AIX) + #pragma alloca +#endif + +/* To make linux happy? */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + + +#include +#include +#include +#include +#ifndef isgraph +#define isgraph(c) (isprint (c) && !isspace (c)) +#endif +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif + +#include + +#undef MAX +#undef MIN +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +typedef char boolean; +#define false 0 +#define true 1 + +#ifndef __GCC__ +#undef __inline__ +#define __inline__ +#endif + +/* Emacs already defines alloca, sometimes. */ +#ifndef alloca + +/* Make alloca work the best possible way. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include +#else /* not __GNUC__ or HAVE_ALLOCA_H */ +#ifndef _AIX /* Already did AIX, up at the top. */ +char *alloca (); +#endif /* not _AIX */ +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#endif /* not alloca */ + +/* Memory management and stuff for emacs. */ + +#define CHARBITS 8 +#define remalloc(M, S) (M ? realloc (M, S) : malloc (S)) + + +/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we + * use `alloca' instead of `malloc' for the backtracking stack. + * + * Emacs will die miserably if we don't do this. + */ + +#ifdef REGEX_MALLOC +#define REGEX_ALLOCATE malloc +#else /* not REGEX_MALLOC */ +#define REGEX_ALLOCATE alloca +#endif /* not REGEX_MALLOC */ + + +#ifdef RX_WANT_RX_DEFS +#define RX_DECL extern +#define RX_DEF_QUAL +#else +#define RX_WANT_RX_DEFS +#define RX_DECL static +#define RX_DEF_QUAL static +#endif + +#include +#undef RX_DECL +#define RX_DECL RX_DEF_QUAL + + +/* + * Prototypes. + */ +#ifdef __STDC__ +RX_DECL struct rx_hash_item + *rx_hash_find (struct rx_hash *, unsigned long, + void *, struct rx_hash_rules *); +RX_DECL struct rx_hash_item + *rx_hash_find (struct rx_hash *, unsigned long, + void *, struct rx_hash_rules *); +RX_DECL struct rx_hash_item + *rx_hash_store (struct rx_hash *, unsigned long, + void *, struct rx_hash_rules *); +RX_DECL void rx_hash_free (struct rx_hash_item *, + struct rx_hash_rules *); +RX_DECL void rx_free_hash_table (struct rx_hash *, rx_hash_freefn, + struct rx_hash_rules *); +RX_DECL rx_Bitset + rx_cset (struct rx *); +RX_DECL rx_Bitset + rx_copy_cset (struct rx *, rx_Bitset); +RX_DECL void rx_free_cset (struct rx *, rx_Bitset); +static struct rx_hash_item + *compiler_hash_item_alloc (struct rx_hash_rules *, void *); +static struct rx_hash + *compiler_hash_alloc (struct rx_hash_rules *); +static void compiler_free_hash (struct rx_hash *, + struct rx_hash_rules *); +static void compiler_free_hash_item (struct rx_hash_item *, + struct rx_hash_rules *); +RX_DECL struct rexp_node + *rexp_node (struct rx *, enum rexp_node_type); +RX_DECL struct rexp_node + *rx_mk_r_cset (struct rx *, rx_Bitset); +RX_DECL struct rexp_node + *rx_mk_r_concat (struct rx *, struct rexp_node *, + struct rexp_node *); +RX_DECL struct rexp_node + *rx_mk_r_alternate (struct rx *, struct rexp_node *, + struct rexp_node *); +RX_DECL struct rexp_node + *rx_mk_r_alternate (struct rx *, struct rexp_node *, + struct rexp_node *); +RX_DECL struct rexp_node + *rx_mk_r_opt (struct rx *, struct rexp_node *); +RX_DECL struct rexp_node + *rx_mk_r_star (struct rx *, struct rexp_node *); +RX_DECL struct rexp_node + *rx_mk_r_2phase_star (struct rx *, struct rexp_node *, + struct rexp_node *); +RX_DECL struct rexp_node + *rx_mk_r_side_effect (struct rx *, rx_side_effect); +RX_DECL struct rexp_node + *rx_mk_r_data (struct rx *, void *); +RX_DECL void rx_free_rexp (struct rx *, struct rexp_node *); +RX_DECL struct rexp_node + *rx_copy_rexp (struct rx *, struct rexp_node *); +RX_DECL struct rx_nfa_state + *rx_nfa_state (struct rx *); +RX_DECL void rx_free_nfa_state (struct rx_nfa_state *); +RX_DECL struct rx_nfa_state + *rx_id_to_nfa_state (struct rx *, int); +RX_DECL struct rx_nfa_edge + *rx_nfa_edge (struct rx *, enum rx_nfa_etype, + struct rx_nfa_state *, + struct rx_nfa_state *); +RX_DECL void rx_free_nfa_edge (struct rx_nfa_edge *); +static struct rx_possible_future + *rx_possible_future (struct rx *, struct rx_se_list *); +static void rx_free_possible_future (struct rx_possible_future *); +RX_DECL void rx_free_nfa (struct rx *); +RX_DECL int rx_build_nfa (struct rx *, struct rexp_node *, + struct rx_nfa_state **, + struct rx_nfa_state **); +RX_DECL void rx_name_nfa_states (struct rx *); +static int se_list_cmp (void *, void *); +static int se_list_equal (void *, void *); +static struct rx_se_list + *hash_cons_se_prog (struct rx *, struct rx_hash *, + void *, struct rx_se_list *); +static struct rx_se_list + *hash_se_prog (struct rx *, struct rx_hash *, + struct rx_se_list *); +static int nfa_set_cmp (void *, void *); +static int nfa_set_equal (void *, void *); +static struct rx_nfa_state_set + *nfa_set_cons (struct rx *, struct rx_hash *, + struct rx_nfa_state *, + struct rx_nfa_state_set *); +static struct rx_nfa_state_set + *nfa_set_enjoin (struct rx *, struct rx_hash *, + struct rx_nfa_state *, + struct rx_nfa_state_set *); +#endif + +#ifndef emacs + +#ifdef SYNTAX_TABLE +extern char *re_syntax_table; +#else /* not SYNTAX_TABLE */ + +#ifndef RX_WANT_RX_DEFS +RX_DECL char re_syntax_table[CHAR_SET_SIZE]; +#endif + +#ifdef __STDC__ +static void +init_syntax_once (void) +#else +static void +init_syntax_once () +#endif +{ + register int c; + static int done = 0; + + if (done) + return; + + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 'a'; c <= 'z'; c++) + re_syntax_table[c] = Sword; + + for (c = 'A'; c <= 'Z'; c++) + re_syntax_table[c] = Sword; + + for (c = '0'; c <= '9'; c++) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} +#endif /* not SYNTAX_TABLE */ +#endif /* not emacs */ + +/* Compile with `-DRX_DEBUG' and use the following flags. + * + * Debugging flags: + * rx_debug - print information as a regexp is compiled + * rx_debug_trace - print information as a regexp is executed + */ + +#ifdef RX_DEBUG + +int rx_debug_compile = 0; +int rx_debug_trace = 0; +static struct re_pattern_buffer * dbug_rxb = 0; + + +/* + * More Prototypes + */ +#ifdef __STDC__ +typedef void (*side_effect_printer) (struct rx *, void *, FILE *); +static void print_cset (struct rx *, rx_Bitset, FILE *); +static void print_rexp (struct rx *, struct rexp_node *, int, + side_effect_printer, FILE *); +static void print_nfa (struct rx *, struct rx_nfa_state *, + side_effect_printer, FILE *); +static void re_seprint (struct rx *, void *, FILE *); +void print_compiled_pattern (struct re_pattern_buffer *); +void print_fastmap (char *); +#else +typedef void (*side_effect_printer) (); +static void print_cset (); +#endif + +#ifdef __STDC__ +static void +print_rexp (struct rx *rx, + struct rexp_node *node, int depth, + side_effect_printer seprint, FILE * fp) +#else +static void +print_rexp (rx, node, depth, seprint, fp) + struct rx *rx; + struct rexp_node *node; + int depth; + side_effect_printer seprint; + FILE * fp; +#endif +{ + if (!node) + return; + else + { + switch (node->type) + { + case r_cset: + { + fprintf (fp, "%*s", depth, ""); + print_cset (rx, node->params.cset, fp); + fputc ('\n', fp); + break; + } + + case r_opt: + case r_star: + fprintf (fp, "%*s%s\n", depth, "", + node->type == r_opt ? "opt" : "star"); + print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp); + break; + + case r_2phase_star: + fprintf (fp, "%*s2phase star\n", depth, ""); + print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp); + print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp); + break; + + + case r_alternate: + case r_concat: + fprintf (fp, "%*s%s\n", depth, "", + node->type == r_alternate ? "alt" : "concat"); + print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp); + print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp); + break; + case r_side_effect: + fprintf (fp, "%*sSide effect: ", depth, ""); + seprint (rx, node->params.side_effect, fp); + fputc ('\n', fp); + } + } +} + +#ifdef __STDC__ +static void +print_nfa (struct rx * rx, + struct rx_nfa_state * n, + side_effect_printer seprint, FILE * fp) +#else +static void +print_nfa (rx, n, seprint, fp) + struct rx * rx; + struct rx_nfa_state * n; + side_effect_printer seprint; + FILE * fp; +#endif +{ + while (n) + { + struct rx_nfa_edge *e = n->edges; + struct rx_possible_future *ec = n->futures; + fprintf (fp, "node %d %s\n", n->id, + n->is_final ? "final" : (n->is_start ? "start" : "")); + while (e) + { + fprintf (fp, " edge to %d, ", e->dest->id); + switch (e->type) + { + case ne_epsilon: + fprintf (fp, "epsilon\n"); + break; + case ne_side_effect: + fprintf (fp, "side effect "); + seprint (rx, e->params.side_effect, fp); + fputc ('\n', fp); + break; + case ne_cset: + fprintf (fp, "cset "); + print_cset (rx, e->params.cset, fp); + fputc ('\n', fp); + break; + } + e = e->next; + } + + while (ec) + { + int x; + struct rx_nfa_state_set * s; + struct rx_se_list * l; + fprintf (fp, " eclosure to {"); + for (s = ec->destset; s; s = s->cdr) + fprintf (fp, "%d ", s->car->id); + fprintf (fp, "} ("); + for (l = ec->effects; l; l = l->cdr) + { + seprint (rx, l->car, fp); + fputc (' ', fp); + } + fprintf (fp, ")\n"); + ec = ec->next; + } + n = n->next; + } +} + +static char * efnames [] = +{ + "bogon", + "re_se_try", + "re_se_pushback", + "re_se_push0", + "re_se_pushpos", + "re_se_chkpos", + "re_se_poppos", + "re_se_at_dot", + "re_se_syntax", + "re_se_not_syntax", + "re_se_begbuf", + "re_se_hat", + "re_se_wordbeg", + "re_se_wordbound", + "re_se_notwordbound", + "re_se_wordend", + "re_se_endbuf", + "re_se_dollar", + "re_se_fail", +}; + +static char * efnames2[] = +{ + "re_se_win", + "re_se_lparen", + "re_se_rparen", + "re_se_backref", + "re_se_iter", + "re_se_end_iter", + "re_se_tv" +}; + +static char * inx_names[] = +{ + "rx_backtrack_point", + "rx_do_side_effects", + "rx_cache_miss", + "rx_next_char", + "rx_backtrack", + "rx_error_inx", + "rx_num_instructions" +}; + + +#ifdef __STDC__ +static void +re_seprint (struct rx * rx, void * effect, FILE * fp) +#else +static void +re_seprint (rx, effect, fp) + struct rx * rx; + void * effect; + FILE * fp; +#endif +{ + if ((int)effect < 0) + fputs (efnames[-(int)effect], fp); + else if (dbug_rxb) + { + struct re_se_params * p = &dbug_rxb->se_params[(int)effect]; + fprintf (fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2); + } + else + fprintf (fp, "[complex op # %d]", (int)effect); +} + +/* These are so the regex.c regression tests will compile. */ +void +print_compiled_pattern (rxb) + struct re_pattern_buffer * rxb; +{ +} + +void +print_fastmap (fm) + char * fm; +{ +} + +#endif /* RX_DEBUG */ + + + +/* This page: Bitsets. Completely unintersting. */ + +RX_DECL int rx_bitset_is_equal (int, rx_Bitset, rx_Bitset); +RX_DECL int rx_bitset_is_subset (int, rx_Bitset, rx_Bitset); +RX_DECL int rx_bitset_empty (int, rx_Bitset); +RX_DECL void rx_bitset_null (int, rx_Bitset); +RX_DECL void rx_bitset_complement (int, rx_Bitset); +RX_DECL void rx_bitset_complement (int, rx_Bitset); +RX_DECL void rx_bitset_assign (int, rx_Bitset, rx_Bitset); +RX_DECL void rx_bitset_union (int, rx_Bitset, rx_Bitset); +RX_DECL void rx_bitset_intersection (int, rx_Bitset, rx_Bitset); +RX_DECL void rx_bitset_difference (int, rx_Bitset, rx_Bitset); +RX_DECL void rx_bitset_revdifference (int, rx_Bitset, rx_Bitset); +RX_DECL void rx_bitset_xor (int, rx_Bitset, rx_Bitset); +RX_DECL unsigned long + rx_bitset_hash (int, rx_Bitset); + +#ifdef __STDC__ +RX_DECL int +rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b) +#else +RX_DECL int +rx_bitset_is_equal (size, a, b) + int size; + rx_Bitset a; + rx_Bitset b; +#endif +{ + int x; + RX_subset s = b[0]; + b[0] = ~a[0]; + + for (x = rx_bitset_numb_subsets(size) - 1; a[x] == b[x]; --x) + ; + + b[0] = s; + return !x && s == a[0]; +} + +#ifdef __STDC__ +RX_DECL int +rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b) +#else +RX_DECL int +rx_bitset_is_subset (size, a, b) + int size; + rx_Bitset a; + rx_Bitset b; +#endif +{ + int x = rx_bitset_numb_subsets(size) - 1; + while (x-- && (a[x] & b[x]) == a[x]); + return x == -1; +} + + +#ifdef __STDC__ +RX_DECL int +rx_bitset_empty (int size, rx_Bitset set) +#else +RX_DECL int +rx_bitset_empty (size, set) + int size; + rx_Bitset set; +#endif +{ + int x; + RX_subset s = set[0]; + set[0] = 1; + for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x) + ; + set[0] = s; + return !s; +} + +#ifdef __STDC__ +RX_DECL void +rx_bitset_null (int size, rx_Bitset b) +#else +RX_DECL void +rx_bitset_null (size, b) + int size; + rx_Bitset b; +#endif +{ + bzero (b, rx_sizeof_bitset(size)); +} + + +#ifdef __STDC__ +RX_DECL void +rx_bitset_universe (int size, rx_Bitset b) +#else +RX_DECL void +rx_bitset_universe (size, b) + int size; + rx_Bitset b; +#endif +{ + int x = rx_bitset_numb_subsets (size); + while (x--) + *b++ = ~(RX_subset)0; +} + + +#ifdef __STDC__ +RX_DECL void +rx_bitset_complement (int size, rx_Bitset b) +#else +RX_DECL void +rx_bitset_complement (size, b) + int size; + rx_Bitset b; +#endif +{ + int x = rx_bitset_numb_subsets (size); + while (x--) + { + *b = ~*b; + ++b; + } +} + + +#ifdef __STDC__ +RX_DECL void +rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b) +#else +RX_DECL void +rx_bitset_assign (size, a, b) + int size; + rx_Bitset a; + rx_Bitset b; +#endif +{ + int x; + for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x) + a[x] = b[x]; +} + +#ifdef __STDC__ +RX_DECL void +rx_bitset_union (int size, rx_Bitset a, rx_Bitset b) +#else +RX_DECL void +rx_bitset_union (size, a, b) + int size; + rx_Bitset a; + rx_Bitset b; +#endif +{ + int x; + for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x) + a[x] |= b[x]; +} + + +#ifdef __STDC__ +RX_DECL void +rx_bitset_intersection (int size, + rx_Bitset a, rx_Bitset b) +#else +RX_DECL void +rx_bitset_intersection (size, a, b) + int size; + rx_Bitset a; + rx_Bitset b; +#endif +{ + int x; + for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x) + a[x] &= b[x]; +} + + +#ifdef __STDC__ +RX_DECL void +rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b) +#else +RX_DECL void +rx_bitset_difference (size, a, b) + int size; + rx_Bitset a; + rx_Bitset b; +#endif +{ + int x; + for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x) + a[x] &= ~ b[x]; +} + + +#ifdef __STDC__ +RX_DECL void +rx_bitset_revdifference (int size, + rx_Bitset a, rx_Bitset b) +#else +RX_DECL void +rx_bitset_revdifference (size, a, b) + int size; + rx_Bitset a; + rx_Bitset b; +#endif +{ + int x; + for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x) + a[x] = ~a[x] & b[x]; +} + +#ifdef __STDC__ +RX_DECL void +rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b) +#else +RX_DECL void +rx_bitset_xor (size, a, b) + int size; + rx_Bitset a; + rx_Bitset b; +#endif +{ + int x; + for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x) + a[x] ^= b[x]; +} + + +#ifdef __STDC__ +RX_DECL unsigned long +rx_bitset_hash (int size, rx_Bitset b) +#else +RX_DECL unsigned long +rx_bitset_hash (size, b) + int size; + rx_Bitset b; +#endif +{ + int x; + unsigned long hash = (unsigned long)rx_bitset_hash; + + for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x) + hash ^= rx_bitset_subset_val(b, x); + + return hash; +} + +RX_DECL RX_subset rx_subset_singletons [RX_subset_bits] = +{ + 0x1, + 0x2, + 0x4, + 0x8, + 0x10, + 0x20, + 0x40, + 0x80, + 0x100, + 0x200, + 0x400, + 0x800, + 0x1000, + 0x2000, + 0x4000, + 0x8000, + 0x10000, + 0x20000, + 0x40000, + 0x80000, + 0x100000, + 0x200000, + 0x400000, + 0x800000, + 0x1000000, + 0x2000000, + 0x4000000, + 0x8000000, + 0x10000000, + 0x20000000, + 0x40000000, + 0x80000000 +}; + +#ifdef RX_DEBUG + +#ifdef __STDC__ +static void +print_cset (struct rx *rx, rx_Bitset cset, FILE * fp) +#else +static void +print_cset (rx, cset, fp) + struct rx *rx; + rx_Bitset cset; + FILE * fp; +#endif +{ + int x; + fputc ('[', fp); + for (x = 0; x < rx->local_cset_size; ++x) + if (RX_bitset_member (cset, x)) + { + if (isprint(x)) + fputc (x, fp); + else + fprintf (fp, "\\0%o ", x); + } + fputc (']', fp); +} + +#endif /* RX_DEBUG */ + + + +static unsigned long rx_hash_masks[4] = +{ + 0x12488421, + 0x96699669, + 0xbe7dd7eb, + 0xffffffff +}; + + +/* Hash tables */ +#ifdef __STDC__ +RX_DECL struct rx_hash_item * +rx_hash_find (struct rx_hash * table, + unsigned long hash, + void * value, + struct rx_hash_rules * rules) +#else +RX_DECL struct rx_hash_item * +rx_hash_find (table, hash, value, rules) + struct rx_hash * table; + unsigned long hash; + void * value; + struct rx_hash_rules * rules; +#endif +{ + rx_hash_eq eq = rules->eq; + int maskc = 0; + long mask = rx_hash_masks [0]; + int bucket = (hash & mask) % 13; + + while (table->children [bucket]) + { + table = table->children [bucket]; + ++maskc; + mask = rx_hash_masks[maskc]; + bucket = (hash & mask) % 13; + } + + { + struct rx_hash_item * it = table->buckets[bucket]; + while (it) + if (eq (it->data, value)) + return it; + else + it = it->next_same_hash; + } + + return 0; +} + +#ifdef __STDC__ +RX_DECL struct rx_hash_item * +rx_hash_store (struct rx_hash * table, + unsigned long hash, + void * value, + struct rx_hash_rules * rules) +#else +RX_DECL struct rx_hash_item * +rx_hash_store (table, hash, value, rules) + struct rx_hash * table; + unsigned long hash; + void * value; + struct rx_hash_rules * rules; +#endif +{ + rx_hash_eq eq = rules->eq; + int maskc = 0; + long mask = rx_hash_masks[0]; + int bucket = (hash & mask) % 13; + int depth = 0; + + while (table->children [bucket]) + { + table = table->children [bucket]; + ++maskc; + mask = rx_hash_masks[maskc]; + bucket = (hash & mask) % 13; + ++depth; + } + + { + struct rx_hash_item * it = table->buckets[bucket]; + while (it) + if (eq (it->data, value)) + return it; + else + it = it->next_same_hash; + } + + { + if ( (depth < 3) + && (table->bucket_size [bucket] >= 4)) + { + struct rx_hash * newtab = ((struct rx_hash *) + rules->hash_alloc (rules)); + if (!newtab) + goto add_to_bucket; + bzero (newtab, sizeof (*newtab)); + newtab->parent = table; + { + struct rx_hash_item * them = table->buckets[bucket]; + unsigned long newmask = rx_hash_masks[maskc + 1]; + while (them) + { + struct rx_hash_item * save = them->next_same_hash; + int new_buck = (them->hash & newmask) % 13; + them->next_same_hash = newtab->buckets[new_buck]; + newtab->buckets[new_buck] = them; + them->table = newtab; + them = save; + ++newtab->bucket_size[new_buck]; + ++newtab->refs; + } + table->refs = (table->refs - table->bucket_size[bucket] + 1); + table->bucket_size[bucket] = 0; + table->buckets[bucket] = 0; + table->children[bucket] = newtab; + table = newtab; + bucket = (hash & newmask) % 13; + } + } + } + add_to_bucket: + { + struct rx_hash_item * it = ((struct rx_hash_item *) + rules->hash_item_alloc (rules, value)); + if (!it) + return 0; + it->hash = hash; + it->table = table; + /* DATA and BINDING are to be set in hash_item_alloc */ + it->next_same_hash = table->buckets [bucket]; + table->buckets[bucket] = it; + ++table->bucket_size [bucket]; + ++table->refs; + return it; + } +} + + +#ifdef __STDC__ +RX_DECL void +rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules) +#else +RX_DECL void +rx_hash_free (it, rules) + struct rx_hash_item * it; + struct rx_hash_rules * rules; +#endif +{ + if (it) + { + struct rx_hash * table = it->table; + unsigned long hash = it->hash; + int depth = (table->parent + ? (table->parent->parent + ? (table->parent->parent->parent + ? 3 + : 2) + : 1) + : 0); + int bucket = (hash & rx_hash_masks [depth]) % 13; + struct rx_hash_item ** pos = &table->buckets [bucket]; + + while (*pos != it) + pos = &(*pos)->next_same_hash; + *pos = it->next_same_hash; + rules->free_hash_item (it, rules); + --table->bucket_size[bucket]; + --table->refs; + while (!table->refs && depth) + { + struct rx_hash * save = table; + table = table->parent; + --depth; + bucket = (hash & rx_hash_masks [depth]) % 13; + --table->refs; + table->children[bucket] = 0; + rules->free_hash (save, rules); + } + } +} + +#ifdef __STDC__ +RX_DECL void +rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn, + struct rx_hash_rules * rules) +#else +RX_DECL void +rx_free_hash_table (tab, freefn, rules) + struct rx_hash * tab; + rx_hash_freefn freefn; + struct rx_hash_rules * rules; +#endif +{ + int x; + + for (x = 0; x < 13; ++x) + if (tab->children[x]) + { + rx_free_hash_table (tab->children[x], freefn, rules); + rules->free_hash (tab->children[x], rules); + } + else + { + struct rx_hash_item * them = tab->buckets[x]; + while (them) + { + struct rx_hash_item * that = them; + them = that->next_same_hash; + freefn (that); + rules->free_hash_item (that, rules); + } + } +} + + + +/* Utilities for manipulating bitset represntations of characters sets. */ + +#ifdef __STDC__ +RX_DECL rx_Bitset +rx_cset (struct rx *rx) +#else +RX_DECL rx_Bitset +rx_cset (rx) + struct rx *rx; +#endif +{ + rx_Bitset b = (rx_Bitset) malloc (rx_sizeof_bitset (rx->local_cset_size)); + if (b) + rx_bitset_null (rx->local_cset_size, b); + return b; +} + + +#ifdef __STDC__ +RX_DECL rx_Bitset +rx_copy_cset (struct rx *rx, rx_Bitset a) +#else +RX_DECL rx_Bitset +rx_copy_cset (rx, a) + struct rx *rx; + rx_Bitset a; +#endif +{ + rx_Bitset cs = rx_cset (rx); + + if (cs) + rx_bitset_union (rx->local_cset_size, cs, a); + + return cs; +} + + +#ifdef __STDC__ +RX_DECL void +rx_free_cset (struct rx * rx, rx_Bitset c) +#else +RX_DECL void +rx_free_cset (rx, c) + struct rx * rx; + rx_Bitset c; +#endif +{ + if (c) + free ((char *)c); +} + + +/* Hash table memory allocation policy for the regexp compiler */ + +#ifdef __STDC__ +static struct rx_hash * +compiler_hash_alloc (struct rx_hash_rules * rules) +#else +static struct rx_hash * +compiler_hash_alloc (rules) + struct rx_hash_rules * rules; +#endif +{ + return (struct rx_hash *)malloc (sizeof (struct rx_hash)); +} + + +#ifdef __STDC__ +static struct rx_hash_item * +compiler_hash_item_alloc (struct rx_hash_rules * rules, void * value) +#else +static struct rx_hash_item * +compiler_hash_item_alloc (rules, value) + struct rx_hash_rules * rules; + void * value; +#endif +{ + struct rx_hash_item * it; + it = (struct rx_hash_item *)malloc (sizeof (*it)); + if (it) + { + it->data = value; + it->binding = 0; + } + return it; +} + +#ifdef __STDC__ +static void +compiler_free_hash (struct rx_hash * tab, + struct rx_hash_rules * rules) +#else +static void +compiler_free_hash (tab, rules) + struct rx_hash * tab; + struct rx_hash_rules * rules; +#endif +{ + free ((char *)tab); +} + + +#ifdef __STDC__ +static void +compiler_free_hash_item (struct rx_hash_item * item, + struct rx_hash_rules * rules) +#else +static void +compiler_free_hash_item (item, rules) + struct rx_hash_item * item; + struct rx_hash_rules * rules; +#endif +{ + free ((char *)item); +} + + +/* This page: REXP_NODE (expression tree) structures. */ + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rexp_node (struct rx *rx, + enum rexp_node_type type) +#else +RX_DECL struct rexp_node * +rexp_node (rx, type) + struct rx *rx; + enum rexp_node_type type; +#endif +{ + struct rexp_node *n; + + n = (struct rexp_node *)malloc (sizeof (*n)); + if (n) + { + bzero (n, sizeof (*n)); + n->type = type; + } + return n; +} + + +/* free_rexp_node assumes that the bitset passed to rx_mk_r_cset + * can be freed using rx_free_cset. + */ +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_mk_r_cset (struct rx * rx, + rx_Bitset b) +#else +RX_DECL struct rexp_node * +rx_mk_r_cset (rx, b) + struct rx * rx; + rx_Bitset b; +#endif +{ + struct rexp_node * n = rexp_node (rx, r_cset); + if (n) + n->params.cset = b; + return n; +} + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_mk_r_concat (struct rx * rx, + struct rexp_node * a, + struct rexp_node * b) +#else +RX_DECL struct rexp_node * +rx_mk_r_concat (rx, a, b) + struct rx * rx; + struct rexp_node * a; + struct rexp_node * b; +#endif +{ + struct rexp_node * n = rexp_node (rx, r_concat); + if (n) + { + n->params.pair.left = a; + n->params.pair.right = b; + } + return n; +} + + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_mk_r_alternate (struct rx * rx, + struct rexp_node * a, + struct rexp_node * b) +#else +RX_DECL struct rexp_node * +rx_mk_r_alternate (rx, a, b) + struct rx * rx; + struct rexp_node * a; + struct rexp_node * b; +#endif +{ + struct rexp_node * n = rexp_node (rx, r_alternate); + if (n) + { + n->params.pair.left = a; + n->params.pair.right = b; + } + return n; +} + + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_mk_r_opt (struct rx * rx, + struct rexp_node * a) +#else +RX_DECL struct rexp_node * +rx_mk_r_opt (rx, a) + struct rx * rx; + struct rexp_node * a; +#endif +{ + struct rexp_node * n = rexp_node (rx, r_opt); + if (n) + { + n->params.pair.left = a; + n->params.pair.right = 0; + } + return n; +} + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_mk_r_star (struct rx * rx, + struct rexp_node * a) +#else +RX_DECL struct rexp_node * +rx_mk_r_star (rx, a) + struct rx * rx; + struct rexp_node * a; +#endif +{ + struct rexp_node * n = rexp_node (rx, r_star); + if (n) + { + n->params.pair.left = a; + n->params.pair.right = 0; + } + return n; +} + + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_mk_r_2phase_star (struct rx * rx, + struct rexp_node * a, + struct rexp_node * b) +#else +RX_DECL struct rexp_node * +rx_mk_r_2phase_star (rx, a, b) + struct rx * rx; + struct rexp_node * a; + struct rexp_node * b; +#endif +{ + struct rexp_node * n = rexp_node (rx, r_2phase_star); + if (n) + { + n->params.pair.left = a; + n->params.pair.right = b; + } + return n; +} + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_mk_r_side_effect (struct rx * rx, + rx_side_effect a) +#else +RX_DECL struct rexp_node * +rx_mk_r_side_effect (rx, a) + struct rx * rx; + rx_side_effect a; +#endif +{ + struct rexp_node * n = rexp_node (rx, r_side_effect); + if (n) + { + n->params.side_effect = a; + n->params.pair.right = 0; + } + return n; +} + + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_mk_r_data (struct rx * rx, + void * a) +#else +RX_DECL struct rexp_node * +rx_mk_r_data (rx, a) + struct rx * rx; + void * a; +#endif +{ + struct rexp_node * n = rexp_node (rx, r_data); + if (n) + { + n->params.pair.left = a; + n->params.pair.right = 0; + } + return n; +} + +#ifdef __STDC__ +RX_DECL void +rx_free_rexp (struct rx * rx, struct rexp_node * node) +#else +RX_DECL void +rx_free_rexp (rx, node) + struct rx * rx; + struct rexp_node * node; +#endif +{ + if (node) + { + switch (node->type) + { + case r_cset: + if (node->params.cset) + rx_free_cset (rx, node->params.cset); + + case r_side_effect: + break; + + case r_concat: + case r_alternate: + case r_2phase_star: + case r_opt: + case r_star: + rx_free_rexp (rx, node->params.pair.left); + rx_free_rexp (rx, node->params.pair.right); + break; + + case r_data: + /* This shouldn't occur. */ + break; + } + free ((char *)node); + } +} + +#ifdef __STDC__ +RX_DECL struct rexp_node * +rx_copy_rexp (struct rx *rx, + struct rexp_node *node) +#else +RX_DECL struct rexp_node * +rx_copy_rexp (rx, node) + struct rx *rx; + struct rexp_node *node; +#endif +{ + if (!node) + return 0; + else + { + struct rexp_node *n = rexp_node (rx, node->type); + if (!n) + return 0; + switch (node->type) + { + case r_cset: + n->params.cset = rx_copy_cset (rx, node->params.cset); + if (!n->params.cset) + { + rx_free_rexp (rx, n); + return 0; + } + break; + + case r_side_effect: + n->params.side_effect = node->params.side_effect; + break; + + case r_concat: + case r_alternate: + case r_opt: + case r_2phase_star: + case r_star: + n->params.pair.left = + rx_copy_rexp (rx, node->params.pair.left); + n->params.pair.right = + rx_copy_rexp (rx, node->params.pair.right); + if ( (node->params.pair.left && !n->params.pair.left) + || (node->params.pair.right && !n->params.pair.right)) + { + rx_free_rexp (rx, n); + return 0; + } + break; + case r_data: + /* shouldn't happen */ + break; + } + return n; + } +} + + + +/* This page: functions to build and destroy graphs that describe nfa's */ + +/* Constructs a new nfa node. */ +#ifdef __STDC__ +RX_DECL struct rx_nfa_state * +rx_nfa_state (struct rx *rx) +#else +RX_DECL struct rx_nfa_state * +rx_nfa_state (rx) + struct rx *rx; +#endif +{ + struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n)); + if (!n) + return 0; + bzero (n, sizeof (*n)); + n->next = rx->nfa_states; + rx->nfa_states = n; + return n; +} + + +#ifdef __STDC__ +RX_DECL void +rx_free_nfa_state (struct rx_nfa_state * n) +#else +RX_DECL void +rx_free_nfa_state (n) + struct rx_nfa_state * n; +#endif +{ + free ((char *)n); +} + + +/* This looks up an nfa node, given a numeric id. Numeric id's are + * assigned after the nfa has been built. + */ +#ifdef __STDC__ +RX_DECL struct rx_nfa_state * +rx_id_to_nfa_state (struct rx * rx, + int id) +#else +RX_DECL struct rx_nfa_state * +rx_id_to_nfa_state (rx, id) + struct rx * rx; + int id; +#endif +{ + struct rx_nfa_state * n; + for (n = rx->nfa_states; n; n = n->next) + if (n->id == id) + return n; + return 0; +} + + +/* This adds an edge between two nodes, but doesn't initialize the + * edge label. + */ + +#ifdef __STDC__ +RX_DECL struct rx_nfa_edge * +rx_nfa_edge (struct rx *rx, + enum rx_nfa_etype type, + struct rx_nfa_state *start, + struct rx_nfa_state *dest) +#else +RX_DECL struct rx_nfa_edge * +rx_nfa_edge (rx, type, start, dest) + struct rx *rx; + enum rx_nfa_etype type; + struct rx_nfa_state *start; + struct rx_nfa_state *dest; +#endif +{ + struct rx_nfa_edge *e; + e = (struct rx_nfa_edge *)malloc (sizeof (*e)); + if (!e) + return 0; + e->next = start->edges; + start->edges = e; + e->type = type; + e->dest = dest; + return e; +} + + +#ifdef __STDC__ +RX_DECL void +rx_free_nfa_edge (struct rx_nfa_edge * e) +#else +RX_DECL void +rx_free_nfa_edge (e) + struct rx_nfa_edge * e; +#endif +{ + free ((char *)e); +} + + +/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure + * of an NFA. These are added to an nfa automaticly by eclose_nfa. + */ + +#ifdef __STDC__ +static struct rx_possible_future * +rx_possible_future (struct rx * rx, + struct rx_se_list * effects) +#else +static struct rx_possible_future * +rx_possible_future (rx, effects) + struct rx * rx; + struct rx_se_list * effects; +#endif +{ + struct rx_possible_future *ec; + ec = (struct rx_possible_future *) malloc (sizeof (*ec)); + if (!ec) + return 0; + ec->destset = 0; + ec->next = 0; + ec->effects = effects; + return ec; +} + + +#ifdef __STDC__ +static void +rx_free_possible_future (struct rx_possible_future * pf) +#else +static void +rx_free_possible_future (pf) + struct rx_possible_future * pf; +#endif +{ + free ((char *)pf); +} + + +#ifdef __STDC__ +RX_DECL void +rx_free_nfa (struct rx *rx) +#else +RX_DECL void +rx_free_nfa (rx) + struct rx *rx; +#endif +{ + while (rx->nfa_states) + { + while (rx->nfa_states->edges) + { + switch (rx->nfa_states->edges->type) + { + case ne_cset: + rx_free_cset (rx, rx->nfa_states->edges->params.cset); + break; + default: + break; + } + { + struct rx_nfa_edge * e; + e = rx->nfa_states->edges; + rx->nfa_states->edges = rx->nfa_states->edges->next; + rx_free_nfa_edge (e); + } + } /* while (rx->nfa_states->edges) */ + { + /* Iterate over the partial epsilon closures of rx->nfa_states */ + struct rx_possible_future * pf = rx->nfa_states->futures; + while (pf) + { + struct rx_possible_future * pft = pf; + pf = pf->next; + rx_free_possible_future (pft); + } + } + { + struct rx_nfa_state *n; + n = rx->nfa_states; + rx->nfa_states = rx->nfa_states->next; + rx_free_nfa_state (n); + } + } +} + + + +/* This page: translating a pattern expression into an nfa and doing the + * static part of the nfa->super-nfa translation. + */ + +/* This is the thompson regexp->nfa algorithm. + * It is modified to allow for `side-effect epsilons.' Those are + * edges that are taken whenever a similar epsilon edge would be, + * but which imply that some side effect occurs when the edge + * is taken. + * + * Side effects are used to model parts of the pattern langauge + * that are not regular (in the formal sense). + */ + +#ifdef __STDC__ +RX_DECL int +rx_build_nfa (struct rx *rx, + struct rexp_node *rexp, + struct rx_nfa_state **start, + struct rx_nfa_state **end) +#else +RX_DECL int +rx_build_nfa (rx, rexp, start, end) + struct rx *rx; + struct rexp_node *rexp; + struct rx_nfa_state **start; + struct rx_nfa_state **end; +#endif +{ + struct rx_nfa_edge *edge; + + /* Start & end nodes may have been allocated by the caller. */ + *start = *start ? *start : rx_nfa_state (rx); + + if (!*start) + return 0; + + if (!rexp) + { + *end = *start; + return 1; + } + + *end = *end ? *end : rx_nfa_state (rx); + + if (!*end) + { + rx_free_nfa_state (*start); + return 0; + } + + switch (rexp->type) + { + case r_data: + return 0; + + case r_cset: + edge = rx_nfa_edge (rx, ne_cset, *start, *end); + if (!edge) + return 0; + edge->params.cset = rx_copy_cset (rx, rexp->params.cset); + if (!edge->params.cset) + { + rx_free_nfa_edge (edge); + return 0; + } + return 1; + + case r_opt: + return (rx_build_nfa (rx, rexp->params.pair.left, start, end) + && rx_nfa_edge (rx, ne_epsilon, *start, *end)); + + case r_star: + { + struct rx_nfa_state * star_start = 0; + struct rx_nfa_state * star_end = 0; + return (rx_build_nfa (rx, rexp->params.pair.left, + &star_start, &star_end) + && star_start + && star_end + && rx_nfa_edge (rx, ne_epsilon, star_start, star_end) + && rx_nfa_edge (rx, ne_epsilon, *start, star_start) + && rx_nfa_edge (rx, ne_epsilon, star_end, *end) + + && rx_nfa_edge (rx, ne_epsilon, star_end, star_start)); + } + + case r_2phase_star: + { + struct rx_nfa_state * star_start = 0; + struct rx_nfa_state * star_end = 0; + struct rx_nfa_state * loop_exp_start = 0; + struct rx_nfa_state * loop_exp_end = 0; + + return (rx_build_nfa (rx, rexp->params.pair.left, + &star_start, &star_end) + && rx_build_nfa (rx, rexp->params.pair.right, + &loop_exp_start, &loop_exp_end) + && star_start + && star_end + && loop_exp_end + && loop_exp_start + && rx_nfa_edge (rx, ne_epsilon, star_start, *end) + && rx_nfa_edge (rx, ne_epsilon, *start, star_start) + && rx_nfa_edge (rx, ne_epsilon, star_end, *end) + + && rx_nfa_edge (rx, ne_epsilon, star_end, loop_exp_start) + && rx_nfa_edge (rx, ne_epsilon, loop_exp_end, star_start)); + } + + + case r_concat: + { + struct rx_nfa_state *shared = 0; + return + (rx_build_nfa (rx, rexp->params.pair.left, start, &shared) + && rx_build_nfa (rx, rexp->params.pair.right, &shared, end)); + } + + case r_alternate: + { + struct rx_nfa_state *ls = 0; + struct rx_nfa_state *le = 0; + struct rx_nfa_state *rs = 0; + struct rx_nfa_state *re = 0; + return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le) + && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re) + && rx_nfa_edge (rx, ne_epsilon, *start, ls) + && rx_nfa_edge (rx, ne_epsilon, *start, rs) + && rx_nfa_edge (rx, ne_epsilon, le, *end) + && rx_nfa_edge (rx, ne_epsilon, re, *end)); + } + + case r_side_effect: + edge = rx_nfa_edge (rx, ne_side_effect, *start, *end); + if (!edge) + return 0; + edge->params.side_effect = rexp->params.side_effect; + return 1; + } + + /* this should never happen */ + return 0; +} + + +/* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon + * transitions. Only these nodes can occur in super-states. + * All nodes are given an integer id. + * The id is non-negative if the node has non-epsilon out-transitions, negative + * otherwise (this is because we want the non-negative ids to be used as + * array indexes in a few places). + */ + +#ifdef __STDC__ +RX_DECL void +rx_name_nfa_states (struct rx *rx) +#else +RX_DECL void +rx_name_nfa_states (rx) + struct rx *rx; +#endif +{ + struct rx_nfa_state *n = rx->nfa_states; + + rx->nodec = 0; + rx->epsnodec = -1; + + while (n) + { + struct rx_nfa_edge *e = n->edges; + + if (n->is_start) + n->eclosure_needed = 1; + + while (e) + { + switch (e->type) + { + case ne_epsilon: + case ne_side_effect: + break; + + case ne_cset: + n->id = rx->nodec++; + { + struct rx_nfa_edge *from_n = n->edges; + while (from_n) + { + from_n->dest->eclosure_needed = 1; + from_n = from_n->next; + } + } + goto cont; + } + e = e->next; + } + n->id = rx->epsnodec--; + cont: + n = n->next; + } + rx->epsnodec = -rx->epsnodec; +} + + +/* This page: data structures for the static part of the nfa->supernfa + * translation. + * + * There are side effect lists -- lists of side effects occuring + * along an uninterrupted, acyclic path of side-effect epsilon edges. + * Such paths are collapsed to single edges in the course of computing + * epsilon closures. Such single edges are labled with a list of all + * the side effects entailed in crossing them. Like lists of side + * effects are made == by the constructors below. + * + * There are also nfa state sets. These are used to hold a list of all + * states reachable from a starting state for a given type of transition + * and side effect list. These are also hash-consed. + */ + +/* The next several functions compare, construct, etc. lists of side + * effects. See ECLOSE_NFA (below) for details. + */ + +/* Ordering of rx_se_list + * (-1, 0, 1 return value convention). + */ + +#ifdef __STDC__ +static int +se_list_cmp (void * va, void * vb) +#else +static int +se_list_cmp (va, vb) + void * va; + void * vb; +#endif +{ + struct rx_se_list * a = (struct rx_se_list *)va; + struct rx_se_list * b = (struct rx_se_list *)vb; + + return ((va == vb) + ? 0 + : (!va + ? -1 + : (!vb + ? 1 + : ((long)a->car < (long)b->car + ? 1 + : ((long)a->car > (long)b->car + ? -1 + : se_list_cmp ((void *)a->cdr, (void *)b->cdr)))))); +} + + +#ifdef __STDC__ +static int +se_list_equal (void * va, void * vb) +#else +static int +se_list_equal (va, vb) + void * va; + void * vb; +#endif +{ + return !(se_list_cmp (va, vb)); +} + +static struct rx_hash_rules se_list_hash_rules = +{ + se_list_equal, + compiler_hash_alloc, + compiler_free_hash, + compiler_hash_item_alloc, + compiler_free_hash_item +}; + + +#ifdef __STDC__ +static struct rx_se_list * +side_effect_cons (struct rx * rx, + void * se, struct rx_se_list * list) +#else +static struct rx_se_list * +side_effect_cons (rx, se, list) + struct rx * rx; + void * se; + struct rx_se_list * list; +#endif +{ + struct rx_se_list * l; + l = ((struct rx_se_list *) malloc (sizeof (*l))); + if (!l) + return 0; + l->car = se; + l->cdr = list; + return l; +} + + +#ifdef __STDC__ +static struct rx_se_list * +hash_cons_se_prog (struct rx * rx, + struct rx_hash * memo, + void * car, struct rx_se_list * cdr) +#else +static struct rx_se_list * +hash_cons_se_prog (rx, memo, car, cdr) + struct rx * rx; + struct rx_hash * memo; + void * car; + struct rx_se_list * cdr; +#endif +{ + long hash = (long)car ^ (long)cdr; + struct rx_se_list template; + + template.car = car; + template.cdr = cdr; + { + struct rx_hash_item * it = rx_hash_store (memo, hash, + (void *)&template, + &se_list_hash_rules); + if (!it) + return 0; + if (it->data == (void *)&template) + { + struct rx_se_list * consed; + consed = (struct rx_se_list *) malloc (sizeof (*consed)); + if (! consed) + { + free ((char *)it); + return 0; + } + *consed = template; + it->data = (void *)consed; + } + return (struct rx_se_list *)it->data; + } +} + + +#ifdef __STDC__ +static struct rx_se_list * +hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog) +#else +static struct rx_se_list * +hash_se_prog (rx, memo, prog) + struct rx * rx; + struct rx_hash * memo; + struct rx_se_list * prog; +#endif +{ + struct rx_se_list * answer = 0; + while (prog) + { + answer = hash_cons_se_prog (rx, memo, prog->car, answer); + if (!answer) + return 0; + prog = prog->cdr; + } + return answer; +} + +#ifdef __STDC__ +static int +nfa_set_cmp (void * va, void * vb) +#else +static int +nfa_set_cmp (va, vb) + void * va; + void * vb; +#endif +{ + struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va; + struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb; + + return ((va == vb) + ? 0 + : (!va + ? -1 + : (!vb + ? 1 + : (a->car->id < b->car->id + ? 1 + : (a->car->id > b->car->id + ? -1 + : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr)))))); +} + +#ifdef __STDC__ +static int +nfa_set_equal (void * va, void * vb) +#else +static int +nfa_set_equal (va, vb) + void * va; + void * vb; +#endif +{ + return !nfa_set_cmp (va, vb); +} + +static struct rx_hash_rules nfa_set_hash_rules = +{ + nfa_set_equal, + compiler_hash_alloc, + compiler_free_hash, + compiler_hash_item_alloc, + compiler_free_hash_item +}; + + +#ifdef __STDC__ +static struct rx_nfa_state_set * +nfa_set_cons (struct rx * rx, + struct rx_hash * memo, struct rx_nfa_state * state, + struct rx_nfa_state_set * set) +#else +static struct rx_nfa_state_set * +nfa_set_cons (rx, memo, state, set) + struct rx * rx; + struct rx_hash * memo; + struct rx_nfa_state * state; + struct rx_nfa_state_set * set; +#endif +{ + struct rx_nfa_state_set template; + struct rx_hash_item * node; + template.car = state; + template.cdr = set; + node = rx_hash_store (memo, + (((long)state) >> 8) ^ (long)set, + &template, &nfa_set_hash_rules); + if (!node) + return 0; + if (node->data == &template) + { + struct rx_nfa_state_set * l; + l = (struct rx_nfa_state_set *) malloc (sizeof (*l)); + node->data = (void *) l; + if (!l) + return 0; + *l = template; + } + return (struct rx_nfa_state_set *)node->data; +} + +#ifdef __STDC__ +static struct rx_nfa_state_set * +nfa_set_enjoin (struct rx * rx, + struct rx_hash * memo, struct rx_nfa_state * state, + struct rx_nfa_state_set * set) +#else +static struct rx_nfa_state_set * +nfa_set_enjoin (rx, memo, state, set) + struct rx * rx; + struct rx_hash * memo; + struct rx_nfa_state * state; + struct rx_nfa_state_set * set; +#endif +{ + if (!set || state->id < set->car->id) + return nfa_set_cons (rx, memo, state, set); + if (state->id == set->car->id) + return set; + else + { + struct rx_nfa_state_set * newcdr + = nfa_set_enjoin (rx, memo, state, set->cdr); + if (newcdr != set->cdr) + set = nfa_set_cons (rx, memo, set->car, newcdr); + return set; + } +} + + + +/* This page: computing epsilon closures. The closures aren't total. + * Each node's closures are partitioned according to the side effects entailed + * along the epsilon edges. Return true on success. + */ + +struct eclose_frame +{ + struct rx_se_list *prog_backwards; +}; +static int eclose_node (struct rx *, struct rx_nfa_state *, + struct rx_nfa_state *, + struct eclose_frame *); +RX_DECL int rx_eclose_nfa (struct rx *); +RX_DECL void rx_delete_epsilon_transitions + (struct rx *); +static int nfacmp (void *, void *); +static int count_hash_nodes (struct rx_hash *); +static void nfa_set_freer (struct rx_hash_item *); +RX_DECL int rx_compactify_nfa (struct rx *, void **, + unsigned long *); +static char *rx_cache_malloc (struct rx_cache *, int); +static void rx_cache_free (struct rx_cache *, + struct rx_freelist **, char *); +static void install_transition (struct rx_superstate *, + struct rx_inx *, rx_Bitset); +static int qlen (struct rx_superstate *); +static void check_cache (struct rx_cache *); +static void semifree_superstate (struct rx_cache *); +static void refresh_semifree_superstate + (struct rx_cache *, + struct rx_superstate *); +static void rx_refresh_this_superstate + (struct rx_cache *, + struct rx_superstate *); +static void release_superset_low (struct rx_cache *, + struct rx_superset *); +RX_DECL void rx_release_superset (struct rx *, struct rx_superset *); +static int rx_really_free_superstate (struct rx_cache *); +static char *rx_cache_get (struct rx_cache *, + struct rx_freelist **); +static char *rx_cache_malloc_or_get (struct rx_cache *, + struct rx_freelist **, int); +static char *rx_cache_get_superstate (struct rx_cache *); +static int supersetcmp (void *, void *); +static struct rx_hash_item + *superset_allocator (struct rx_hash_rules *, void *); +static struct rx_hash + *super_hash_allocator (struct rx_hash_rules *); +static void super_hash_liberator (struct rx_hash *, + struct rx_hash_rules *); +static void superset_hash_item_liberator + (struct rx_hash_item *, + struct rx_hash_rules *); +static int bytes_for_cache_size (int, int); +static void rx_morecore (struct rx_cache *); +RX_DECL struct rx_superset + *rx_superset_cons (struct rx *, struct rx_nfa_state *, + struct rx_superset *); +RX_DECL struct rx_superset + *rx_superstate_eclosure_union + (struct rx *, struct rx_superset *, + struct rx_nfa_state_set *); +static struct rx_distinct_future + *include_futures (struct rx *, + struct rx_distinct_future *, + struct rx_nfa_state *, + struct rx_superstate *); +RX_DECL struct rx_superstate + *rx_superstate (struct rx *, struct rx_superset *); +static int solve_destination (struct rx *, + struct rx_distinct_future *); +static int compute_super_edge (struct rx *, + struct rx_distinct_future **, + rx_Bitset, struct rx_superstate *, + unsigned char); +static struct rx_super_edge + *rx_super_edge (struct rx *, struct rx_superstate *, + rx_Bitset, + struct rx_distinct_future *); +static void install_partial_transition + (struct rx_superstate *, + struct rx_inx *, RX_subset, int); +RX_DECL struct rx_inx + *rx_handle_cache_miss (struct rx *, struct rx_superstate *, + unsigned char, void *); +static boolean + at_begline_loc_p (__const__ char *, __const__ char *, + reg_syntax_t); +static boolean + at_endline_loc_p (__const__ char *, __const__ char *, + int); +static rx_Bitset + inverse_translation (struct re_pattern_buffer *, char *, + rx_Bitset, unsigned char *, int); + + +#ifdef __STDC__ +static int +eclose_node (struct rx *rx, struct rx_nfa_state *outnode, + struct rx_nfa_state *node, struct eclose_frame *frame) +#else +static int +eclose_node (rx, outnode, node, frame) + struct rx *rx; + struct rx_nfa_state *outnode; + struct rx_nfa_state *node; + struct eclose_frame *frame; +#endif +{ + struct rx_nfa_edge *e = node->edges; + + /* For each node, we follow all epsilon paths to build the closure. + * The closure omits nodes that have only epsilon edges. + * The closure is split into partial closures -- all the states in + * a partial closure are reached by crossing the same list of + * of side effects (though not necessarily the same path). + */ + if (node->mark) + return 1; + node->mark = 1; + + if (node->id >= 0 || node->is_final) + { + struct rx_possible_future **ec; + struct rx_se_list * prog_in_order + = ((struct rx_se_list *)hash_se_prog (rx, + &rx->se_list_memo, + frame->prog_backwards)); + int cmp; + + ec = &outnode->futures; + + while (*ec) + { + cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order); + if (cmp <= 0) + break; + ec = &(*ec)->next; + } + if (!*ec || (cmp < 0)) + { + struct rx_possible_future * saved = *ec; + *ec = rx_possible_future (rx, prog_in_order); + (*ec)->next = saved; + if (!*ec) + return 0; + } + if (node->id >= 0) + { + (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo, + node, (*ec)->destset); + if (!(*ec)->destset) + return 0; + } + } + + while (e) + { + switch (e->type) + { + case ne_epsilon: + if (!eclose_node (rx, outnode, e->dest, frame)) + return 0; + break; + case ne_side_effect: + { + frame->prog_backwards = side_effect_cons (rx, + e->params.side_effect, + frame->prog_backwards); + if (!frame->prog_backwards) + return 0; + if (!eclose_node (rx, outnode, e->dest, frame)) + return 0; + { + struct rx_se_list * dying = frame->prog_backwards; + frame->prog_backwards = frame->prog_backwards->cdr; + free ((char *)dying); + } + break; + } + default: + break; + } + e = e->next; + } + node->mark = 0; + return 1; +} + +#ifdef __STDC__ +RX_DECL int +rx_eclose_nfa (struct rx *rx) +#else +RX_DECL int +rx_eclose_nfa (rx) + struct rx *rx; +#endif +{ + struct rx_nfa_state *n = rx->nfa_states; + struct eclose_frame frame; + static int rx_id = 0; + + frame.prog_backwards = 0; + rx->rx_id = rx_id++; + bzero (&rx->se_list_memo, sizeof (rx->se_list_memo)); + bzero (&rx->set_list_memo, sizeof (rx->set_list_memo)); + while (n) + { + n->futures = 0; + if (n->eclosure_needed && !eclose_node (rx, n, n, &frame)) + return 0; + /* clear_marks (rx); */ + n = n->next; + } + return 1; +} + + +/* This deletes epsilon edges from an NFA. After running eclose_node, + * we have no more need for these edges. They are removed to simplify + * further operations on the NFA. + */ + +#ifdef __STDC__ +RX_DECL void +rx_delete_epsilon_transitions (struct rx *rx) +#else +RX_DECL void +rx_delete_epsilon_transitions (rx) + struct rx *rx; +#endif +{ + struct rx_nfa_state *n = rx->nfa_states; + struct rx_nfa_edge **e; + + while (n) + { + e = &n->edges; + while (*e) + { + struct rx_nfa_edge *t; + switch ((*e)->type) + { + case ne_epsilon: + case ne_side_effect: + t = *e; + *e = t->next; + rx_free_nfa_edge (t); + break; + + default: + e = &(*e)->next; + break; + } + } + n = n->next; + } +} + + +/* This page: storing the nfa in a contiguous region of memory for + * subsequent conversion to a super-nfa. + */ + +/* This is for qsort on an array of nfa_states. The order + * is based on state ids and goes + * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0) + * This way, positive ids double as array indices. + */ + +#ifdef __STDC__ +static int +nfacmp (void * va, void * vb) +#else +static int +nfacmp (va, vb) + void * va; + void * vb; +#endif +{ + struct rx_nfa_state **a = (struct rx_nfa_state **)va; + struct rx_nfa_state **b = (struct rx_nfa_state **)vb; + return (*a == *b /* &&&& 3.18 */ + ? 0 + : (((*a)->id < 0) == ((*b)->id < 0) + ? (((*a)->id < (*b)->id) ? -1 : 1) + : (((*a)->id < 0) + ? 1 : -1))); +} + +#ifdef __STDC__ +static int +count_hash_nodes (struct rx_hash * st) +#else +static int +count_hash_nodes (st) + struct rx_hash * st; +#endif +{ + int x; + int count = 0; + for (x = 0; x < 13; ++x) + count += ((st->children[x]) + ? count_hash_nodes (st->children[x]) + : st->bucket_size[x]); + + return count; +} + + +#ifdef __STDC__ +static void +se_memo_freer (struct rx_hash_item * node) +#else +static void +se_memo_freer (node) + struct rx_hash_item * node; +#endif +{ + free ((char *)node->data); +} + + +#ifdef __STDC__ +static void +nfa_set_freer (struct rx_hash_item * node) +#else +static void +nfa_set_freer (node) + struct rx_hash_item * node; +#endif +{ + free ((char *)node->data); +} + + +/* This copies an entire NFA into a single malloced block of memory. + * Mostly this is for compatability with regex.c, though it is convenient + * to have the nfa nodes in an array. + */ + +#ifdef __STDC__ +RX_DECL int +rx_compactify_nfa (struct rx *rx, + void **mem, unsigned long *size) +#else +RX_DECL int +rx_compactify_nfa (rx, mem, size) + struct rx *rx; + void **mem; + unsigned long *size; +#endif +{ + int total_nodec; + struct rx_nfa_state *n; + int edgec = 0; + int eclosec = 0; + int se_list_consc = count_hash_nodes (&rx->se_list_memo); + int nfa_setc = count_hash_nodes (&rx->set_list_memo); + unsigned long total_size; + + /* This takes place in two stages. First, the total size of the + * nfa is computed, then structures are copied. + */ + n = rx->nfa_states; + total_nodec = 0; + while (n) + { + struct rx_nfa_edge *e = n->edges; + struct rx_possible_future *ec = n->futures; + ++total_nodec; + while (e) + { + ++edgec; + e = e->next; + } + while (ec) + { + ++eclosec; + ec = ec->next; + } + n = n->next; + } + + total_size = (total_nodec * sizeof (struct rx_nfa_state) + + edgec * rx_sizeof_bitset (rx->local_cset_size) + + edgec * sizeof (struct rx_nfa_edge) + + nfa_setc * sizeof (struct rx_nfa_state_set) + + eclosec * sizeof (struct rx_possible_future) + + se_list_consc * sizeof (struct rx_se_list) + + rx->reserved); + + if (total_size > *size) + { + *mem = remalloc (*mem, total_size); + if (*mem) + *size = total_size; + else + return 0; + } + /* Now we've allocated the memory; this copies the NFA. */ + { + static struct rx_nfa_state **scratch = 0; + static int scratch_alloc = 0; + struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem; + struct rx_nfa_state *new_state = state_base; + struct rx_nfa_edge *new_edge = + (struct rx_nfa_edge *) + ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state)); + struct rx_se_list * new_se_list = + (struct rx_se_list *) + ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge)); + struct rx_possible_future *new_close = + ((struct rx_possible_future *) + ((char *) new_se_list + + se_list_consc * sizeof (struct rx_se_list))); + struct rx_nfa_state_set * new_nfa_set = + ((struct rx_nfa_state_set *) + ((char *)new_close + eclosec * sizeof (struct rx_possible_future))); + char *new_bitset = + ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set)); + int x; + struct rx_nfa_state *n; + + if (scratch_alloc < total_nodec) + { + scratch = ((struct rx_nfa_state **) + remalloc (scratch, total_nodec * sizeof (*scratch))); + if (scratch) + scratch_alloc = total_nodec; + else + { + scratch_alloc = 0; + return 0; + } + } + + for (x = 0, n = rx->nfa_states; n; n = n->next) + scratch[x++] = n; + + qsort (scratch, total_nodec, sizeof (struct rx_nfa_state *), + (__compar_fn_t)nfacmp); + + for (x = 0; x < total_nodec; ++x) + { + struct rx_possible_future *eclose = scratch[x]->futures; + struct rx_nfa_edge *edge = scratch[x]->edges; + struct rx_nfa_state *cn = new_state++; + cn->futures = 0; + cn->edges = 0; + cn->next = (x == total_nodec - 1) ? 0 : (cn + 1); + cn->id = scratch[x]->id; + cn->is_final = scratch[x]->is_final; + cn->is_start = scratch[x]->is_start; + cn->mark = 0; + while (edge) + { + int indx = (edge->dest->id < 0 + ? (total_nodec + edge->dest->id) + : edge->dest->id); + struct rx_nfa_edge *e = new_edge++; + rx_Bitset cset = (rx_Bitset) new_bitset; + new_bitset += rx_sizeof_bitset (rx->local_cset_size); + rx_bitset_null (rx->local_cset_size, cset); + rx_bitset_union (rx->local_cset_size, cset, edge->params.cset); + e->next = cn->edges; + cn->edges = e; + e->type = edge->type; + e->dest = state_base + indx; + e->params.cset = cset; + edge = edge->next; + } + while (eclose) + { + struct rx_possible_future *ec = new_close++; + struct rx_hash_item * sp; + struct rx_se_list ** sepos; + struct rx_se_list * sesrc; + struct rx_nfa_state_set * destlst; + struct rx_nfa_state_set ** destpos; + ec->next = cn->futures; + cn->futures = ec; + for (sepos = &ec->effects, sesrc = eclose->effects; + sesrc; + sesrc = sesrc->cdr, sepos = &(*sepos)->cdr) + { + sp = rx_hash_find (&rx->se_list_memo, + (long)sesrc->car ^ (long)sesrc->cdr, + sesrc, &se_list_hash_rules); + if (sp->binding) + { + sesrc = (struct rx_se_list *)sp->binding; + break; + } + *new_se_list = *sesrc; + sp->binding = (void *)new_se_list; + *sepos = new_se_list; + ++new_se_list; + } + *sepos = sesrc; + for (destpos = &ec->destset, destlst = eclose->destset; + destlst; + destpos = &(*destpos)->cdr, destlst = destlst->cdr) + { + sp = rx_hash_find (&rx->set_list_memo, + ((((long)destlst->car) >> 8) + ^ (long)destlst->cdr), + destlst, &nfa_set_hash_rules); + if (sp->binding) + { + destlst = (struct rx_nfa_state_set *)sp->binding; + break; + } + *new_nfa_set = *destlst; + new_nfa_set->car = state_base + destlst->car->id; + sp->binding = (void *)new_nfa_set; + *destpos = new_nfa_set; + ++new_nfa_set; + } + *destpos = destlst; + eclose = e