summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Andersen <andersen@codepoet.org>2000-07-09 06:39:19 +0000
committerEric Andersen <andersen@codepoet.org>2000-07-09 06:39:19 +0000
commitf3a9360625bca4fb0b5fe9730d6e25431ac4704b (patch)
tree9de32b51a64464128da7d908a10378369f739a02
parent283f7172a1d7f3e42a38747f7a9b83bfc04c5238 (diff)
Add in a bunch of junk. Busybox now compiles (except for mkfs.minix and
fsck.minix). Of course, it doesn't link yet due to missing functions, but hey... At least it is now easy to see what isn't working. :-) -Erik
-rw-r--r--Makefile6
-rw-r--r--include/arpa/ftp.h109
-rw-r--r--include/arpa/inet.h101
-rw-r--r--include/arpa/nameser.h64
-rw-r--r--include/arpa/telnet.h11
-rw-r--r--include/arpa/tftp.h4
-rw-r--r--include/netinet/ether.h54
-rw-r--r--include/netinet/icmp6.h232
-rw-r--r--include/netinet/if_ether.h109
-rw-r--r--include/netinet/if_fddi.h37
-rw-r--r--include/netinet/if_tr.h41
-rw-r--r--include/netinet/igmp.h95
-rw-r--r--include/netinet/in.h289
-rw-r--r--include/netinet/in_systm.h42
-rw-r--r--include/netinet/ip.h291
-rw-r--r--include/netinet/ip6.h106
-rw-r--r--include/netinet/ip_icmp.h288
-rw-r--r--include/netinet/tcp.h167
-rw-r--r--include/netinet/udp.h56
-rw-r--r--include/regex.h3754
-rw-r--r--include/regexp.h241
-rw-r--r--include/stdlib.h25
-rw-r--r--include/sys/kd.h29
-rw-r--r--include/sys/klog.h34
-rw-r--r--include/sys/mtio.h277
-rw-r--r--libc/misc/regex/Makefile17
-rw-r--r--libc/misc/regex/rx.c7522
-rw-r--r--libc/stdlib/Makefile2
-rw-r--r--libc/stdlib/realpath.c168
29 files changed, 14004 insertions, 167 deletions
diff --git a/Makefile b/Makefile
index 98807e360..fcba9168a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-DIRS = headers error getent malloc-simple misc regexp stdio2 \
+DIRS = headers error getent malloc-simple misc regex stdio2 \
string sysdeps termios time #rpc
all: libc.a
@@ -29,8 +29,8 @@ misc: dummy
net: dummy
make -C net
-regexp: dummy
- make -C regexp
+regex: dummy
+ make -C regex
rpc: dummy
make -C rpc
diff --git a/include/arpa/ftp.h b/include/arpa/ftp.h
new file mode 100644
index 000000000..ac864aa70
--- /dev/null
+++ b/include/arpa/ftp.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 1983, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ftp.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _ARPA_FTP_H
+#define _ARPA_FTP_H 1
+
+/* Definitions for FTP; see RFC-765. */
+
+/*
+ * Reply codes.
+ */
+#define PRELIM 1 /* positive preliminary */
+#define COMPLETE 2 /* positive completion */
+#define CONTINUE 3 /* positive intermediate */
+#define TRANSIENT 4 /* transient negative completion */
+#define ERROR 5 /* permanent negative completion */
+
+/*
+ * Type codes
+ */
+#define TYPE_A 1 /* ASCII */
+#define TYPE_E 2 /* EBCDIC */
+#define TYPE_I 3 /* image */
+#define TYPE_L 4 /* local byte size */
+
+#ifdef FTP_NAMES
+char *typenames[] = {"0", "ASCII", "EBCDIC", "Image", "Local" };
+#endif
+
+/*
+ * Form codes
+ */
+#define FORM_N 1 /* non-print */
+#define FORM_T 2 /* telnet format effectors */
+#define FORM_C 3 /* carriage control (ASA) */
+#ifdef FTP_NAMES
+char *formnames[] = {"0", "Nonprint", "Telnet", "Carriage-control" };
+#endif
+
+/*
+ * Structure codes
+ */
+#define STRU_F 1 /* file (no record structure) */
+#define STRU_R 2 /* record structure */
+#define STRU_P 3 /* page structure */
+#ifdef FTP_NAMES
+char *strunames[] = {"0", "File", "Record", "Page" };
+#endif
+
+/*
+ * Mode types
+ */
+#define MODE_S 1 /* stream */
+#define MODE_B 2 /* block */
+#define MODE_C 3 /* compressed */
+#ifdef FTP_NAMES
+char *modenames[] = {"0", "Stream", "Block", "Compressed" };
+#endif
+
+/*
+ * Record Tokens
+ */
+#define REC_ESC '\377' /* Record-mode Escape */
+#define REC_EOR '\001' /* Record-mode End-of-Record */
+#define REC_EOF '\002' /* Record-mode End-of-File */
+
+/*
+ * Block Header
+ */
+#define BLK_EOR 0x80 /* Block is End-of-Record */
+#define BLK_EOF 0x40 /* Block is End-of-File */
+#define BLK_ERRORS 0x20 /* Block is suspected of containing errors */
+#define BLK_RESTART 0x10 /* Block is Restart Marker */
+
+#define BLK_BYTECOUNT 2 /* Bytes in this block */
+
+#endif /* arpa/ftp.h */
diff --git a/include/arpa/inet.h b/include/arpa/inet.h
index cf4cd3275..e34c4726a 100644
--- a/include/arpa/inet.h
+++ b/include/arpa/inet.h
@@ -1,12 +1,95 @@
-#ifndef __ARPA_INET_H
-#define __ARPA_INET_H
+/* Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-#include <netinet/in.h>
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-int inet_aton(const char *cp, struct in_addr *inp);
-
-unsigned long int inet_addr(const char *cp);
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
-char *inet_ntoa(struct in_addr in);
-
-#endif
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _ARPA_INET_H
+#define _ARPA_INET_H 1
+
+#include <features.h>
+
+#include <sys/types.h>
+#include <netinet/in.h> /* To define `struct in_addr'. */
+
+__BEGIN_DECLS
+
+/* Convert Internet host address from numbers-and-dots notation in CP
+ into binary data in network byte order. */
+extern u_int32_t inet_addr __P ((__const char *__cp));
+
+/* Convert Internet host address from numbers-and-dots notation in CP
+ into binary data and store the result in the structure INP. */
+extern int inet_aton __P ((__const char *__cp, struct in_addr *__inp));
+
+/* Return the local host address part of the Internet address in IN. */
+extern u_int32_t inet_lnaof __P ((struct in_addr __in));
+
+/* Make Internet host address in network byte order by combining the
+ network number NET with the local address HOST. */
+extern struct in_addr inet_makeaddr __P ((u_int32_t __net, u_int32_t __host));
+
+/* Format a network number NET into presentation format and place result
+ in buffer starting at BUF with length of LEN bytes. */
+extern char *inet_neta __P ((u_int32_t __net, char *__buf, size_t __len));
+
+/* Return network number part of the Internet address IN. */
+extern u_int32_t inet_netof __P ((struct in_addr __in));
+
+/* Extract the network number in network byte order from the address
+ in numbers-and-dots notation starting at CP. */
+extern u_int32_t inet_network __P ((__const char *__cp));
+
+/* Convert network number for interface type AF in buffer starting at
+ CP to presentation format. The result will specify BITS bits of
+ the number. */
+extern char *inet_net_ntop __P ((int __af, __const void *__cp, int __bits,
+ char *__buf, size_t __len));
+
+/* Convert network number for interface type AF from presentation in
+ buffer starting at CP to network format and store result in
+ buffer starting at BUF of size LEN. */
+extern int inet_net_pton __P ((int __af, __const char *__cp,
+ void *__buf, size_t __len));
+
+/* Convert Internet number in IN to ASCII representation. The return value
+ is a pointer to an internal array containing the string. */
+extern char *inet_ntoa __P ((struct in_addr __in));
+
+/* Convert from presentation format of an Internet number in buffer
+ starting at CP to the binary network format and store result for
+ interface type AF in buffer starting at BUF. */
+extern int inet_pton __P ((int __af, __const char *__cp, void *__buf));
+
+/* Convert an Internet address in binary network format for interface
+ type AF in buffer starting at CP to presentation form and place
+ result in buffer of length LEN starting at BUF. */
+extern __const char *inet_ntop __P ((int __af, __const void *__cp,
+ char *__buf, size_t __len));
+
+/* Convert ASCII representation in hexadecimal form of the Internet
+ address to binary form and place result in buffer of length LEN
+ starting at BUF. */
+extern unsigned int inet_nsap_addr __P ((__const char *__cp,
+ unsigned char *__buf, int __len));
+
+/* Convert internet address in binary form in LEN bytes starting at CP
+ to a presentation form and place result in BUF. */
+extern char *inet_nsap_ntoa __P ((int __len, __const unsigned char *__cp,
+ char *__buf));
+
+__END_DECLS
+
+#endif /* arpa/inet.h */
diff --git a/include/arpa/nameser.h b/include/arpa/nameser.h
index e88ad8226..4a8ef67c7 100644
--- a/include/arpa/nameser.h
+++ b/include/arpa/nameser.h
@@ -77,23 +77,15 @@
/*
* @(#)nameser.h 8.1 (Berkeley) 6/2/93
- * $Id: nameser.h,v 1.2 2000/05/14 06:07:30 erik Exp $
+ * $Id: nameser.h,v 1.3 2000/07/09 06:39:14 andersen Exp $
*/
-#ifndef _NAMESER_H_
-#define _NAMESER_H_
+#ifndef _ARPA_NAMESER_H
+#define _ARPA_NAMESER_H 1
+#include <features.h>
#include <sys/param.h>
-#if (!defined(BSD)) || (BSD < 199306)
-# include <sys/bitypes.h>
-#else
-# include <sys/types.h>
-#endif
-#include <sys/cdefs.h>
-
-#ifdef _AUX_SOURCE
-# include <sys/types.h>
-#endif
+#include <sys/types.h>
/*
* revision information. this is the release date in YYYYMMDD format.
@@ -254,46 +246,10 @@
#define CONV_BADCKSUM (-3)
#define CONV_BADBUFLEN (-4)
-#ifndef BYTE_ORDER
-#if (BSD >= 199103)
-# include <machine/endian.h>
-#else
-#ifdef __linux__
-# include <endian.h>
-#else
-#define LITTLE_ENDIAN 1234 /* least-significant byte first (vax, pc) */
-#define BIG_ENDIAN 4321 /* most-significant byte first (IBM, net) */
-#define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long (pdp)*/
+/* glibc always has byte order info in <endian.h> */
+#include <endian.h>
-#if defined(vax) || defined(ns32000) || defined(sun386) || defined(i386) || \
- defined(MIPSEL) || defined(_MIPSEL) || defined(BIT_ZERO_ON_RIGHT) || \
- defined(__alpha__) || defined(__alpha)
-#define BYTE_ORDER LITTLE_ENDIAN
-#endif
-
-#if defined(sel) || defined(pyr) || defined(mc68000) || defined(sparc) || \
- defined(is68k) || defined(tahoe) || defined(ibm032) || defined(ibm370) || \
- defined(MIPSEB) || defined(_MIPSEB) || defined(_IBMR2) || defined(DGUX) ||\
- defined(apollo) || defined(__convex__) || defined(_CRAY) || \
- defined(__hppa) || defined(__hp9000) || \
- defined(__hp9000s300) || defined(__hp9000s700) || \
- defined (BIT_ZERO_ON_LEFT) || defined(m68k)
-#define BYTE_ORDER BIG_ENDIAN
-#endif
-#endif /* __linux__ */
-#endif /* BSD */
-#endif /* BYTE_ORDER */
-
-#if !defined(BYTE_ORDER) || \
- (BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN && \
- BYTE_ORDER != PDP_ENDIAN)
- /* you must determine what the correct bit order is for
- * your compiler - the next line is an intentional error
- * which will force your compiles to bomb until you fix
- * the above macros.
- */
- error "Undefined or invalid BYTE_ORDER";
-#endif
+__BEGIN_DECLS
/*
* Structure for query header. The order of the fields is machine- and
@@ -389,4 +345,6 @@ extern u_int32_t _getlong __P((const u_char *));
(cp) += INT32SZ; \
}
-#endif /* !_NAMESER_H_ */
+__END_DECLS
+
+#endif /* arpa/nameser.h */
diff --git a/include/arpa/telnet.h b/include/arpa/telnet.h
index 25085b89a..3309e5d72 100644
--- a/include/arpa/telnet.h
+++ b/include/arpa/telnet.h
@@ -34,7 +34,7 @@
*/
#ifndef _ARPA_TELNET_H
-#define _ARPA_TELNET_H
+#define _ARPA_TELNET_H 1
/*
* Definitions for the TELNET protocol.
@@ -96,7 +96,7 @@ extern char *telcmds[];
#define TELOPT_NAOVTS 14 /* negotiate about vertical tab stops */
#define TELOPT_NAOVTD 15 /* negotiate about vertical tab disposition */
#define TELOPT_NAOLFD 16 /* negotiate about output LF disposition */
-#define TELOPT_XASCII 17 /* extended ascic character set */
+#define TELOPT_XASCII 17 /* extended ascii character set */
#define TELOPT_LOGOUT 18 /* force logout */
#define TELOPT_BM 19 /* byte macro */
#define TELOPT_DET 20 /* data entry terminal */
@@ -200,7 +200,7 @@ char *telopts[NTELOPTS+1] = {
#define NSLC 18
/*
- * For backwards compatability, we define SLC_NAMES to be the
+ * For backwards compatibility, we define SLC_NAMES to be the
* list of names if SLC_NAMES is not defined.
*/
#define SLC_NAMELIST "0", "SYNCH", "BRK", "IP", "AO", "AYT", "EOR", \
@@ -237,7 +237,7 @@ extern char *slc_names[];
#define NEW_ENV_VAR 0
#define NEW_ENV_VALUE 1
#define ENV_ESC 2
-#define ENV_USERVAR 3
+#define ENV_USERVAR 3
/*
* AUTHENTICATION suboptions
@@ -316,4 +316,5 @@ extern char *enctype_names[];
#define ENCTYPE_NAME_OK(x) ((unsigned int)(x) < ENCTYPE_CNT)
#define ENCTYPE_NAME(x) enctype_names[x]
-#endif /* _ARPA_TELNET_H */
+
+#endif /* arpa/telnet.h */
diff --git a/include/arpa/tftp.h b/include/arpa/tftp.h
index 0904407c7..69187da08 100644
--- a/include/arpa/tftp.h
+++ b/include/arpa/tftp.h
@@ -56,9 +56,9 @@ struct tftphdr {
unsigned short tu_block; /* block # */
short tu_code; /* error code */
char tu_stuff[1]; /* request packet stuff */
- } th_u;
+ } __attribute__ ((__packed__)) th_u;
char th_data[1]; /* data or error string */
-};
+} __attribute__ ((__packed__));
#define th_block th_u.tu_block
#define th_code th_u.tu_code
diff --git a/include/netinet/ether.h b/include/netinet/ether.h
new file mode 100644
index 000000000..c7985d7ac
--- /dev/null
+++ b/include/netinet/ether.h
@@ -0,0 +1,54 @@
+/* Functions for storing Ethernet addresses in ASCII and mapping to hostnames.
+ Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _NETINET_ETHER_H
+#define _NETINET_ETHER_H 1
+
+#include <features.h>
+
+/* Get definition of `struct ether_addr'. */
+#include <netinet/if_ether.h>
+
+__BEGIN_DECLS
+
+/* Convert 48 bit Ethernet ADDRess to ASCII. */
+extern char *ether_ntoa __P ((__const struct ether_addr *__addr));
+extern char *ether_ntoa_r __P ((__const struct ether_addr *__addr,
+ char *__buf));
+
+/* Convert ASCII string ASC to 48 bit Ethernet address. */
+extern struct ether_addr *ether_aton __P ((__const char *__asc));
+extern struct ether_addr *ether_aton_r __P ((__const char *__asc,
+ struct ether_addr *__addr));
+
+/* Map 48 bit Ethernet number ADDR to HOSTNAME. */
+extern int ether_ntohost __P ((char *__hostname,
+ __const struct ether_addr *__addr));
+
+/* Map HOSTNAME to 48 bit Ethernet address. */
+extern int ether_hostton __P ((__const char *__hostname,
+ struct ether_addr *__addr));
+
+/* Scan LINE and set ADDR and HOSTNAME. */
+extern int ether_line __P ((__const char *__line, struct ether_addr *__addr,
+ char *__hostname));
+
+__END_DECLS
+
+#endif /* netinet/ether.h */
diff --git a/include/netinet/icmp6.h b/include/netinet/icmp6.h
new file mode 100644
index 000000000..5a3863953
--- /dev/null
+++ b/include/netinet/icmp6.h
@@ -0,0 +1,232 @@
+/* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _NETINET_ICMP6_H
+#define _NETINET_ICMP6_H 1
+
+#include <inttypes.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#define ICMP6_FILTER 1
+
+#define ICMP6_FILTER_BLOCK 1
+#define ICMP6_FILTER_PASS 2
+#define ICMP6_FILTER_BLOCKOTHERS 3
+#define ICMP6_FILTER_PASSONLY 4
+
+struct icmp6_filter
+ {
+ uint32_t data[8];
+ };
+
+struct icmp6_hdr
+ {
+ uint8_t icmp6_type; /* type field */
+ uint8_t icmp6_code; /* code field */
+ uint16_t icmp6_cksum; /* checksum field */
+ union
+ {
+ uint32_t icmp6_un_data32[1]; /* type-specific field */
+ uint16_t icmp6_un_data16[2]; /* type-specific field */
+ uint8_t icmp6_un_data8[4]; /* type-specific field */
+ } icmp6_dataun;
+ };
+
+#define icmp6_data32 icmp6_dataun.icmp6_un_data32
+#define icmp6_data16 icmp6_dataun.icmp6_un_data16
+#define icmp6_data8 icmp6_dataun.icmp6_un_data8
+#define icmp6_pptr icmp6_data32[0] /* parameter prob */
+#define icmp6_mtu icmp6_data32[0] /* packet too big */
+#define icmp6_id icmp6_data16[0] /* echo request/reply */
+#define icmp6_seq icmp6_data16[1] /* echo request/reply */
+#define icmp6_maxdelay icmp6_data16[0] /* mcast group membership */
+
+#define ICMP6_DST_UNREACH 1
+#define ICMP6_PACKET_TOO_BIG 2
+#define ICMP6_TIME_EXCEEDED 3
+#define ICMP6_PARAM_PROB 4
+
+#define ICMP6_INFOMSG_MASK 0x80 /* all informational messages */
+
+#define ICMP6_ECHO_REQUEST 128
+#define ICMP6_ECHO_REPLY 129
+#define ICMP6_MEMBERSHIP_QUERY 130
+#define ICMP6_MEMBERSHIP_REPORT 131
+#define ICMP6_MEMBERSHIP_REDUCTION 132
+
+#define ICMP6_DST_UNREACH_NOROUTE 0 /* no route to destination */
+#define ICMP6_DST_UNREACH_ADMIN 1 /* communication with destination */
+ /* administratively prohibited */
+#define ICMP6_DST_UNREACH_NOTNEIGHBOR 2 /* not a neighbor */
+#define ICMP6_DST_UNREACH_ADDR 3 /* address unreachable */
+#define ICMP6_DST_UNREACH_NOPORT 4 /* bad port */
+
+#define ICMP6_TIME_EXCEED_TRANSIT 0 /* Hop Limit == 0 in transit */
+#define ICMP6_TIME_EXCEED_REASSEMBLY 1 /* Reassembly time out */
+
+#define ICMP6_PARAMPROB_HEADER 0 /* erroneous header field */
+#define ICMP6_PARAMPROB_NEXTHEADER 1 /* unrecognized Next Header */
+#define ICMP6_PARAMPROB_OPTION 2 /* unrecognized IPv6 option */
+/* Filter bit tests/updates: a set bit blocks TYPE. 1U keeps bit 31 defined. */
+#define ICMP6_FILTER_WILLPASS(type, filterp) \
+ ((((filterp)->data[(type) >> 5]) & (1U << ((type) & 31))) == 0)
+
+#define ICMP6_FILTER_WILLBLOCK(type, filterp) \
+ ((((filterp)->data[(type) >> 5]) & (1U << ((type) & 31))) != 0)
+
+#define ICMP6_FILTER_SETPASS(type, filterp) \
+ ((((filterp)->data[(type) >> 5]) &= ~(1U << ((type) & 31))))
+
+#define ICMP6_FILTER_SETBLOCK(type, filterp) \
+ ((((filterp)->data[(type) >> 5]) |= (1U << ((type) & 31))))
+
+#define ICMP6_FILTER_SETPASSALL(filterp) \
+ memset (filterp, 0, sizeof (struct icmp6_filter));
+
+#define ICMP6_FILTER_SETBLOCKALL(filterp) \
+ memset (filterp, 0xFF, sizeof (struct icmp6_filter));
+
+#define ND_ROUTER_SOLICIT 133
+#define ND_ROUTER_ADVERT 134
+#define ND_NEIGHBOR_SOLICIT 135
+#define ND_NEIGHBOR_ADVERT 136
+#define ND_REDIRECT 137
+
+struct nd_router_solicit /* router solicitation */
+ {
+ struct icmp6_hdr nd_rs_hdr;
+ /* could be followed by options */
+ };
+
+#define nd_rs_type nd_rs_hdr.icmp6_type
+#define nd_rs_code nd_rs_hdr.icmp6_code
+#define nd_rs_cksum nd_rs_hdr.icmp6_cksum
+#define nd_rs_reserved nd_rs_hdr.icmp6_data32[0]
+
+struct nd_router_advert /* router advertisement */
+ {
+ struct icmp6_hdr nd_ra_hdr;
+ uint32_t nd_ra_reachable; /* reachable time */
+ uint32_t nd_ra_retransmit; /* retransmit timer */
+ /* could be followed by options */
+ };
+
+#define nd_ra_type nd_ra_hdr.icmp6_type
+#define nd_ra_code nd_ra_hdr.icmp6_code
+#define nd_ra_cksum nd_ra_hdr.icmp6_cksum
+#define nd_ra_curhoplimit nd_ra_hdr.icmp6_data8[0]
+#define nd_ra_flags_reserved nd_ra_hdr.icmp6_data8[1]
+#define ND_RA_FLAG_MANAGED 0x80
+#define ND_RA_FLAG_OTHER 0x40
+#define nd_ra_router_lifetime nd_ra_hdr.icmp6_data16[1]
+
+struct nd_neighbor_solicit /* neighbor solicitation */
+ {
+ struct icmp6_hdr nd_ns_hdr;
+ struct in6_addr nd_ns_target; /* target address */
+ /* could be followed by options */
+ };
+
+#define nd_ns_type nd_ns_hdr.icmp6_type
+#define nd_ns_code nd_ns_hdr.icmp6_code
+#define nd_ns_cksum nd_ns_hdr.icmp6_cksum
+#define nd_ns_reserved nd_ns_hdr.icmp6_data32[0]
+
+struct nd_neighbor_advert /* neighbor advertisement */
+ {
+ struct icmp6_hdr nd_na_hdr;
+ struct in6_addr nd_na_target; /* target address */
+ /* could be followed by options */
+ };
+
+#define nd_na_type nd_na_hdr.icmp6_type
+#define nd_na_code nd_na_hdr.icmp6_code
+#define nd_na_cksum nd_na_hdr.icmp6_cksum
+#define nd_na_flags_reserved nd_na_hdr.icmp6_data32[0]
+#if BYTE_ORDER == BIG_ENDIAN
+#define ND_NA_FLAG_ROUTER 0x80000000
+#define ND_NA_FLAG_SOLICITED 0x40000000
+#define ND_NA_FLAG_OVERRIDE 0x20000000
+#else /* BYTE_ORDER == LITTLE_ENDIAN */
+#define ND_NA_FLAG_ROUTER 0x00000080
+#define ND_NA_FLAG_SOLICITED 0x00000040
+#define ND_NA_FLAG_OVERRIDE 0x00000020
+#endif
+
+struct nd_redirect /* redirect */
+ {
+ struct icmp6_hdr nd_rd_hdr;
+ struct in6_addr nd_rd_target; /* target address */
+ struct in6_addr nd_rd_dst; /* destination address */
+ /* could be followed by options */
+ };
+
+#define nd_rd_type nd_rd_hdr.icmp6_type
+#define nd_rd_code nd_rd_hdr.icmp6_code
+#define nd_rd_cksum nd_rd_hdr.icmp6_cksum
+#define nd_rd_reserved nd_rd_hdr.icmp6_data32[0]
+
+struct nd_opt_hdr /* Neighbor discovery option header */
+ {
+ uint8_t nd_opt_type;
+ uint8_t nd_opt_len; /* in units of 8 octets */
+ /* followed by option specific data */
+ };
+
+#define ND_OPT_SOURCE_LINKADDR 1
+#define ND_OPT_TARGET_LINKADDR 2
+#define ND_OPT_PREFIX_INFORMATION 3
+#define ND_OPT_REDIRECTED_HEADER 4
+#define ND_OPT_MTU 5
+
+struct nd_opt_prefix_info /* prefix information */
+ {
+ uint8_t nd_opt_pi_type;
+ uint8_t nd_opt_pi_len;
+ uint8_t nd_opt_pi_prefix_len;
+ uint8_t nd_opt_pi_flags_reserved;
+ uint32_t nd_opt_pi_valid_time;
+ uint32_t nd_opt_pi_preferred_time;
+ uint32_t nd_opt_pi_reserved2;
+ struct in6_addr nd_opt_pi_prefix;
+ };
+
+#define ND_OPT_PI_FLAG_ONLINK 0x80
+#define ND_OPT_PI_FLAG_AUTO 0x40
+
+struct nd_opt_rd_hdr /* redirected header */
+ {
+ uint8_t nd_opt_rh_type;
+ uint8_t nd_opt_rh_len;
+ uint16_t nd_opt_rh_reserved1;
+ uint32_t nd_opt_rh_reserved2;
+ /* followed by IP header and data */
+ };
+
+struct nd_opt_mtu /* MTU option */
+ {
+ uint8_t nd_opt_mtu_type;
+ uint8_t nd_opt_mtu_len;
+ uint16_t nd_opt_mtu_reserved;
+ uint32_t nd_opt_mtu_mtu;
+ };
+
+
+#endif /* netinet/icmp6.h */
diff --git a/include/netinet/if_ether.h b/include/netinet/if_ether.h
new file mode 100644
index 000000000..7194490f5
--- /dev/null
+++ b/include/netinet/if_ether.h
@@ -0,0 +1,109 @@
+/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef __NETINET_IF_ETHER_H
+
+#define __NETINET_IF_ETHER_H 1
+#include <features.h>
+#include <sys/types.h>
+
+/* Get definitions from kernel header file. */
+#include <linux/if_ether.h>
+
+#ifdef __USE_BSD
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)if_ether.h 8.3 (Berkeley) 5/2/95
+ * $FreeBSD$
+ */
+
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+
+__BEGIN_DECLS
+/*
+ * Ethernet Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description. Structure below is adapted
+ * to resolving internet addresses. Field names used correspond to
+ * RFC 826.
+ */
+struct ether_arp {
+ struct arphdr ea_hdr; /* fixed-size header */
+ u_int8_t arp_sha[ETH_ALEN]; /* sender hardware address */
+ u_int8_t arp_spa[4]; /* sender protocol address */
+ u_int8_t arp_tha[ETH_ALEN]; /* target hardware address */
+ u_int8_t arp_tpa[4]; /* target protocol address */
+};
+#define arp_hrd ea_hdr.ar_hrd
+#define arp_pro ea_hdr.ar_pro
+#define arp_hln ea_hdr.ar_hln
+#define arp_pln ea_hdr.ar_pln
+#define arp_op ea_hdr.ar_op
+
+/*
+ * Map IP multicast address IPADDR to Ethernet multicast address ENADDR:
+ * bytes 0-2 are fixed at 01:00:5e and the low-order 23 bits are copied
+ * from IPADDR. Expands to a braced block; arguments are evaluated repeatedly.
+ */
+#define ETHER_MAP_IP_MULTICAST(ipaddr, enaddr) \
+ /* struct in_addr *ipaddr; */ \
+ /* u_char enaddr[ETH_ALEN]; */ \
+{ \
+ (enaddr)[0] = 0x01; \
+ (enaddr)[1] = 0x00; \
+ (enaddr)[2] = 0x5e; \
+ (enaddr)[3] = ((u_int8_t *)(ipaddr))[1] & 0x7f; \
+ (enaddr)[4] = ((u_int8_t *)(ipaddr))[2]; \
+ (enaddr)[5] = ((u_int8_t *)(ipaddr))[3]; \
+}
+
+__END_DECLS
+#endif /* __USE_BSD */
+
+#endif /* netinet/if_ether.h */
diff --git a/include/netinet/if_fddi.h b/include/netinet/if_fddi.h
new file mode 100644
index 000000000..d5d6dbdbe
--- /dev/null
+++ b/include/netinet/if_fddi.h
@@ -0,0 +1,37 @@
+/* Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _NETINET_IF_FDDI_H
+#define _NETINET_IF_FDDI_H 1
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <asm/types.h>
+
+#include <linux/if_fddi.h>
+
+#ifdef __USE_BSD
+
+struct fddi_header {
+ u_int8_t fddi_fc; /* Frame Control (FC) value */
+ u_int8_t fddi_dhost[FDDI_K_ALEN]; /* Destination host */
+ u_int8_t fddi_shost[FDDI_K_ALEN]; /* Source host */
+};
+#endif
+
+#endif /* netinet/if_fddi.h */
diff --git a/include/netinet/if_tr.h b/include/netinet/if_tr.h
new file mode 100644
index 000000000..3c1be2102
--- /dev/null
+++ b/include/netinet/if_tr.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _NETINET_IF_TR_H
+#define _NETINET_IF_TR_H 1
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <asm/types.h>
+
+#include <linux/if_tr.h>
+
+#ifdef __USE_BSD
+
+struct trn_hdr {
+ u_int8_t trn_ac; /* access control field */
+ u_int8_t trn_fc; /* frame control field */
+ u_int8_t trn_dhost[TR_ALEN]; /* destination host */
+ u_int8_t trn_shost[TR_ALEN]; /* source host */
+ u_int16_t trn_rcf; /* route control field */
+ u_int16_t trn_rseg[8]; /* routing registers */
+};
+
+#endif
+
+#endif /* netinet/if_tr.h */
diff --git a/include/netinet/igmp.h b/include/netinet/igmp.h
index 4525630e1..7a6ed6e8c 100644
--- a/include/netinet/igmp.h
+++ b/include/netinet/igmp.h
@@ -1,24 +1,93 @@
+/* Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
#ifndef _NETINET_IGMP_H
-#define _NETINET_IGMP_H
+#define _NETINET_IGMP_H 1
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+#include <asm/types.h>
#include <linux/igmp.h>
-#ifdef __BSD_SOURCE
+#ifdef __USE_BSD
+
+#include <netinet/in.h>
+
+__BEGIN_DECLS
+
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)igmp.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
-struct igmp
-{
- __u8 igmp_type;
- __u8 igmp_code;
- __u16 igmp_cksum;
- struct in_addr igmp_group;
+struct igmp {
+ u_int8_t igmp_type; /* IGMP type */
+ u_int8_t igmp_code; /* routing code */
+ u_int16_t igmp_cksum; /* checksum */
+ struct in_addr igmp_group; /* group address */
};
-#define IGMP_MINLEN 8
-#define IGMP_MAX_HOST_REPORT_DELAY 10
-#define IGMP_TIMER_SCALE 10
+/*
+ * Message types, including version number.
+ */
+#define IGMP_MEMBERSHIP_QUERY 0x11 /* membership query */
+#define IGMP_V1_MEMBERSHIP_REPORT 0x12 /* Ver. 1 membership report */
+#define IGMP_V2_MEMBERSHIP_REPORT 0x16 /* Ver. 2 membership report */
+#define IGMP_V2_LEAVE_GROUP 0x17 /* Leave-group message */
-#define IGMP_AGE_THRESHOLD 540
+__END_DECLS
#endif
-#endif /* _NETINET_IGMP_H */
+#endif /* netinet/igmp.h */
diff --git a/include/netinet/in.h b/include/netinet/in.h
index d666c67ac..3dea92632 100644
--- a/include/netinet/in.h
+++ b/include/netinet/in.h
@@ -1,29 +1,66 @@
-/* Copyright (C) 1991 Free Software Foundation, Inc.
-This file is part of the GNU C Library.
+/* Copyright (C) 1991,92,93,94,95,96,97,98,99 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
-The GNU C Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 1, or (at your option)
-any later version.
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with the GNU C Library; see the file COPYING. If not, write to
-the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
#ifndef _NETINET_IN_H
-
#define _NETINET_IN_H 1
+
#include <features.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include <sys/types.h>
+#include <bits/socket.h>
-#include <sys/socket.h>
__BEGIN_DECLS
+/* Standard well-defined IP protocols. */
+enum
+ {
+ IPPROTO_IP = 0, /* Dummy protocol for TCP. */
+ IPPROTO_HOPOPTS = 0, /* IPv6 Hop-by-Hop options (same value as IPPROTO_IP). */
+ IPPROTO_ICMP = 1, /* Internet Control Message Protocol. */
+ IPPROTO_IGMP = 2, /* Internet Group Management Protocol. */
+ IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94). */
+ IPPROTO_TCP = 6, /* Transmission Control Protocol. */
+ IPPROTO_EGP = 8, /* Exterior Gateway Protocol. */
+ IPPROTO_PUP = 12, /* PUP protocol. */
+ IPPROTO_UDP = 17, /* User Datagram Protocol. */
+ IPPROTO_IDP = 22, /* XNS IDP protocol. */
+ IPPROTO_TP = 29, /* ISO Transport Protocol Class 4. */
+ IPPROTO_IPV6 = 41, /* IPv6 header. */
+ IPPROTO_ROUTING = 43, /* IPv6 routing header. */
+ IPPROTO_FRAGMENT = 44, /* IPv6 fragmentation header. */
+ IPPROTO_RSVP = 46, /* Reservation Protocol. */
+ IPPROTO_GRE = 47, /* General Routing Encapsulation. */
+ IPPROTO_ESP = 50, /* Encapsulating security payload. */
+ IPPROTO_AH = 51, /* Authentication header. */
+ IPPROTO_ICMPV6 = 58, /* ICMPv6. */
+ IPPROTO_NONE = 59, /* IPv6 no next header. */
+ IPPROTO_DSTOPTS = 60, /* IPv6 destination options. */
+ IPPROTO_MTP = 92, /* Multicast Transport Protocol. */
+ IPPROTO_ENCAP = 98, /* Encapsulation Header. */
+ IPPROTO_PIM = 103, /* Protocol Independent Multicast. */
+ IPPROTO_COMP = 108, /* Compression Header Protocol. */
+ IPPROTO_RAW = 255, /* Raw IP packets. */
+ IPPROTO_MAX /* One past IPPROTO_RAW, i.e. 256; not a protocol number. */
+ };
+
/* Standard well-known ports. */
enum
{
@@ -65,23 +102,217 @@ enum
};
-/* Link numbers. */
-#define IMPLINK_IP 155
-#define IMPLINK_LOWEXPER 156
-#define IMPLINK_HIGHEXPER 158
+/* Internet address. */
+struct in_addr
+ {
+ uint32_t s_addr;
+ };
+
+
+/* Definitions of the bits in an Internet address integer.
+
+ On subnets, host and network parts are found according to
+ the subnet mask, not these masks. */
+
+#define IN_CLASSA(a) ((((uint32_t) (a)) & 0x80000000) == 0)
+#define IN_CLASSA_NET 0xff000000
+#define IN_CLASSA_NSHIFT 24
+#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET)
+#define IN_CLASSA_MAX 128
+
+#define IN_CLASSB(a) ((((uint32_t) (a)) & 0xc0000000) == 0x80000000)
+#define IN_CLASSB_NET 0xffff0000
+#define IN_CLASSB_NSHIFT 16
+#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET)
+#define IN_CLASSB_MAX 65536
+
+#define IN_CLASSC(a) ((((uint32_t) (a)) & 0xe0000000) == 0xc0000000)
+#define IN_CLASSC_NET 0xffffff00
+#define IN_CLASSC_NSHIFT 8
+#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET)
+
+#define IN_CLASSD(a) ((((uint32_t) (a)) & 0xf0000000) == 0xe0000000)
+#define IN_MULTICAST(a) IN_CLASSD(a)
+
+#define IN_EXPERIMENTAL(a) ((((uint32_t) (a)) & 0xe0000000) == 0xe0000000)
+#define IN_BADCLASS(a) ((((uint32_t) (a)) & 0xf0000000) == 0xf0000000)
+
+/* Address to accept any incoming messages. */
+#define INADDR_ANY ((uint32_t) 0x00000000)
+/* Address to send to all hosts. */
+#define INADDR_BROADCAST ((uint32_t) 0xffffffff)
+/* Address indicating an error return. */
+#define INADDR_NONE ((uint32_t) 0xffffffff)
+
+/* Network number for local host loopback. */
+#define IN_LOOPBACKNET 127
+/* Address to loopback in software to local host. */
+#ifndef INADDR_LOOPBACK
+# define INADDR_LOOPBACK ((uint32_t) 0x7f000001) /* Inet 127.0.0.1. */
+#endif
+
+/* Defines for Multicast INADDR. */
+#define INADDR_UNSPEC_GROUP ((uint32_t) 0xe0000000) /* 224.0.0.0 */
+#define INADDR_ALLHOSTS_GROUP ((uint32_t) 0xe0000001) /* 224.0.0.1 */
+#define INADDR_ALLRTRS_GROUP ((uint32_t) 0xe0000002) /* 224.0.0.2 */
+#define INADDR_MAX_LOCAL_GROUP ((uint32_t) 0xe00000ff) /* 224.0.0.255 */
+
+
+/* IPv6 address */
+struct in6_addr /* 128-bit IPv6 address, viewable at several word widths */
+ {
+ union
+ {
+ uint8_t u6_addr8[16]; /* as 16 bytes */
+ uint16_t u6_addr16[8]; /* as eight 16-bit words */
+ uint32_t u6_addr32[4]; /* as four 32-bit words */
+#if ULONG_MAX > 0xffffffff /* 64-bit view only where unsigned long is wider than 32 bits */
+ uint64_t u6_addr64[2]; /* as two 64-bit words */
+#endif
+ } in6_u;
+#define s6_addr in6_u.u6_addr8 /* shorthand accessors for the union members */
+#define s6_addr16 in6_u.u6_addr16
+#define s6_addr32 in6_u.u6_addr32
+#define s6_addr64 in6_u.u6_addr64 /* NOTE: member exists only when ULONG_MAX > 0xffffffff */
+ };
+
+extern const struct in6_addr in6addr_any; /* :: */
+extern const struct in6_addr in6addr_loopback; /* ::1 */
+#define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } }
+#define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+
+#define INET_ADDRSTRLEN 16
+#define INET6_ADDRSTRLEN 46
+
+/* Get the definition of the macro to define the common sockaddr members. */
+#include <bits/sockaddr.h>
+
+
+/* Structure describing an Internet socket address. */
+struct sockaddr_in
+ {
+ __SOCKADDR_COMMON (sin_);
+ uint16_t sin_port; /* Port number. */
+ struct in_addr sin_addr; /* Internet address. */
+
+ /* Pad to size of `struct sockaddr'. */
+ unsigned char sin_zero[sizeof (struct sockaddr) -
+ __SOCKADDR_COMMON_SIZE -
+ sizeof (uint16_t) -
+ sizeof (struct in_addr)];
+ };
+
+/* Ditto, for IPv6. */
+struct sockaddr_in6
+ {
+ __SOCKADDR_COMMON (sin6_);
+ uint16_t sin6_port; /* Transport layer port # */
+ uint32_t sin6_flowinfo; /* IPv6 flow information */
+ struct in6_addr sin6_addr; /* IPv6 address */
+ };
+
+/* IPv6 multicast request. */
+struct ipv6_mreq
+ {
+ /* IPv6 multicast address of group */
+ struct in6_addr ipv6mr_multiaddr;
+
+ /* local interface */
+ unsigned int ipv6mr_interface;
+ };
+
+/* Get system-specific definitions. */
+#include <bits/in.h>
+
+/* Functions to convert between host and network byte order.
+
+ Please note that these functions normally take `unsigned long int' or
+ `unsigned short int' values as arguments and also return them. But
+ this was a short-sighted decision since on different systems the types
+ may have different representations but the values are always the same. */
+
+extern uint32_t ntohl __P ((uint32_t __netlong));
+extern uint16_t ntohs __P ((uint16_t __netshort));
+extern uint32_t htonl __P ((uint32_t __hostlong));
+extern uint16_t htons __P ((uint16_t __hostshort));
+
+#include <endian.h>
+
+/* Get machine dependent optimized versions of byte swapping functions. */
+#include <bits/byteswap.h>
+
+#if __BYTE_ORDER == __BIG_ENDIAN && defined __OPTIMIZE__
+/* The host byte order is the same as network byte order,
+ so these functions are all just identity. */
+# define ntohl(x) (x)
+# define ntohs(x) (x)
+# define htonl(x) (x)
+# define htons(x) (x)
+#else
+# if __BYTE_ORDER == __LITTLE_ENDIAN && defined __OPTIMIZE__
+# define ntohl(x) __bswap_32 (x)
+# define ntohs(x) __bswap_16 (x)
+# define htonl(x) __bswap_32 (x)
+# define htons(x) __bswap_16 (x)
+# endif
+#endif
+
+#define IN6_IS_ADDR_UNSPECIFIED(a) \
+ (((uint32_t *) (a))[0] == 0 && ((uint32_t *) (a))[1] == 0 && \
+ ((uint32_t *) (a))[2] == 0 && ((uint32_t *) (a))[3] == 0)
+#define IN6_IS_ADDR_LOOPBACK(a) \
+ (((uint32_t *) (a))[0] == 0 && ((uint32_t *) (a))[1] == 0 && \
+ ((uint32_t *) (a))[2] == 0 && ((uint32_t *) (a))[3] == htonl (1))
-/*
- * Many other definitions have been moved to <linux/in.h>,
- * because several parts of the kernel need them. -FvK
- */
-#include <linux/in.h>
+#define IN6_IS_ADDR_MULTICAST(a) (((uint8_t *) (a))[0] == 0xff) /* multicast iff first address byte is 0xff; uint8_t matches the rest of this header */
-/*
- * Bind a socket to a privileged IP port
- */
-extern int bindresvport __P ((int __sockfd,
- struct sockaddr_in * __sin));
+#define IN6_IS_ADDR_LINKLOCAL(a) \
+ ((((uint32_t *) (a))[0] & htonl (0xffc00000)) == htonl (0xfe800000))
+
+#define IN6_IS_ADDR_SITELOCAL(a) \
+ ((((uint32_t *) (a))[0] & htonl (0xffc00000)) == htonl (0xfec00000))
+
+#define IN6_IS_ADDR_V4MAPPED(a) \
+ ((((uint32_t *) (a))[0] == 0) && (((uint32_t *) (a))[1] == 0) && \
+ (((uint32_t *) (a))[2] == htonl (0xffff)))
+
+#define IN6_IS_ADDR_V4COMPAT(a) \
+ ((((uint32_t *) (a))[0] == 0) && (((uint32_t *) (a))[1] == 0) && \
+ (((uint32_t *) (a))[2] == 0) && (ntohl (((uint32_t *) (a))[3]) > 1))
+
+#define IN6_ARE_ADDR_EQUAL(a,b) \
+ ((((uint32_t *) (a))[0] == ((uint32_t *) (b))[0]) && \
+ (((uint32_t *) (a))[1] == ((uint32_t *) (b))[1]) && \
+ (((uint32_t *) (a))[2] == ((uint32_t *) (b))[2]) && \
+ (((uint32_t *) (a))[3] == ((uint32_t *) (b))[3])) /* word i of a against word i of b, all four 32-bit words */
+
+/* Bind socket to a privileged IP port. */
+extern int bindresvport __P ((int __sockfd, struct sockaddr_in *__sock_in));
+
+
+
+#define IN6_IS_ADDR_MC_NODELOCAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && ((((uint8_t *) (a))[1] & 0xf) == 0x1)) /* multicast, low nibble of byte 1 is 0x1 */
+
+#define IN6_IS_ADDR_MC_LINKLOCAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && ((((uint8_t *) (a))[1] & 0xf) == 0x2)) /* multicast, low nibble of byte 1 is 0x2 */
+
+#define IN6_IS_ADDR_MC_SITELOCAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && ((((uint8_t *) (a))[1] & 0xf) == 0x5)) /* multicast, low nibble of byte 1 is 0x5 */
+
+#define IN6_IS_ADDR_MC_ORGLOCAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && ((((uint8_t *) (a))[1] & 0xf) == 0x8)) /* multicast, low nibble of byte 1 is 0x8 */
+
+#define IN6_IS_ADDR_MC_GLOBAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && ((((uint8_t *) (a))[1] & 0xf) == 0xe)) /* multicast, low nibble of byte 1 is 0xe */
+
+/* IPv6 packet information. */
+struct in6_pktinfo
+ {
+ struct in6_addr ipi6_addr; /* src/dst IPv6 address */
+ unsigned int ipi6_ifindex; /* send/recv interface index */
+ };
__END_DECLS
diff --git a/include/netinet/in_systm.h b/include/netinet/in_systm.h
index f481c5511..902fe6ea0 100644
--- a/include/netinet/in_systm.h
+++ b/include/netinet/in_systm.h
@@ -1 +1,41 @@
-#include <linux/in_systm.h>
+/* System specific type definitions for networking code.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _NETINET_IN_SYSTM_H
+#define _NETINET_IN_SYSTM_H 1
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+__BEGIN_DECLS
+
+/*
+ * Network order versions of various data types. Unfortunately, BSD
+ * assumes specific sizes for shorts (16 bit) and longs (32 bit) which
+ * don't hold in general. As a consequence, the network order versions
+ * may not reflect the actual size of the native data types.
+ */
+
+typedef u_int16_t n_short; /* short as received from the net */
+typedef u_int32_t n_long; /* long as received from the net */
+typedef u_int32_t n_time; /* ms since 00:00 GMT, byte rev */
+
+__END_DECLS
+
+#endif /* netinet/in_systm.h */
diff --git a/include/netinet/ip.h b/include/netinet/ip.h
index a6c49b6af..246a56a5d 100644
--- a/include/netinet/ip.h
+++ b/include/netinet/ip.h
@@ -1,39 +1,276 @@
-#ifndef _NETINET_IP_H
-#define _NETINET_IP_H
+/* Copyright (C) 1991, 92, 93, 95, 96, 97, 98 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef __NETINET_IP_H
+#define __NETINET_IP_H 1
#include <features.h>
+#include <sys/types.h>
+
#include <netinet/in.h>
-#include <linux/ip.h>
-#ifdef _BSD_SOURCE
+__BEGIN_DECLS
+
+struct timestamp /* NOTE(review): looks like the IP timestamp option body; cf. struct ip_timestamp -- confirm */
+ {
+ u_int8_t len; /* presumably the option length -- confirm */
+ u_int8_t ptr; /* presumably index of the current entry -- confirm */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ unsigned int flags:4; /* 4-bit flags; declared first on little-endian */
+ unsigned int overflow:4; /* 4-bit overflow counter */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ unsigned int overflow:4; /* 4-bit overflow counter; declared first on big-endian */
+ unsigned int flags:4; /* 4-bit flags */
+#else
+# error "Please fix <bits/endian.h>"
+#endif
+ u_int32_t data[9]; /* nine 32-bit data words */
+ };
+
+struct ip_options
+ {
+ u_int32_t faddr; /* Saved first hop address */
+ u_int8_t optlen;
+ u_int8_t srr;
+ u_int8_t rr;
+ u_int8_t ts;
+ unsigned int is_setbyuser:1; /* Set by setsockopt? */
+ unsigned int is_data:1; /* Options in __data, rather than skb */
+ unsigned int is_strictroute:1; /* Strict source route */
+ unsigned int srr_is_hit:1; /* Packet destination addr was one of ours */
+ unsigned int is_changed:1; /* IP checksum no longer valid */
+ unsigned int rr_needaddr:1; /* Need to record addr of outgoing dev */
+ unsigned int ts_needtime:1; /* Need to record timestamp */
+ unsigned int ts_needaddr:1; /* Need to record addr of outgoing dev */
+ u_int8_t router_alert;
+ u_int8_t __pad1;
+ u_int8_t __pad2;
+#ifdef __GNUC__ /* zero-length array below is a GNU C extension */
+ u_int8_t __data[0]; /* trailing option bytes, see is_data */
+#endif
+ };
+
+struct iphdr /* IPv4 header; same layout as the BSD struct ip declared later in this file */
+ {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ unsigned int ihl:4; /* header length */
+ unsigned int version:4; /* version */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ unsigned int version:4; /* version */
+ unsigned int ihl:4; /* header length */
+#else
+# error "Please fix <bits/endian.h>"
+#endif
+ u_int8_t tos; /* type of service */
+ u_int16_t tot_len; /* total length */
+ u_int16_t id; /* identification */
+ u_int16_t frag_off; /* fragment offset field */
+ u_int8_t ttl; /* time to live */
+ u_int8_t protocol; /* protocol */
+ u_int16_t check; /* checksum */
+ u_int32_t saddr; /* source address */
+ u_int32_t daddr; /* destination address */
+ /* The options start here. */
+ };
+#ifdef __USE_BSD
/*
- * BSD has the following structure
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for internet protocol version 4.
+ * Per RFC 791, September 1981.
+ */
+
+/*
+ * Structure of an internet header, naked of options.
*/
-
struct ip
-{
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 ip_hl:4,
- ip_v:4;
-#else
- __u8 ip_v:4,
- ip_hl:4;
+ {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ unsigned int ip_hl:4; /* header length */
+ unsigned int ip_v:4; /* version */
+#endif
+#if __BYTE_ORDER == __BIG_ENDIAN
+ unsigned int ip_v:4; /* version */
+ unsigned int ip_hl:4; /* header length */
#endif
-#define IPVERSION 4
- __u8 ip_tos;
- __u16 ip_len;
- __u16 ip_id;
- __u16 ip_off;
- __u8 ip_ttl;
- __u8 ip_p;
- __u16 ip_csum;
- struct in_addr ip_src,ip_dst;
-};
-
-#define IP_DF 0x4000 /* dont fragment flag */
-#define IP_MF 0x2000 /* more fragments flag */
+ u_int8_t ip_tos; /* type of service */
+ u_short ip_len; /* total length */
+ u_short ip_id; /* identification */
+ u_short ip_off; /* fragment offset field */
+#define IP_RF 0x8000 /* reserved fragment flag */
+#define IP_DF 0x4000 /* dont fragment flag */
+#define IP_MF 0x2000 /* more fragments flag */
+#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
+ u_int8_t ip_ttl; /* time to live */
+ u_int8_t ip_p; /* protocol */
+ u_short ip_sum; /* checksum */
+ struct in_addr ip_src, ip_dst; /* source and dest address */
+ };
+/*
+ * Time stamp option structure.
+ */
+struct ip_timestamp
+ {
+ u_int8_t ipt_code; /* IPOPT_TS */
+ u_int8_t ipt_len; /* size of structure (variable) */
+ u_int8_t ipt_ptr; /* index of current entry */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ unsigned int ipt_flg:4; /* flags, see below */
+ unsigned int ipt_oflw:4; /* overflow counter */
+#endif
+#if __BYTE_ORDER == __BIG_ENDIAN
+ unsigned int ipt_oflw:4; /* overflow counter */
+ unsigned int ipt_flg:4; /* flags, see below */
#endif
+ u_int32_t data[9];
+ };
+#endif /* __USE_BSD */
+
+#define IPVERSION 4 /* IP version number */
+#define IP_MAXPACKET 65535 /* maximum packet size */
+
+/*
+ * Definitions for IP type of service (ip_tos)
+ */
+#define IPTOS_TOS_MASK 0x1E
+#define IPTOS_TOS(tos) ((tos) & IPTOS_TOS_MASK)
+#define IPTOS_LOWDELAY 0x10
+#define IPTOS_THROUGHPUT 0x08
+#define IPTOS_RELIABILITY 0x04
+#define IPTOS_LOWCOST 0x02
+#define IPTOS_MINCOST IPTOS_LOWCOST
+
+/*
+ * Definitions for IP precedence (also in ip_tos) (hopefully unused)
+ */
+#define IPTOS_PREC_MASK 0xe0
+#define IPTOS_PREC(tos) ((tos) & IPTOS_PREC_MASK)
+#define IPTOS_PREC_NETCONTROL 0xe0
+#define IPTOS_PREC_INTERNETCONTROL 0xc0
+#define IPTOS_PREC_CRITIC_ECP 0xa0
+#define IPTOS_PREC_FLASHOVERRIDE 0x80
+#define IPTOS_PREC_FLASH 0x60
+#define IPTOS_PREC_IMMEDIATE 0x40
+#define IPTOS_PREC_PRIORITY 0x20
+#define IPTOS_PREC_ROUTINE 0x00
+
+/*
+ * Definitions for options.
+ */
+#define IPOPT_COPY 0x80
+#define IPOPT_CLASS_MASK 0x60
+#define IPOPT_NUMBER_MASK 0x1f
+
+#define IPOPT_COPIED(o) ((o) & IPOPT_COPY)
+#define IPOPT_CLASS(o) ((o) & IPOPT_CLASS_MASK)
+#define IPOPT_NUMBER(o) ((o) & IPOPT_NUMBER_MASK)
+
+#define IPOPT_CONTROL 0x00
+#define IPOPT_RESERVED1 0x20
+#define IPOPT_DEBMEAS 0x40
+#define IPOPT_MEASUREMENT IPOPT_DEBMEAS
+#define IPOPT_RESERVED2 0x60
+
+#define IPOPT_EOL 0 /* end of option list */
+#define IPOPT_END IPOPT_EOL
+#define IPOPT_NOP 1 /* no operation */
+#define IPOPT_NOOP IPOPT_NOP
+
+#define IPOPT_RR 7 /* record packet route */
+#define IPOPT_TS 68 /* timestamp */
+#define IPOPT_TIMESTAMP IPOPT_TS
+#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */
+#define IPOPT_SEC IPOPT_SECURITY
+#define IPOPT_LSRR 131 /* loose source route */
+#define IPOPT_SATID 136 /* satnet id */
+#define IPOPT_SID IPOPT_SATID
+#define IPOPT_SSRR 137 /* strict source route */
+#define IPOPT_RA 148 /* router alert */
+
+/*
+ * Offsets to fields in options other than EOL and NOP.
+ */
+#define IPOPT_OPTVAL 0 /* option ID */
+#define IPOPT_OLEN 1 /* option length */
+#define IPOPT_OFFSET 2 /* offset within option */
+#define IPOPT_MINOFF 4 /* min value of above */
+
+#define MAX_IPOPTLEN 40
+
+/* flag bits for ipt_flg */
+#define IPOPT_TS_TSONLY 0 /* timestamps only */
+#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */
+#define IPOPT_TS_PRESPEC 3 /* specified modules only */
+
+/* bits for security (not byte swapped) */
+#define IPOPT_SECUR_UNCLASS 0x0000
+#define IPOPT_SECUR_CONFID 0xf135
+#define IPOPT_SECUR_EFTO 0x789a
+#define IPOPT_SECUR_MMMM 0xbc4d
+#define IPOPT_SECUR_RESTR 0xaf13
+#define IPOPT_SECUR_SECRET 0xd788
+#define IPOPT_SECUR_TOPSECRET 0x6bc5
+
+/*
+ * Internet implementation parameters.
+ */
+#define MAXTTL 255 /* maximum time to live (seconds) */
+#define IPDEFTTL 64 /* default ttl, from RFC 1340 */
+#define IPFRAGTTL 60 /* time to live for frags, slowhz */
+#define IPTTLDEC 1 /* subtracted when forwarding */
+
+#define IP_MSS 576 /* default maximum segment size */
+
+__END_DECLS
-#endif /* _NETINET_IP_H */
+#endif /* netinet/ip.h */
diff --git a/include/netinet/ip6.h b/include/netinet/ip6.h
new file mode 100644
index 000000000..cd42ef64b
--- /dev/null
+++ b/include/netinet/ip6.h
@@ -0,0 +1,106 @@
+/* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _NETINET_IP6_H
+#define _NETINET_IP6_H 1
+
+#include <inttypes.h>
+#include <netinet/in.h>
+
+struct ip6_hdr
+ {
+ union
+ {
+ struct ip6_hdrctl
+ {
+ uint32_t ip6_un1_flow; /* 24 bits of flow-ID */
+ uint16_t ip6_un1_plen; /* payload length */
+ uint8_t ip6_un1_nxt; /* next header */
+ uint8_t ip6_un1_hlim; /* hop limit */
+ } ip6_un1;
+ uint8_t ip6_un2_vfc; /* 4 bits version, 4 bits priority */
+ } ip6_ctlun;
+ struct in6_addr ip6_src; /* source address */
+ struct in6_addr ip6_dst; /* destination address */
+ };
+
+#define ip6_vfc ip6_ctlun.ip6_un2_vfc
+#define ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow
+#define ip6_plen ip6_ctlun.ip6_un1.ip6_un1_plen
+#define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt
+#define ip6_hlim ip6_ctlun.ip6_un1.ip6_un1_hlim
+#define ip6_hops ip6_ctlun.ip6_un1.ip6_un1_hlim
+
+/* Hop-by-Hop options header. */
+struct ip6_hbh
+ {
+ uint8_t ip6h_nxt; /* next header. */
+ uint8_t ip6h_len; /* length in units of 8 octets. */
+ /* followed by options */
+ };
+
+/* Destination options header */
+struct ip6_dest
+ {
+ uint8_t ip6d_nxt; /* next header */
+ uint8_t ip6d_len; /* length in units of 8 octets */
+ /* followed by options */
+ };
+
+/* Routing header */
+struct ip6_rthdr
+ {
+ uint8_t ip6r_nxt; /* next header */
+ uint8_t ip6r_len; /* length in units of 8 octets */
+ uint8_t ip6r_type; /* routing type */
+ uint8_t ip6r_segleft; /* segments left */
+ /* followed by routing type specific data */
+ };
+
+/* Type 0 Routing header */
+struct ip6_rthdr0
+ {
+ uint8_t ip6r0_nxt; /* next header */
+ uint8_t ip6r0_len; /* length in units of 8 octets */
+ uint8_t ip6r0_type; /* always zero */
+ uint8_t ip6r0_segleft; /* segments left */
+ uint8_t ip6r0_reserved; /* reserved field */
+ uint8_t ip6r0_slmap[3]; /* strict/loose bit map */
+ struct in6_addr ip6r0_addr[1]; /* up to 23 addresses */
+ };
+
+/* Fragment header */
+struct ip6_frag
+ {
+ uint8_t ip6f_nxt; /* next header */
+ uint8_t ip6f_reserved; /* reserved field */
+ uint16_t ip6f_offlg; /* offset, reserved, and flag */
+ uint32_t ip6f_ident; /* identification */
+ };
+
+#if __BYTE_ORDER == __BIG_ENDIAN /* __ names: plain BYTE_ORDER/BIG_ENDIAN may be undefined, and then 0 == 0 picks this branch */
+#define IP6F_OFF_MASK 0xfff8 /* mask out offset from _offlg */
+#define IP6F_RESERVED_MASK 0x0006 /* reserved bits in ip6f_offlg */
+#define IP6F_MORE_FRAG 0x0001 /* more-fragments flag */
+#else /* __BYTE_ORDER == __LITTLE_ENDIAN */
+#define IP6F_OFF_MASK 0xf8ff /* mask out offset from _offlg */
+#define IP6F_RESERVED_MASK 0x0600 /* reserved bits in ip6f_offlg */
+#define IP6F_MORE_FRAG 0x0100 /* more-fragments flag */
+#endif
+
+#endif /* netinet/ip6.h */
diff --git a/include/netinet/ip_icmp.h b/include/netinet/ip_icmp.h
index 1662e3f7f..be7959ee7 100644
--- a/include/netinet/ip_icmp.h
+++ b/include/netinet/ip_icmp.h
@@ -1 +1,287 @@
-#include <linux/icmp.h>
+/* Copyright (C) 1991, 92, 93, 95, 96, 97 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef __NETINET_IP_ICMP_H
+#define __NETINET_IP_ICMP_H 1
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+__BEGIN_DECLS
+
+struct icmphdr
+{
+ u_int8_t type; /* message type */
+ u_int8_t code; /* type sub-code */
+ u_int16_t checksum;
+ union
+ {
+ struct
+ {
+ u_int16_t id;
+ u_int16_t sequence;
+ } echo; /* echo datagram */
+ u_int32_t gateway; /* gateway address */
+ struct
+ {
+ u_int16_t __unused;
+ u_int16_t mtu;
+ } frag; /* path mtu discovery */
+ } un;
+};
+
+#define ICMP_ECHOREPLY 0 /* Echo Reply */
+#define ICMP_DEST_UNREACH 3 /* Destination Unreachable */
+#define ICMP_SOURCE_QUENCH 4 /* Source Quench */
+#define ICMP_REDIRECT 5 /* Redirect (change route) */
+#define ICMP_ECHO 8 /* Echo Request */
+#define ICMP_TIME_EXCEEDED 11 /* Time Exceeded */
+#define ICMP_PARAMETERPROB 12 /* Parameter Problem */
+#define ICMP_TIMESTAMP 13 /* Timestamp Request */
+#define ICMP_TIMESTAMPREPLY 14 /* Timestamp Reply */
+#define ICMP_INFO_REQUEST 15 /* Information Request */
+#define ICMP_INFO_REPLY 16 /* Information Reply */
+#define ICMP_ADDRESS 17 /* Address Mask Request */
+#define ICMP_ADDRESSREPLY 18 /* Address Mask Reply */
+#define NR_ICMP_TYPES 18
+
+
+/* Codes for UNREACH. */
+#define ICMP_NET_UNREACH 0 /* Network Unreachable */
+#define ICMP_HOST_UNREACH 1 /* Host Unreachable */
+#define ICMP_PROT_UNREACH 2 /* Protocol Unreachable */
+#define ICMP_PORT_UNREACH 3 /* Port Unreachable */
+#define ICMP_FRAG_NEEDED 4 /* Fragmentation Needed/DF set */
+#define ICMP_SR_FAILED 5 /* Source Route failed */
+#define ICMP_NET_UNKNOWN 6
+#define ICMP_HOST_UNKNOWN 7
+#define ICMP_HOST_ISOLATED 8
+#define ICMP_NET_ANO 9
+#define ICMP_HOST_ANO 10
+#define ICMP_NET_UNR_TOS 11
+#define ICMP_HOST_UNR_TOS 12
+#define ICMP_PKT_FILTERED 13 /* Packet filtered */
+#define ICMP_PREC_VIOLATION 14 /* Precedence violation */
+#define ICMP_PREC_CUTOFF 15 /* Precedence cut off */
+#define NR_ICMP_UNREACH 15 /* instead of hardcoding immediate value */
+
+/* Codes for REDIRECT. */
+#define ICMP_REDIR_NET 0 /* Redirect Net */
+#define ICMP_REDIR_HOST 1 /* Redirect Host */
+#define ICMP_REDIR_NETTOS 2 /* Redirect Net for TOS */
+#define ICMP_REDIR_HOSTTOS 3 /* Redirect Host for TOS */
+
+/* Codes for TIME_EXCEEDED. */
+#define ICMP_EXC_TTL 0 /* TTL count exceeded */
+#define ICMP_EXC_FRAGTIME 1 /* Fragment Reass time exceeded */
+
+
+#ifdef __USE_BSD
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93
+ */
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+/*
+ * Internal of an ICMP Router Advertisement
+ */
+struct icmp_ra_addr
+{
+ u_int32_t ira_addr;
+ u_int32_t ira_preference;
+};
+
+struct icmp
+{
+ u_int8_t icmp_type; /* type of message, see below */
+ u_int8_t icmp_code; /* type sub code */
+ u_int16_t icmp_cksum; /* ones complement checksum of struct */
+ union
+ {
+ u_char ih_pptr; /* ICMP_PARAMPROB */
+ struct in_addr ih_gwaddr; /* gateway address */
+ struct ih_idseq /* echo datagram */
+ {
+ u_int16_t icd_id;
+ u_int16_t icd_seq;
+ } ih_idseq;
+ u_int32_t ih_void;
+
+ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
+ struct ih_pmtu
+ {
+ u_int16_t ipm_void;
+ u_int16_t ipm_nextmtu;
+ } ih_pmtu;
+
+ struct ih_rtradv
+ {
+ u_int8_t irt_num_addrs;
+ u_int8_t irt_wpa;
+ u_int16_t irt_lifetime;
+ } ih_rtradv;
+ } icmp_hun;
+#define icmp_pptr icmp_hun.ih_pptr
+#define icmp_gwaddr icmp_hun.ih_gwaddr
+#define icmp_id icmp_hun.ih_idseq.icd_id
+#define icmp_seq icmp_hun.ih_idseq.icd_seq
+#define icmp_void icmp_hun.ih_void
+#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void
+#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu
+#define icmp_num_addrs icmp_hun.ih_rtradv.irt_num_addrs
+#define icmp_wpa icmp_hun.ih_rtradv.irt_wpa
+#define icmp_lifetime icmp_hun.ih_rtradv.irt_lifetime
+ union
+ {
+ struct
+ {
+ u_int32_t its_otime;
+ u_int32_t its_rtime;
+ u_int32_t its_ttime;
+ } id_ts;
+ struct
+ {
+ struct ip idi_ip;
+ /* options and then 64 bits of data */
+ } id_ip;
+ struct icmp_ra_addr id_radv;
+ u_int32_t id_mask;
+ u_int8_t id_data[1];
+ } icmp_dun;
+#define icmp_otime icmp_dun.id_ts.its_otime
+#define icmp_rtime icmp_dun.id_ts.its_rtime
+#define icmp_ttime icmp_dun.id_ts.its_ttime
+#define icmp_ip icmp_dun.id_ip.idi_ip
+#define icmp_radv icmp_dun.id_radv
+#define icmp_mask icmp_dun.id_mask
+#define icmp_data icmp_dun.id_data
+};
+
+/*
+ * Lower bounds on packet lengths for various types.
+ * For the error advice packets, we must first ensure that the
+ * packet is large enough to contain the returned ip header.
+ * Only then can we do the check to see if 64 bits of packet
+ * data have been returned, since we need to check the returned
+ * ip header length.
+ */
+#define ICMP_MINLEN 8 /* abs minimum */
+#define ICMP_TSLEN (8 + 3 * sizeof (n_time)) /* timestamp */
+#define ICMP_MASKLEN 12 /* address mask */
+#define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */
+#ifndef _IP_VHL
+#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8)
+ /* N.B.: must separately check that ip_hl >= 5 */
+#else
+#define ICMP_ADVLEN(p) (8 + (IP_VHL_HL((p)->icmp_ip.ip_vhl) << 2) + 8)
+ /* N.B.: must separately check that header length >= 5 */
+#endif
+
+/* Definition of type and code fields. */
+/* defined above: ICMP_ECHOREPLY, ICMP_REDIRECT, ICMP_ECHO */
+#define ICMP_UNREACH 3 /* dest unreachable, codes: */
+#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */
+#define ICMP_ROUTERADVERT 9 /* router advertisement */
+#define ICMP_ROUTERSOLICIT 10 /* router solicitation */
+#define ICMP_TIMXCEED 11 /* time exceeded, code: */
+#define ICMP_PARAMPROB 12 /* ip header bad */
+#define ICMP_TSTAMP 13 /* timestamp request */
+#define ICMP_TSTAMPREPLY 14 /* timestamp reply */
+#define ICMP_IREQ 15 /* information request */
+#define ICMP_IREQREPLY 16 /* information reply */
+#define ICMP_MASKREQ 17 /* address mask request */
+#define ICMP_MASKREPLY 18 /* address mask reply */
+
+#define ICMP_MAXTYPE 18
+
+/* UNREACH codes */
+#define ICMP_UNREACH_NET 0 /* bad net */
+#define ICMP_UNREACH_HOST 1 /* bad host */
+#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */
+#define ICMP_UNREACH_PORT 3 /* bad port */
+#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */
+#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */
+#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */
+#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */
+#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */
+#define ICMP_UNREACH_NET_PROHIB 9 /* net denied */
+#define ICMP_UNREACH_HOST_PROHIB 10 /* host denied */
+#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */
+#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */
+#define ICMP_UNREACH_FILTER_PROHIB 13 /* admin prohib */
+#define ICMP_UNREACH_HOST_PRECEDENCE 14 /* host prec vio. */
+#define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 /* prec cutoff */
+
+/* REDIRECT codes */
+#define ICMP_REDIRECT_NET 0 /* for network */
+#define ICMP_REDIRECT_HOST 1 /* for host */
+#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */
+#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */
+
+/* TIMEXCEED codes */
+#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */
+#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */
+
+/* PARAMPROB code */
+#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */
+
+#define ICMP_INFOTYPE(type) \
+ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \
+ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \
+ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \
+ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \
+ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY)
+
+#endif /* __USE_BSD */
+
+__END_DECLS
+
+#endif /* netinet/ip_icmp.h */
diff --git a/include/netinet/tcp.h b/include/netinet/tcp.h
index 8636ec7d0..36976369e 100644
--- a/include/netinet/tcp.h
+++ b/include/netinet/tcp.h
@@ -1 +1,166 @@
-#include <netinet/ip_tcp.h>
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp.h 8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NETINET_TCP_H
+#define _NETINET_TCP_H 1
+
+#include <features.h>
+#include <sys/types.h>
+
+__BEGIN_DECLS
+
+#ifdef __FAVOR_BSD
+typedef u_int32_t tcp_seq; /* type of th_seq and th_ack below */
+/*
+ * TCP header.
+ * Per RFC 793, September, 1981.
+ */
+struct tcphdr
+ {
+ u_int16_t th_sport; /* source port */
+ u_int16_t th_dport; /* destination port */
+ tcp_seq th_seq; /* sequence number */
+ tcp_seq th_ack; /* acknowledgement number */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ u_int8_t th_x2:4; /* (unused) */
+ u_int8_t th_off:4; /* data offset */
+#endif
+#if __BYTE_ORDER == __BIG_ENDIAN
+ u_int8_t th_off:4; /* data offset */
+ u_int8_t th_x2:4; /* (unused) */
+#endif
+ u_int8_t th_flags; /* flag bits; see the TH_* values below */
+#define TH_FIN 0x01
+#define TH_SYN 0x02
+#define TH_RST 0x04
+#define TH_PUSH 0x08
+#define TH_ACK 0x10
+#define TH_URG 0x20
+ u_int16_t th_win; /* window */
+ u_int16_t th_sum; /* checksum */
+ u_int16_t th_urp; /* urgent pointer */
+};
+
+#else /* !__FAVOR_BSD */
+struct tcphdr
+ {
+ u_int16_t source; /* source port (th_sport in the BSD layout) */
+ u_int16_t dest; /* destination port (th_dport) */
+ u_int32_t seq; /* sequence number (th_seq) */
+ u_int32_t ack_seq; /* acknowledgement number (th_ack) */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ u_int16_t res1:4; /* reserved (th_x2 in the BSD layout) */
+ u_int16_t doff:4; /* data offset (th_off) */
+ u_int16_t fin:1; /* FIN flag */
+ u_int16_t syn:1; /* SYN flag */
+ u_int16_t rst:1; /* RST flag */
+ u_int16_t psh:1; /* PSH flag */
+ u_int16_t ack:1; /* ACK flag */
+ u_int16_t urg:1; /* URG flag */
+ u_int16_t res2:2; /* presumably reserved bits -- confirm */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ u_int16_t doff:4; /* data offset (th_off) */
+ u_int16_t res1:4; /* reserved (th_x2 in the BSD layout) */
+ u_int16_t res2:2; /* presumably reserved bits -- confirm */
+ u_int16_t urg:1; /* URG flag */
+ u_int16_t ack:1; /* ACK flag */
+ u_int16_t psh:1; /* PSH flag */
+ u_int16_t rst:1; /* RST flag */
+ u_int16_t syn:1; /* SYN flag */
+ u_int16_t fin:1; /* FIN flag */
+#else
+#error "Adjust your <bits/endian.h> defines"
+#endif
+ u_int16_t window; /* window (th_win) */
+ u_int16_t check; /* checksum (th_sum) */
+ u_int16_t urg_ptr; /* urgent pointer (th_urp) */
+};
+#endif /* __FAVOR_BSD */
+
+enum
+{
+ TCP_ESTABLISHED = 1,
+ TCP_SYN_SENT,
+ TCP_SYN_RECV,
+ TCP_FIN_WAIT1,
+ TCP_FIN_WAIT2,
+ TCP_TIME_WAIT,
+ TCP_CLOSE,
+ TCP_CLOSE_WAIT,
+ TCP_LAST_ACK,
+ TCP_LISTEN,
+ TCP_CLOSING /* now a valid state */
+};
+
+#define TCPOPT_EOL 0
+#define TCPOPT_NOP 1
+#define TCPOPT_MAXSEG 2
+#define TCPOLEN_MAXSEG 4
+#define TCPOPT_WINDOW 3
+#define TCPOLEN_WINDOW 3
+#define TCPOPT_SACK_PERMITTED 4 /* Experimental */
+#define TCPOLEN_SACK_PERMITTED 2
+#define TCPOPT_SACK 5 /* Experimental */
+#define TCPOPT_TIMESTAMP 8
+#define TCPOLEN_TIMESTAMP 10
+#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
+
+#define TCPOPT_TSTAMP_HDR \
+ (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)
+
+/*
+ * Default maximum segment size for TCP.
+ * With an IP MSS of 576, this is 536,
+ * but 512 is probably more convenient.
+ * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)).
+ */
+#define TCP_MSS 512
+
+#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */
+
+#define TCP_MAX_WINSHIFT 14 /* maximum window shift */
+
+/*
+ * User-settable options (used with setsockopt).
+ */
+#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
+#define TCP_MAXSEG 0x02 /* set maximum segment size */
+#define TCP_CORK 0x03 /* control sending of partial frames */
+
+#define SOL_TCP 6 /* TCP level */
+
+__END_DECLS
+
+#endif /* netinet/tcp.h */
diff --git a/include/netinet/udp.h b/include/netinet/udp.h
index 39a57a3c7..9ee66f0fc 100644
--- a/include/netinet/udp.h
+++ b/include/netinet/udp.h
@@ -1 +1,55 @@
-#include <netinet/ip_udp.h>
+/* Copyright (C) 1991, 92, 93, 95, 96, 97 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/*
+ * Copyright (c) 1982, 1986 Regents of the University of California.
+ * All rights reserved. The Berkeley software License Agreement
+ * specifies the terms and conditions for redistribution.
+ */
+
+#ifndef __NETINET_UDP_H
+#define __NETINET_UDP_H 1
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+
+__BEGIN_DECLS
+
+/* UDP header as specified by RFC 768, August 1980. */
+#ifdef __FAVOR_BSD
+struct udphdr {
+ u_int16_t uh_sport; /* source port */
+ u_int16_t uh_dport; /* destination port */
+ u_int16_t uh_ulen; /* udp length */
+ u_int16_t uh_sum; /* udp checksum */
+};
+#else /* !__FAVOR_BSD */
+
+struct udphdr {
+ u_int16_t source; /* source port (uh_sport in the BSD layout) */
+ u_int16_t dest; /* destination port (uh_dport) */
+ u_int16_t len; /* udp length (uh_ulen) */
+ u_int16_t check; /* udp checksum (uh_sum) */
+};
+#endif /* __FAVOR_BSD */
+
+#define SOL_UDP 17 /* sockopt level for UDP */
+
+__END_DECLS
+
+#endif /* netinet/udp.h */
diff --git a/include/regex.h b/include/regex.h
new file mode 100644
index 000000000..813882c42
--- /dev/null
+++ b/include/regex.h
@@ -0,0 +1,3754 @@
+#if !defined(_RX_H) || defined(RX_WANT_SE_DEFS)
+#define _RX_H
+
+/* Copyright (C) 1992, 1993 Free Software Foundation, Inc.
+
+This file is part of the librx library.
+
+Librx is free software; you can redistribute it and/or modify it under
+the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+Librx is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this software; see the file COPYING.LIB. If not,
+write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
+02139, USA. */
+/* t. lord Wed Sep 23 18:20:57 1992 */
+
+
+
+
+#include <features.h>
+
+#define __need_size_t
+#include <stddef.h>
+
+#include <string.h>
+
+#if RX_WANT_SE_DEFS != 1
+__BEGIN_DECLS
+#endif
+
+#ifndef RX_WANT_SE_DEFS
+
+/* This page: Bitsets */
+
+#ifndef RX_subset
+typedef unsigned int RX_subset;
+#define RX_subset_bits (32)
+#define RX_subset_mask (RX_subset_bits - 1)
+#endif
+
+typedef RX_subset * rx_Bitset;
+
+#ifdef __STDC__
+typedef void (*rx_bitset_iterator) (void *, int member_index);
+#else
+typedef void (*rx_bitset_iterator) ();
+#endif
+
+#define rx_bitset_subset(N) ((N) / RX_subset_bits)
+#define rx_bitset_subset_val(B,N) ((B)[rx_bitset_subset(N)])
+#define RX_bitset_access(B,N,OP) \
+ ((B)[rx_bitset_subset(N)] OP rx_subset_singletons[(N) & RX_subset_mask])
+#define RX_bitset_member(B,N) RX_bitset_access(B, N, &)
+#define RX_bitset_enjoin(B,N) RX_bitset_access(B, N, |=)
+#define RX_bitset_remove(B,N) RX_bitset_access(B, N, &= ~)
+#define RX_bitset_toggle(B,N) RX_bitset_access(B, N, ^= )
+#define rx_bitset_numb_subsets(N) (((N) + RX_subset_bits - 1) / RX_subset_bits)
+#define rx_sizeof_bitset(N) (rx_bitset_numb_subsets(N) * sizeof(RX_subset))
+
+
+
+/* This page: Splay trees. */
+
+#ifdef __STDC__
+typedef int (*rx_sp_comparer) (void * a, void * b);
+#else
+typedef int (*rx_sp_comparer) ();
+#endif
+
+struct rx_sp_node
+{
+ void * key;
+ void * data;
+ struct rx_sp_node * kids[2];
+};
+
+#ifdef __STDC__
+typedef void (*rx_sp_key_data_freer) (struct rx_sp_node *);
+#else
+typedef void (*rx_sp_key_data_freer) ();
+#endif
+
+
+/* giant inflatable hash trees */
+
+struct rx_hash_item
+{
+ struct rx_hash_item * next_same_hash;
+ struct rx_hash * table;
+ unsigned long hash;
+ void * data;
+ void * binding;
+};
+
+struct rx_hash
+{
+ struct rx_hash * parent;
+ int refs;
+ struct rx_hash * children[13];
+ struct rx_hash_item * buckets [13];
+ int bucket_size [13];
+};
+
+struct rx_hash_rules;
+
+#ifdef __STDC__
+/* should return like == */
+typedef int (*rx_hash_eq)(void *, void *);
+typedef struct rx_hash * (*rx_alloc_hash)(struct rx_hash_rules *);
+typedef void (*rx_free_hash)(struct rx_hash *,
+ struct rx_hash_rules *);
+typedef struct rx_hash_item * (*rx_alloc_hash_item)(struct rx_hash_rules *,
+ void *);
+typedef void (*rx_free_hash_item)(struct rx_hash_item *,
+ struct rx_hash_rules *);
+#else
+typedef int (*rx_hash_eq)();
+typedef struct rx_hash * (*rx_alloc_hash)();
+typedef void (*rx_free_hash)();
+typedef struct rx_hash_item * (*rx_alloc_hash_item)();
+typedef void (*rx_free_hash_item)();
+#endif
+
+struct rx_hash_rules
+{
+ rx_hash_eq eq;
+ rx_alloc_hash hash_alloc;
+ rx_free_hash free_hash;
+ rx_alloc_hash_item hash_item_alloc;
+ rx_free_hash_item free_hash_item;
+};
+
+
+/* Forward declarations */
+
+struct rx_cache;
+struct rx_superset;
+struct rx;
+struct rx_se_list;
+
+
+
+/*
+ * GLOSSARY
+ *
+ * regexp
+ * regular expression
+ * expression
+ * pattern - a `regular' expression. The expression
+ * need not be formally regular -- it can contain
+ * constructs that don't correspond to purely regular
+ * expressions.
+ *
+ * buffer
+ * string - the string (or strings) being searched or matched.
+ *
+ * pattern buffer - a structure of type `struct re_pattern_buffer'
+ * This in turn contains a `struct rx', which holds the
+ * NFA compiled from a pattern, as well as some of the state
+ * of a matcher using the pattern.
+ *
+ * NFA - nondeterministic finite automata. Some people
+ * use this term to refer to a member of the class of
+ * regular automata (those corresponding to a regular
+ * language). However, in this code, the meaning is
+ * more general. The automata used by Rx are comparable
+ * in power to what are usually called `push down automata'.
+ *
+ * Two NFA are built by rx for every pattern. One is built
+ * by the compiler. The other is built from the first, on
+ * the fly, by the matcher. The latter is called the `superstate
+ * NFA' because its states correspond to sets of states from
+ * the first NFA. (Joe Keane gets credit for the name
+ * `superstate NFA').
+ *
+ * NFA edges
+ * epsilon edges
+ * side-effect edges - The NFA compiled from a pattern can have three
+ * kinds of edges. Epsilon edges can be taken freely anytime
+ * their source state is reached. Character set edges can be
+ * taken when their source state is reached and when the next
+ * character in the buffer is a member of the set. Side effect
+ * edges imply a transition that can only be taken after the
+ * indicated side effect has been successfully accomplished.
+ * Some examples of side effects are:
+ *
+ * Storing the current match position to record the
+ * location of a parenthesized subexpression.
+ *
+ * Advancing the matcher over N characters if they
+ * match the N characters previously matched by a
+ * parenthesized subexpression.
+ *
+ * Both of those kinds of edges occur in the NFA generated
+ * by the pattern: \(.\)\1
+ *
+ * Epsilon and side effect edges are similar. Unfortunately,
+ * some of the code uses the name `epsilon edge' to mean
+ * both epsilon and side effect edges. For example, the
+ * function has_non_idempotent_epsilon_path computes the existence
+ * of a non-trivial path containing only a mix of epsilon and
+ * side effect edges. In that case `nonidempotent epsilon' is being
+ * used to mean `side effect'.
+ */
+
+
+
+
+
+/* LOW LEVEL PATTERN BUFFERS */
+
+/* Suppose that from some NFA state, more than one path through
+ * side-effect edges is possible. In what order should the paths
+ * be tried? A function of type rx_se_list_order answers that
+ * question. It compares two lists of side effects, and says
+ * which list comes first.
+ */
+
+#ifdef __STDC__
+typedef int (*rx_se_list_order) (struct rx *,
+ struct rx_se_list *,
+ struct rx_se_list *);
+#else
+typedef int (*rx_se_list_order) ();
+#endif
+
+
+
+/* Struct RX holds a compiled regular expression - that is, an nfa
+ * ready to be converted on demand to a more efficient superstate nfa.
+ * This is for the low level interface. The high-level interfaces enclose
+ * this in a `struct re_pattern_buffer'.
+ */
+struct rx
+{
+ /* The compiler assigns a unique id to every pattern.
+ * Like sequence numbers in X, there is a subtle bug here
+ * if you use Rx in a system that runs for a long time.
+ * But, because of the way the caches work out, it is almost
+ * impossible to trigger the Rx version of this bug.
+ *
+ * The id is used to validate superstates found in a cache
+ * of superstates. It isn't sufficient to let a superstate
+ * point back to the rx for which it was compiled -- the caller
+ * may be re-using a `struct rx' in which case the superstate
+ * is not really valid. So instead, superstates are validated
+ * by checking the sequence number of the pattern for which
+ * they were built.
+ */
+ int rx_id;
+
+ /* This is memory mgt. state for superstates. This may be
+ * shared by more than one struct rx.
+ */
+ struct rx_cache * cache;
+
+ /* Every regex defines the size of its own character set.
+ * A superstate has an array of this size, with each element
+ * a `struct rx_inx'. So, don't make this number too large.
+ * In particular, don't make it 2^16.
+ */
+ int local_cset_size;
+
+ /* After the NFA is built, it is copied into a contiguous region
+ * of memory (mostly for compatibility with GNU regex).
+ * Here is that region, and its size:
+ */
+ void * buffer;
+ unsigned long allocated;
+
+ /* Clients of RX can ask for some extra storage in the space pointed
+ * to by BUFFER. The field RESERVED is an input parameter to the
+ * compiler. After compilation, this much space will be available
+ * at (buffer + allocated - reserved)
+ */
+ unsigned long reserved;
+
+ /* --------- The remaining fields are for internal use only. --------- */
+ /* --------- But! they must be initialized to 0. --------- */
+
+ /* NODEC is the number of nodes in the NFA with non-epsilon
+ * transitions.
+ */
+ int nodec;
+
+ /* EPSNODEC is the number of nodes with only epsilon transitions. */
+ int epsnodec;
+
+ /* The sum (NODEC + EPSNODEC) is the total number of states in the
+ * compiled NFA.
+ */
+
+ /* Lists of side effects as stored in the NFA are `hash consed'..meaning
+ * that lists with the same elements are ==. During compilation,
+ * this table facilitates hash-consing.
+ */
+ struct rx_hash se_list_memo;
+
+ /* Lists of NFA states are also hashed.
+ */
+ struct rx_hash set_list_memo;
+
+
+
+
+ /* The compiler and matcher must build a number of instruction frames.
+ * The format of these frames is fixed (c.f. struct rx_inx). The values
+ * of the instructions are not fixed.
+ *
+ * An enumerated type (enum rx_opcode) defines the set of instructions
+ * that the compiler or matcher might generate. When filling an instruction
+ * frame, the INX field is found by indexing this instruction table
+ * with an opcode:
+ */
+ void ** instruction_table;
+
+ /* The list of all states in an NFA.
+ * During compilation, the NEXT field of NFA states links this list.
+ * After compilation, all the states are compacted into an array,
+ * ordered by state id numbers. At that time, this points to the base
+ * of that array.
+ */
+ struct rx_nfa_state *nfa_states;
+
+ /* Every nfa begins with one distinguished starting state:
+ */
+ struct rx_nfa_state *start;
+
+ /* This orders the search through super-nfa paths.
+ * See the comment near the typedef of rx_se_list_order.
+ */
+ rx_se_list_order se_list_cmp;
+
+ struct rx_superset * start_set;
+};
+
+
+
+
+/* SYNTAX TREES */
+
+/* Compilation is in stages.
+ *
+ * In the first stage, a pattern specified by a string is
+ * translated into a syntax tree. Later stages will convert
+ * the syntax tree into an NFA optimized for conversion to a
+ * superstate-NFA.
+ *
+ * This page is about syntax trees.
+ */
+
+enum rexp_node_type
+{
+ r_cset, /* Match from a character set. `a' or `[a-z]'*/
+ r_concat, /* Concat two subexpressions. `ab' */
+ r_alternate, /* Choose one of two subexpressions. `a\|b' */
+ r_opt, /* Optional subexpression. `a?' */
+ r_star, /* Repeated subexpression. `a*' */
+
+
+ /* A 2phase-star is a variation on a repeated subexpression.
+ * In this case, there are two subexpressions. The first, if matched,
+ * begins a repetition (otherwise, the whole expression matches the
+ * empty string).
+ *
+ * After matching the first subexpression, a 2phase star either finishes,
+ * or matches the second subexpression. If the second subexpression is
+ * matched, then the whole construct repeats.
+ *
+ * 2phase stars are used in two circumstances. First, they
+ * are used as part of the implementation of POSIX intervals (counted
+ * repetitions). Second, they are used to implement proper star
+ * semantics when the repeated subexpression contains paths of
+ * only side effects. See rx_compile for more information.
+ */
+ r_2phase_star,
+
+
+ /* c.f. "typedef void * rx_side_effect" */
+ r_side_effect,
+
+ /* This is an extension type: It is for transient use in source->source
+ * transformations (implemented over syntax trees).
+ */
+ r_data
+};
+
+/* A side effect is a matcher-specific action associated with
+ * transitions in the NFA. The details of side effects are up
+ * to the matcher. To the compiler and superstate constructors
+ * side effects are opaque:
+ */
+
+typedef void * rx_side_effect;
+
+/* Nodes in a syntax tree are of this type:
+ */
+struct rexp_node
+{
+ enum rexp_node_type type;
+ union
+ {
+ rx_Bitset cset;
+ rx_side_effect side_effect;
+ struct
+ {
+ struct rexp_node *left;
+ struct rexp_node *right;
+ } pair;
+ void * data;
+ } params;
+};
+
+
+
+/* NFA
+ *
+ * A syntax tree is compiled into an NFA. This page defines the structure
+ * of that NFA.
+ */
+
+struct rx_nfa_state
+{
+ /* These are kept in a list as the NFA is being built. */
+ struct rx_nfa_state *next;
+
+ /* After the NFA is built, states are given integer id's.
+ * States whose outgoing transitions are all either epsilon or
+ * side effect edges are given ids less than 0. Other states
+ * are given successive non-negative ids starting from 0.
+ */
+ int id;
+
+ /* The list of NFA edges that go from this state to some other. */
+ struct rx_nfa_edge *edges;
+
+ /* If you land in this state, then you implicitly land
+ * in all other states reachable by only epsilon transitions.
+ * Call the set of maximal paths to such states the epsilon closure
+ * of this state.
+ *
+ * There may be other states that are reachable by a mixture of
+ * epsilon and side effect edges. Consider the set of maximal paths
+ * of that sort from this state. Call it the epsilon-side-effect
+ * closure of the state.
+ *
+ * The epsilon closure of the state is a subset of the epsilon-side-
+ * effect closure. It consists of all the paths that contain
+ * no side effects -- only epsilon edges.
+ *
+ * The paths in the epsilon-side-effect closure can be partitioned
+ * into equivalence sets. Two paths are equivalent if they have the
+ * same set of side effects, in the same order. The epsilon-closure
+ * is one of these equivalence sets. Let's call these equivalence
+ * sets: observably equivalent path sets. That name is chosen
+ * because equivalence of two paths means they cause the same side
+ * effects -- so they lead to the same subsequent observations other
+ * than that they may wind up in different target states.
+ *
+ * The superstate nfa, which is derived from this nfa, is based on
+ * the observation that all of the paths in an observably equivalent
+ * path set can be explored at the same time, provided that the
+ * matcher keeps track not of a single nfa state, but of a set of
+ * states. In particular, after following all the paths in an
+ * observably equivalent set, you wind up at a set of target states.
+ * That set of target states corresponds to one state in the
+ * superstate NFA.
+ *
+ * Staticly, before matching begins, it is convenient to analyze the
+ * nfa. Each state is labeled with a list of the observably
+ * equivalant path sets who's union covers all the
+ * epsilon-side-effect paths beginning in this state. This list is
+ * called the possible futures of the state.
+ *
+ * A trivial example is this NFA:
+ * s1
+ * A ---> B
+ *
+ * s2
+ * ---> C
+ *
+ * epsilon s1
+ * ---------> D ------> E
+ *
+ *
+ * In this example, A has two possible futures.
+ * One invokes the side effect `s1' and contains two paths,
+ * one ending in state B, the other in state E.
+ * The other invokes the side effect `s2' and contains only
+ * one path, landing in state C.
+ */
+ struct rx_possible_future *futures;
+
+
+ /* There are exactly two distinguished states in every NFA: */
+ unsigned int is_final:1;
+ unsigned int is_start:1;
+
+ /* These are used during NFA construction... */
+ unsigned int eclosure_needed:1;
+ unsigned int mark:1;
+};
+
+
+/* An edge in an NFA is typed: */
+enum rx_nfa_etype
+{
+ /* A cset edge is labeled with a set of characters one of which
+ * must be matched for the edge to be taken.
+ */
+ ne_cset,
+
+ /* An epsilon edge is taken whenever its starting state is
+ * reached.
+ */
+ ne_epsilon,
+
+ /* A side effect edge is taken whenever its starting state is
+ * reached. Side effects may cause the match to fail or the
+ * position of the matcher to advance.
+ */
+ ne_side_effect /* A special kind of epsilon. */
+};
+
+struct rx_nfa_edge
+{
+ struct rx_nfa_edge *next; /* Presumably the next edge in the origin state's `edges' list -- confirm. */
+ enum rx_nfa_etype type; /* Kind of edge: cset, epsilon, or side effect. */
+ struct rx_nfa_state *dest; /* Presumably the state this edge leads to -- confirm. */
+ union
+ {
+ rx_Bitset cset; /* Character-set label; presumably meaningful only for ne_cset edges. */
+ rx_side_effect side_effect; /* Presumably meaningful only for ne_side_effect edges. */
+ } params;
+};
+
+
+
+/* A possible future consists of a list of side effects
+ * and a set of destination states. Below are their
+ * representations. These structures are hash-consed which
+ * means that lists with the same elements share a representation
+ * (their addresses are ==).
+ */
+
+struct rx_nfa_state_set
+{
+ struct rx_nfa_state * car; /* Head element of the list (Lisp-style naming; presumably one member state). */
+ struct rx_nfa_state_set * cdr; /* Rest of the list (presumably the remaining member states). */
+};
+
+struct rx_se_list
+{
+ rx_side_effect car; /* Head element of the list (Lisp-style naming; presumably one side effect). */
+ struct rx_se_list * cdr; /* Rest of the list (presumably the remaining side effects). */
+};
+
+struct rx_possible_future
+{
+ struct rx_possible_future *next; /* Presumably links the futures listed for one NFA state -- confirm. */
+ struct rx_se_list * effects; /* The list of side effects of this future. */
+ struct rx_nfa_state_set * destset; /* The set of destination states of this future. */
+};
+
+
+
+/* This begins the description of the superstate NFA.
+ *
+ * The superstate NFA corresponds to the NFA in these ways:
+ *
+ * Every superstate NFA state SUPER corresponds to a set of NFA states,
+ * nfa_states(SUPER).
+ *
+ * Superstate edges correspond to NFA paths.
+ *
+ * The superstate has no epsilon transitions;
+ * every edge has a character label, and a (possibly empty) side
+ * effect label. The side effect label corresponds to a list of
+ * side effects that occur in the NFA. These parts are referred
+ * to as: superedge_character(EDGE) and superedge_sides(EDGE).
+ *
+ * For a superstate edge EDGE starting in some superstate SUPER,
+ * the following is true (in pseudo-notation :-):
+ *
+ * exists DEST in nfa_states s.t.
+ * exists nfaEDGE in nfa_edges s.t.
+ * origin (nfaEDGE) == DEST
+ * && origin (nfaEDGE) is a member of nfa_states(SUPER)
+ * && exists PF in possible_futures(dest(nfaEDGE)) s.t.
+ * sides_of_possible_future (PF) == superedge_sides (EDGE)
+ *
+ * also:
+ *
+ * let SUPER2 := superedge_destination(EDGE)
+ * nfa_states(SUPER2)
+ * == union of all nfa state sets S s.t.
+ * exists PF in possible_futures(dest(nfaEDGE)) s.t.
+ * sides_of_possible_future (PF) == superedge_sides (EDGE)
+ * && S == dests_of_possible_future (PF) }
+ *
+ * Or in English, every superstate is a set of nfa states. A given
+ * character and a superstate implies many transitions in the NFA --
+ * those that begin with an edge labeled with that character from a
+ * state in the set corresponding to the superstate.
+ *
+ * The destinations of those transitions each have a set of possible
+ * futures. A possible future is a list of side effects and a set of
+ * destination NFA states. Two sets of possible futures can be
+ * `merged' by combining all pairs of possible futures that have the
+ * same side effects. A pair is combined by creating a new future
+ * with the same side effect but the union of the two destination sets.
+ * In this way, all the possible futures suggested by a superstate
+ * and a character can be merged into a set of possible futures where
+ * no two elements of the set have the same set of side effects.
+ *
+ * The destination of a possible future, being a set of NFA states,
+ * corresponds to a supernfa state. So, the merged set of possible
+ * futures we just created can serve as a set of edges in the
+ * supernfa.
+ *
+ * The representation of the superstate nfa and the nfa is critical.
+ * The nfa has to be compact, but has to facilitate the rapid
+ * computation of missing superstates. The superstate nfa has to
+ * be fast to interpret, lazily constructed, and bounded in space.
+ *
+ * To facilitate interpretation, the superstate data structures are
+ * peppered with `instruction frames'. There is an instruction set
+ * defined below which matchers using the supernfa must be able to
+ * interpret.
+ *
+ * We'd like to make it possible but not mandatory to use code
+ * addresses to represent instructions (c.f. gcc's computed goto).
+ * Therefore, we define an enumerated type of opcodes, and when
+ * writing one of these instructions into a data structure, use
+ * the opcode as an index into a table of instruction values.
+ *
+ * Here are the opcodes that occur in the superstate nfa:
+ */
+
+
+/* Every superstate contains a table of instruction frames indexed
+ * by characters. A normal `move' in a matcher is to fetch the next
+ * character and use it as an index into a superstate's transition
+ * table.
+ *
+ * In the fastest case, only one edge follows from that character.
+ * In other cases there is more work to do.
+ *
+ * The descriptions of the opcodes refer to data structures that are
+ * described further below.
+ */
+
+enum rx_opcode
+{
+ /*
+ * BACKTRACK_POINT is invoked when a character transition in
+ * a superstate leads to more than one edge. In that case,
+ * the edges have to be explored independently using a backtracking
+ * strategy.
+ *
+ * A BACKTRACK_POINT instruction is stored in a superstate's
+ * transition table for some character when it is known that that
+ * character crosses more than one edge. On encountering this
+ * instruction, the matcher saves enough state to backtrack to this
+ * point in the match later.
+ */
+ rx_backtrack_point = 0, /* data is (struct transition_class *) */
+
+ /*
+ * RX_DO_SIDE_EFFECTS evaluates the side effects of an epsilon path.
+ * There is one occurrence of this instruction per rx_distinct_future.
+ * This instruction is skipped if an rx_distinct_future has no side effects.
+ */
+ rx_do_side_effects = rx_backtrack_point + 1,
+
+ /* data is (struct rx_distinct_future *) */
+
+ /*
+ * RX_CACHE_MISS instructions are stored in rx_distinct_futures whose
+ * destination superstate has been reclaimed (or was never built).
+ * It recomputes the destination superstate.
+ * RX_CACHE_MISS is also stored in a superstate transition table before
+ * any of its edges have been built.
+ */
+ rx_cache_miss = rx_do_side_effects + 1,
+ /* data is (struct rx_distinct_future *) */
+
+ /*
+ * RX_NEXT_CHAR is called to consume the next character and take the
+ * corresponding transition. This is the only instruction that uses
+ * the DATA field of the instruction frame instead of DATA_2.
+ * (see EXPLORE_FUTURE in regex.c).
+ */
+ rx_next_char = rx_cache_miss + 1, /* data is (struct superstate *) */
+
+ /* RX_BACKTRACK indicates that a transition fails.
+ */
+ rx_backtrack = rx_next_char + 1, /* no data */
+
+ /*
+ * RX_ERROR_INX is stored only in places that should never be executed.
+ */
+ rx_error_inx = rx_backtrack + 1, /* Not supposed to occur. */
+
+ rx_num_instructions = rx_error_inx + 1
+};
+
+/* An id_instruction_table holds the values stored in instruction
+ * frames. The table is indexed by the enums declared above.
+ */
+extern void * rx_id_instruction_table[rx_num_instructions];
+
+/* The heart of the matcher is a `word-code-interpreter'
+ * (like a byte-code interpreter, except that instructions
+ * are a full word wide).
+ *
+ * Instructions are not stored in a vector of code, instead,
+ * they are scattered throughout the data structures built
+ * by the regexp compiler and the matcher. One word-code instruction,
+ * together with the arguments to that instruction, constitute
+ * an instruction frame (struct rx_inx).
+ *
+ * This structure type is padded by hand to a power of 2 because
+ * in one of the dominant cases, we dispatch by indexing a table
+ * of instruction frames. If that indexing can be accomplished
+ * by just a shift of the index, we're happy.
+ *
+ * Instructions take at most one argument, but there are two
+ * slots in an instruction frame that might hold that argument.
+ * These are called data and data_2. The data slot is only
+ * used for one instruction (RX_NEXT_CHAR). For all other
+ * instructions, data should be set to 0.
+ *
+ * RX_NEXT_CHAR is the most important instruction by far.
+ * By reserving the data field for its exclusive use,
+ * instruction dispatch is sped up in that case. There is
+ * no need to fetch both the instruction and the data,
+ * only the data is needed. In other words, a `cycle' begins
+ * by fetching the field data. If that is non-0, then it must
+ * be the destination state of a next_char transition, so
+ * make that value the current state, advance the match position
+ * by one character, and start a new cycle. On the other hand,
+ * if data is 0, fetch the instruction and do a more complicated
+ * dispatch on that.
+ */
+
+struct rx_inx
+{
+ void * data; /* Destination state for RX_NEXT_CHAR; should be 0 for every other instruction. */
+ void * data_2; /* Argument slot for the instructions other than RX_NEXT_CHAR. */
+ void * inx; /* The instruction itself (a value from the instruction table). */
+ void * fnord; /* NOTE(review): looks like the hand padding to a power-of-2 size -- confirm. */
+};
+
+#ifndef RX_TAIL_ARRAY
+#define RX_TAIL_ARRAY 1
+#endif
+
+/* A superstate corresponds to a set of nfa states. Those sets are
+ * represented by STRUCT RX_SUPERSET. The constructors
+ * guarantee that only one (shared) structure is created for a given set.
+ */
+struct rx_superset
+{
+ int refs; /* This is a reference counted structure. */
+
+ /* We keep these sets in a cache because (in an unpredictable way),
+ * the same set is often created again and again. But that is also
+ * problematic -- compatibility with POSIX and GNU regex requires
+ * that we not be able to tell when a program discards a particular
+ * NFA (thus invalidating the supersets created from it).
+ *
+ * But when a cache hit appears to occur, we will have in hand the
+ * nfa for which it may have happened. That is why every nfa is given
+ * its own sequence number. On a cache hit, the cache is validated
+ * by comparing the nfa sequence number to this field:
+ */
+ int id;
+
+ struct rx_nfa_state * car; /* May or may not be a valid addr. */
+ struct rx_superset * cdr;
+
+ /* If the corresponding superstate exists: */
+ struct rx_superstate * superstate;
+
+
+ /* There is another bookkeeping problem. It is expensive to
+ * compute the starting nfa state set for an nfa. So, once computed,
+ * it is cached in the `struct rx'.
+ *
+ * But, the state set can be flushed from the superstate cache.
+ * When that happens, we can't know if the corresponding `struct rx'
+ * is still alive or if it has been freed or re-used by the program.
+ * So, the cached pointer to this set in a struct rx might be invalid
+ * and we need a way to validate it.
+ *
+ * Fortunately, even if this set is flushed from the cache, it is
+ * not freed. It just goes on the free-list of supersets.
+ * So we can still examine it.
+ *
+ * So to validate a starting set memo, check to see if the
+ * starts_for field still points back to the struct rx in question,
+ * and if the ID matches the rx sequence number.
+ */
+ struct rx * starts_for;
+
+ /* This is used to link into a hash bucket so these objects can
+ * be `hash-consed'.
+ */
+ struct rx_hash_item hash_item;
+};
+
+#define rx_protect_superset(RX,CON) (++(CON)->refs)
+
+/* The terminology may be confusing (rename this structure?).
+ * Every character occurs in at most one rx_super_edge per super-state.
+ * But, that structure might have more than one option, indicating a point
+ * of non-determinism.
+ *
+ * In other words, this structure holds a list of superstate edges
+ * sharing a common starting state and character label. The edges
+ * are in the field OPTIONS. All superstate edges sharing the same
+ * starting state and character are in this list.
+ */
+struct rx_super_edge
+{
+ struct rx_super_edge *next;
+ struct rx_inx rx_backtrack_frame;
+ int cset_size;
+ rx_Bitset cset;
+ struct rx_distinct_future *options;
+};
+
+/* A superstate is a set of nfa states (RX_SUPERSET) along
+ * with a transition table. Superstates are built on demand and reclaimed
+ * without warning. To protect a superstate from this ghastly fate,
+ * use rx_lock_superstate (and rx_unlock_superstate to release it).
+ */
+struct rx_superstate
+{
+ int rx_id; /* c.f. the id field of rx_superset */
+ int locks; /* protection from reclamation */
+
+ /* Within a superstate cache, all the superstates are kept in a big
+ * queue. The tail of the queue is the state most likely to be
+ * reclaimed. The *recyclable fields hold the queue position of
+ * this state.
+ */
+ struct rx_superstate * next_recyclable;
+ struct rx_superstate * prev_recyclable;
+
+ /* The supernfa edges that exist in the cache and that have
+ * this state as their destination are kept in this list:
+ */
+ struct rx_distinct_future * transition_refs;
+
+ /* The list of nfa states corresponding to this superstate: */
+ struct rx_superset * contents;
+
+ /* The list of edges in the cache beginning from this state. */
+ struct rx_super_edge * edges;
+
+ /* A tail of the recyclable queue is marked as semifree. A semifree
+ * state has no incoming next_char transitions -- any transition
+ * into a semifree state causes a complex dispatch with the side
+ * effect of rescuing the state from its semifree state.
+ *
+ * An alternative to this might be to make next_char more expensive,
+ * and to move a state to the head of the recyclable queue whenever
+ * it is entered. That way, popular states would never be recycled.
+ *
+ * But unilaterally making next_char more expensive actually loses.
+ * So, incoming transitions are only made expensive for states near
+ * the tail of the recyclable queue. The more cache contention
+ * there is, the more frequently a state will have to prove itself
+ * and be moved back to the front of the queue. If there is less
+ * contention, then popular states just aggregate in the front of
+ * the queue and stay there.
+ */
+ int is_semifree;
+
+
+ /* This keeps track of the size of the transition table for this
+ * state. There is a half-hearted attempt to support variable sized
+ * superstates.
+ */
+ int trans_size;
+
+ /* Indexed by characters... */
+ struct rx_inx transitions[RX_TAIL_ARRAY];
+};
+
+
+/* A list of distinct futures define the edges that leave from a
+ * given superstate on a given character. c.f. rx_super_edge.
+ */
+
+struct rx_distinct_future
+{
+ struct rx_distinct_future * next_same_super_edge[2];
+ struct rx_distinct_future * next_same_dest;
+ struct rx_distinct_future * prev_same_dest;
+ struct rx_superstate * present; /* source state */
+ struct rx_superstate * future; /* destination state */
+ struct rx_super_edge * edge;
+
+
+ /* The future_frame holds the instruction that should be executed
+ * after all the side effects are done, when it is time to complete
+ * the transition to the next state.
+ *
+ * Normally this is a next_char instruction, but it may be a
+ * cache_miss instruction as well, depending on whether or not
+ * the superstate is in the cache and semifree.
+ *
+ * If this is the only future for a given superstate/char, and
+ * if there are no side effects to be performed, this frame is
+ * not used (directly) at all. Instead, its contents are copied
+ * into the transition table of the starting state of this dist. future.
+ */
+ struct rx_inx future_frame;
+
+ struct rx_inx side_effects_frame;
+ struct rx_se_list * effects;
+};
+
+#define rx_lock_superstate(R,S) ((S)->locks++)
+#define rx_unlock_superstate(R,S) (--(S)->locks)
+
+
+/* This page destined for rx.h */
+
+struct rx_blocklist
+{
+ struct rx_blocklist * next;
+ int bytes;
+};
+
+struct rx_freelist
+{
+ struct rx_freelist * next;
+};
+
+struct rx_cache;
+
+#ifdef __STDC__
+typedef void (*rx_morecore_fn)(struct rx_cache *);
+#else
+typedef void (*rx_morecore_fn)();
+#endif
+
+/* You use this to control the allocation of superstate data
+ * during matching. Most of it should be initialized to 0.
+ *
+ * A MORECORE function is necessary. It should allocate
+ * a new block of memory or return 0.
+ * A default that uses malloc is called `rx_morecore'.
+ *
+ * The number of SUPERSTATES_ALLOWED indirectly limits how much memory
+ * the system will try to allocate. The default is 128. Batch style
+ * applications that are very regexp intensive should use as high a number
+ * as possible without thrashing.
+ *
+ * The LOCAL_CSET_SIZE is the number of characters in a character set.
+ * It is therefore the number of entries in a superstate transition table.
+ * Generally, it should be 256. If your character set has 16 bits,
+ * it is better to translate your regexps into equivalent 8 bit patterns.
+ */
+
+struct rx_cache
+{
+ struct rx_hash_rules superset_hash_rules;
+
+ /* Objects are allocated by incrementing a pointer that
+ * scans across rx_blocklists.
+ */
+ struct rx_blocklist * memory;
+ struct rx_blocklist * memory_pos;
+ int bytes_left;
+ char * memory_addr;
+ rx_morecore_fn morecore;
+
+ /* Freelists. */
+ struct rx_freelist * free_superstates;
+ struct rx_freelist * free_transition_classes;
+ struct rx_freelist * free_discernable_futures;
+ struct rx_freelist * free_supersets;
+ struct rx_freelist * free_hash;
+
+ /* Two sets of superstates -- those that are semifreed, and those
+ * that are being used.
+ */
+ struct rx_superstate * lru_superstate;
+ struct rx_superstate * semifree_superstate;
+
+ struct rx_superset * empty_superset;
+
+ int superstates;
+ int semifree_superstates;
+ int hits;
+ int misses;
+ int superstates_allowed;
+
+ int local_cset_size;
+ void ** instruction_table;
+
+ struct rx_hash superset_table;
+};
+
+
+
+/* The lowest-level search function supports arbitrarily fragmented
+ * strings and (optionally) suspendable/resumable searches.
+ *
+ * Callers have to provide a few hooks.
+ */
+
+#ifndef __GNUC__
+#ifdef __STDC__
+#define __const__ const
+#else
+#define __const__
+#endif
+#endif
+
+/* This holds a matcher position */
+struct rx_string_position
+{
+ __const__ unsigned char * pos; /* The current pos. */
+ __const__ unsigned char * string; /* The current string burst. */
+ __const__ unsigned char * end; /* First invalid position >= POS. */
+ int offset; /* Integer address of the current burst. */
+ int size; /* Current string's size. */
+ int search_direction; /* 1 or -1 */
+ int search_end; /* First position to not try. */
+};
+
+
+enum rx_get_burst_return
+{
+ rx_get_burst_continuation,
+ rx_get_burst_error,
+ rx_get_burst_ok,
+ rx_get_burst_no_more
+};
+
+
+/* A call to get burst should make POS valid. It might be invalid
+ * if the STRING field doesn't point to a burst that actually
+ * contains POS.
+ *
+ * GET_BURST should take a clue from SEARCH_DIRECTION (1 or -1) as to
+ * whether or not to pad to the left. Padding to the right is always
+ * appropriate, but need not go past the point indicated by STOP.
+ *
+ * If a continuation is returned, then the reentering call to
+ * a search function will retry the get_burst.
+ */
+
+#ifdef __STDC__
+typedef enum rx_get_burst_return
+ (*rx_get_burst_fn) (struct rx_string_position * pos,
+ void * app_closure,
+ int stop);
+
+#else
+typedef enum rx_get_burst_return (*rx_get_burst_fn) ();
+#endif
+
+
+enum rx_back_check_return
+{
+ rx_back_check_continuation,
+ rx_back_check_error,
+ rx_back_check_pass,
+ rx_back_check_fail
+};
+
+/* Back_check should advance the position it is passed
+ * over rparen - lparen characters and return pass iff
+ * the characters starting at POS match those indexed
+ * by [LPAREN..RPAREN].
+ *
+ * If a continuation is returned, then the reentering call to
+ * a search function will retry the back_check.
+ */
+
+#ifdef __STDC__
+typedef enum rx_back_check_return
+ (*rx_back_check_fn) (struct rx_string_position * pos,
+ int lparen,
+ int rparen,
+ unsigned char * translate,
+ void * app_closure,
+ int stop);
+
+#else
+typedef enum rx_back_check_return (*rx_back_check_fn) ();
+#endif
+
+
+
+
+/* A call to fetch_char should return the character at POS or POS + 1.
+ * Returning continuations here isn't supported. OFFSET is either 0 or 1
+ * and indicates which character is desired.
+ */
+
+#ifdef __STDC__
+typedef int (*rx_fetch_char_fn) (struct rx_string_position * pos,
+ int offset,
+ void * app_closure,
+ int stop);
+#else
+typedef int (*rx_fetch_char_fn) ();
+#endif
+
+
+enum rx_search_return
+{
+ rx_search_continuation = -4,
+ rx_search_error = -3,
+ rx_search_soft_fail = -2, /* failed by running out of string */
+ rx_search_fail = -1 /* failed only by reaching failure states */
+ /* return values >= 0 indicate the position of a successful match */
+};
+
+
+
+
+
+
+/* regex.h
+ *
+ * The remaining declarations replace regex.h.
+ */
+
+/* This is an array of error messages corresponding to the error codes.
+ */
+extern __const__ char *re_error_msg[];
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* The regex.c support, as a client of rx, defines a set of possible
+ * side effects that can be added to the edge labels of nfa edges.
+ * Here is the list of side effects in use.
+ */
+
+enum re_side_effects
+{
+#define RX_WANT_SE_DEFS 1 /* Presumably makes the included header emit only its RX_DEF_SE / RX_DEF_CPLX_SE list -- TODO confirm. */
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#define RX_DEF_SE(IDEM, NAME, VALUE) NAME VALUE, /* Each entry becomes one enumerator `NAME VALUE,'; IDEM is ignored here. */
+#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) NAME VALUE, /* Same expansion as RX_DEF_SE in this context. */
+#include <regex.h>
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#undef RX_WANT_SE_DEFS
+ re_floogle_flap = 65533 /* Last enumerator; follows the generated entries, each of which ends in a comma. */
+};
+
+/* These hold parameters for the kinds of side effects that are possible
+ * in the supported pattern languages. These include things like the
+ * numeric bounds of {} operators and the index of paren registers for
+ * subexpression measurement or backreferencing.
+ */
+struct re_se_params
+{
+ enum re_side_effects se;
+ int op1;
+ int op2;
+};
+
+typedef unsigned reg_syntax_t;
+
+struct re_pattern_buffer
+{
+ struct rx rx;
+ reg_syntax_t syntax; /* See below for syntax bit definitions. */
+
+ unsigned int no_sub:1; /* If set, don't return register offsets. */
+ unsigned int not_bol:1; /* If set, the anchors ('^' and '$') don't */
+ unsigned int not_eol:1; /* match at the ends of the string. */
+ unsigned int newline_anchor:1;/* If true, an anchor at a newline matches.*/
+ unsigned int least_subs:1; /* If set, and returning registers, return
+ * as few values as possible. Only
+ * backreferenced groups and group 0 (the whole
+ * match) will be returned.
+ */
+
+ /* If true, this says that the matcher should keep registers on its
+ * backtracking stack. For many patterns, we can easily determine that
+ * this isn't necessary.
+ */
+ unsigned int match_regs_on_stack:1;
+ unsigned int search_regs_on_stack:1;
+
+ /* is_anchored and begbuf_only are filled in by rx_compile. */
+ unsigned int is_anchored:1; /* Anchored by ^? */
+ unsigned int begbuf_only:1; /* Anchored to char position 0? */
+
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ * for `max (RE_NREGS, re_nsub + 1)' groups.
+ * If REGS_REALLOCATE, reallocate space if necessary.
+ * If REGS_FIXED, use what's there.
+ */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned int regs_allocated:2;
+
+
+ /* Either a translate table to apply to all characters before
+ * comparing them, or zero for no translation. The translation
+ * is applied to a pattern when it is compiled and to a string
+ * when it is matched.
+ */
+ unsigned char * translate;
+
+ /* If this is a valid pointer, it tells rx not to store the extents of
+ * certain subexpressions (those corresponding to non-zero entries).
+ * Passing 0x1 is the same as passing an array of all ones. Passing 0x0
+ * is the same as passing an array of all zeros.
+ * The array should contain as many entries as there are subexps in the
+ * regexp.
+ *
+ * For POSIX compatibility, when using regcomp and regexec this field
+ * is zeroed and ignored.
+ */
+ char * syntax_parens;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ void * buffer; /* Malloced memory for the nfa. */
+ unsigned long allocated; /* Size of that memory. */
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ * the fastmap, if there is one, to skip over impossible
+ * starting points for matches. */
+ char *fastmap;
+
+ unsigned int fastmap_accurate:1; /* These three are internal. */
+ unsigned int can_match_empty:1;
+ struct rx_nfa_state * start; /* The nfa starting state. */
+
+ /* This is the list of iterator bounds for {lo,hi} constructs.
+ * The memory pointed to is part of the rx->buffer.
+ */
+ struct re_se_params *se_params;
+
+ /* This is a bitset representation of the fastmap.
+ * This is a true fastmap that already takes the translate
+ * table into account.
+ */
+ rx_Bitset fastset;
+};
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating lower
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates lower than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) \
+ & ~(RE_DOT_NOT_NULL|RE_INTERVALS))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+/* RE_SYNTAX_POSIX_BASIC is #defined further down in this file; that is
+ fine because the name is only expanded where RE_SYNTAX_ED is used. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+/* Same bits as RE_SYNTAX_ED. */
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+/* if sizeof(int) == 2, then ((1 << 15) - 1) overflows */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do match at newline
+ characters in the string (POSIX REG_NEWLINE).
+ If not set, then anchors do not match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ * `re_match_2' returns information about at least this many registers
+ * the first time a `regs' structure is passed.
+ *
+ * Also, this is the greatest number of backreferenced subexpressions
+ * allowed in a pattern being matched without caller-supplied registers.
+ */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+extern int rx_cache_bound;
+extern char rx_version_string[];
+
+
+
+#ifdef RX_WANT_RX_DEFS
+
+/* These are the declarations for the interesting subsystems and lower
+ * layers of rx. Everything which doesn't have a public counterpart in
+ * regex.c is declared here.
+ */
+
+
+/* Callback type accepted by rx_free_hash_table (its `freefn' argument).
+ * The ANSI form takes a pointer to a struct rx_hash_item; the K&R form
+ * declares the same function-pointer type without parameter information.
+ */
+#ifdef __STDC__
+typedef void (*rx_hash_freefn) (struct rx_hash_item * it);
+#else /* ndef __STDC__ */
+typedef void (*rx_hash_freefn) ();
+#endif /* ndef __STDC__ */
+
+
+
+
+/* Declarations of the internal rx routines.  The __STDC__ branch gives
+ * full ANSI prototypes; the #else branch repeats exactly the same set of
+ * functions as K&R-style declarations without parameter lists.  Keep the
+ * two lists in step when adding or removing a routine.
+ */
+#ifdef __STDC__
+/* Bitset routines: each takes the set size plus one or two rx_Bitset
+ * operands. */
+RX_DECL int rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL int rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL int rx_bitset_empty (int size, rx_Bitset set);
+RX_DECL void rx_bitset_null (int size, rx_Bitset b);
+RX_DECL void rx_bitset_universe (int size, rx_Bitset b);
+RX_DECL void rx_bitset_complement (int size, rx_Bitset b);
+RX_DECL void rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_union (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_intersection (int size,
+ rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_revdifference (int size,
+ rx_Bitset a, rx_Bitset b);
+RX_DECL void rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b);
+RX_DECL unsigned long rx_bitset_hash (int size, rx_Bitset b);
+/* Hash table routines: all operate on a struct rx_hash and a caller
+ * supplied struct rx_hash_rules. */
+RX_DECL struct rx_hash_item * rx_hash_find (struct rx_hash * table,
+ unsigned long hash,
+ void * value,
+ struct rx_hash_rules * rules);
+RX_DECL struct rx_hash_item * rx_hash_store (struct rx_hash * table,
+ unsigned long hash,
+ void * value,
+ struct rx_hash_rules * rules);
+RX_DECL void rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules);
+RX_DECL void rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
+ struct rx_hash_rules * rules);
+/* Character sets and expression-tree (struct rexp_node) routines. */
+RX_DECL rx_Bitset rx_cset (struct rx *rx);
+RX_DECL rx_Bitset rx_copy_cset (struct rx *rx, rx_Bitset a);
+RX_DECL void rx_free_cset (struct rx * rx, rx_Bitset c);
+RX_DECL struct rexp_node * rexp_node (struct rx *rx,
+ enum rexp_node_type type);
+RX_DECL struct rexp_node * rx_mk_r_cset (struct rx * rx,
+ rx_Bitset b);
+RX_DECL struct rexp_node * rx_mk_r_concat (struct rx * rx,
+ struct rexp_node * a,
+ struct rexp_node * b);
+RX_DECL struct rexp_node * rx_mk_r_alternate (struct rx * rx,
+ struct rexp_node * a,
+ struct rexp_node * b);
+RX_DECL struct rexp_node * rx_mk_r_opt (struct rx * rx,
+ struct rexp_node * a);
+RX_DECL struct rexp_node * rx_mk_r_star (struct rx * rx,
+ struct rexp_node * a);
+RX_DECL struct rexp_node * rx_mk_r_2phase_star (struct rx * rx,
+ struct rexp_node * a,
+ struct rexp_node * b);
+RX_DECL struct rexp_node * rx_mk_r_side_effect (struct rx * rx,
+ rx_side_effect a);
+RX_DECL struct rexp_node * rx_mk_r_data (struct rx * rx,
+ void * a);
+RX_DECL void rx_free_rexp (struct rx * rx, struct rexp_node * node);
+RX_DECL struct rexp_node * rx_copy_rexp (struct rx *rx,
+ struct rexp_node *node);
+/* NFA routines (struct rx_nfa_state / struct rx_nfa_edge). */
+RX_DECL struct rx_nfa_state * rx_nfa_state (struct rx *rx);
+RX_DECL void rx_free_nfa_state (struct rx_nfa_state * n);
+RX_DECL struct rx_nfa_state * rx_id_to_nfa_state (struct rx * rx,
+ int id);
+RX_DECL struct rx_nfa_edge * rx_nfa_edge (struct rx *rx,
+ enum rx_nfa_etype type,
+ struct rx_nfa_state *start,
+ struct rx_nfa_state *dest);
+RX_DECL void rx_free_nfa_edge (struct rx_nfa_edge * e);
+RX_DECL void rx_free_nfa (struct rx *rx);
+RX_DECL int rx_build_nfa (struct rx *rx,
+ struct rexp_node *rexp,
+ struct rx_nfa_state **start,
+ struct rx_nfa_state **end);
+RX_DECL void rx_name_nfa_states (struct rx *rx);
+RX_DECL int rx_eclose_nfa (struct rx *rx);
+RX_DECL void rx_delete_epsilon_transitions (struct rx *rx);
+RX_DECL int rx_compactify_nfa (struct rx *rx,
+ void **mem, unsigned long *size);
+/* Superset / superstate routines, plus pattern compilation and the
+ * fastmap expansion used by init_fastmap. */
+RX_DECL void rx_release_superset (struct rx *rx,
+ struct rx_superset *set);
+RX_DECL struct rx_superset * rx_superset_cons (struct rx * rx,
+ struct rx_nfa_state *car, struct rx_superset *cdr);
+RX_DECL struct rx_superset * rx_superstate_eclosure_union
+ (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl);
+RX_DECL struct rx_superstate * rx_superstate (struct rx *rx,
+ struct rx_superset *set);
+RX_DECL struct rx_inx * rx_handle_cache_miss
+ (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data);
+RX_DECL reg_errcode_t rx_compile (__const__ char *pattern, int size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer * rxb);
+RX_DECL void rx_blow_up_fastmap (struct re_pattern_buffer * rxb);
+#else /* STDC */
+/* K&R declarations: same functions, same order, no parameter lists. */
+RX_DECL int rx_bitset_is_equal ();
+RX_DECL int rx_bitset_is_subset ();
+RX_DECL int rx_bitset_empty ();
+RX_DECL void rx_bitset_null ();
+RX_DECL void rx_bitset_universe ();
+RX_DECL void rx_bitset_complement ();
+RX_DECL void rx_bitset_assign ();
+RX_DECL void rx_bitset_union ();
+RX_DECL void rx_bitset_intersection ();
+RX_DECL void rx_bitset_difference ();
+RX_DECL void rx_bitset_revdifference ();
+RX_DECL void rx_bitset_xor ();
+RX_DECL unsigned long rx_bitset_hash ();
+RX_DECL struct rx_hash_item * rx_hash_find ();
+RX_DECL struct rx_hash_item * rx_hash_store ();
+RX_DECL void rx_hash_free ();
+RX_DECL void rx_free_hash_table ();
+RX_DECL rx_Bitset rx_cset ();
+RX_DECL rx_Bitset rx_copy_cset ();
+RX_DECL void rx_free_cset ();
+RX_DECL struct rexp_node * rexp_node ();
+RX_DECL struct rexp_node * rx_mk_r_cset ();
+RX_DECL struct rexp_node * rx_mk_r_concat ();
+RX_DECL struct rexp_node * rx_mk_r_alternate ();
+RX_DECL struct rexp_node * rx_mk_r_opt ();
+RX_DECL struct rexp_node * rx_mk_r_star ();
+RX_DECL struct rexp_node * rx_mk_r_2phase_star ();
+RX_DECL struct rexp_node * rx_mk_r_side_effect ();
+RX_DECL struct rexp_node * rx_mk_r_data ();
+RX_DECL void rx_free_rexp ();
+RX_DECL struct rexp_node * rx_copy_rexp ();
+RX_DECL struct rx_nfa_state * rx_nfa_state ();
+RX_DECL void rx_free_nfa_state ();
+RX_DECL struct rx_nfa_state * rx_id_to_nfa_state ();
+RX_DECL struct rx_nfa_edge * rx_nfa_edge ();
+RX_DECL void rx_free_nfa_edge ();
+RX_DECL void rx_free_nfa ();
+RX_DECL int rx_build_nfa ();
+RX_DECL void rx_name_nfa_states ();
+RX_DECL int rx_eclose_nfa ();
+RX_DECL void rx_delete_epsilon_transitions ();
+RX_DECL int rx_compactify_nfa ();
+RX_DECL void rx_release_superset ();
+RX_DECL struct rx_superset * rx_superset_cons ();
+RX_DECL struct rx_superset * rx_superstate_eclosure_union ();
+RX_DECL struct rx_superstate * rx_superstate ();
+RX_DECL struct rx_inx * rx_handle_cache_miss ();
+RX_DECL reg_errcode_t rx_compile ();
+RX_DECL void rx_blow_up_fastmap ();
+#endif /* STDC */
+
+
+#endif /* RX_WANT_RX_DEFS */
+
+
+
+/* Public entry points.  ANSI prototypes are given under __STDC__; the
+ * #else branch declares the same functions K&R-style.  Unlike the
+ * internal declarations these are not guarded by RX_WANT_RX_DEFS.
+ */
+#ifdef __STDC__
+/* Searching and matching on a compiled struct re_pattern_buffer.  The
+ * `_2' variants take the subject as two pieces (string1/size1 and
+ * string2/size2). */
+extern int re_search_2 (struct re_pattern_buffer *rxb,
+ __const__ char * string1, int size1,
+ __const__ char * string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs,
+ int stop);
+extern int re_search (struct re_pattern_buffer * rxb, __const__ char *string,
+ int size, int startpos, int range,
+ struct re_registers *regs);
+extern int re_match_2 (struct re_pattern_buffer * rxb,
+ __const__ char * string1, int size1,
+ __const__ char * string2, int size2,
+ int pos, struct re_registers *regs, int stop);
+extern int re_match (struct re_pattern_buffer * rxb,
+ __const__ char * string,
+ int size, int pos,
+ struct re_registers *regs);
+/* Syntax selection, register setup and compilation. */
+extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
+extern void re_set_registers (struct re_pattern_buffer *bufp,
+ struct re_registers *regs,
+ unsigned num_regs,
+ regoff_t * starts, regoff_t * ends);
+extern __const__ char * re_compile_pattern (__const__ char *pattern,
+ int length,
+ struct re_pattern_buffer * rxb);
+extern int re_compile_fastmap (struct re_pattern_buffer * rxb);
+/* Interface that takes no pattern-buffer argument. */
+extern char * re_comp (__const__ char *s);
+extern int re_exec (__const__ char *s);
+/* regex_t based interface; `cflags' and `eflags' use the REG_* bits
+ * defined earlier in this header. */
+extern int regcomp (regex_t * preg, __const__ char * pattern, int cflags);
+extern int regexec (__const__ regex_t *preg, __const__ char *string,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags);
+extern size_t regerror (int errcode, __const__ regex_t *preg,
+ char *errbuf, size_t errbuf_size);
+extern void regfree (regex_t *preg);
+
+#else /* STDC */
+/* K&R declarations of the same functions, in the same order. */
+extern int re_search_2 ();
+extern int re_search ();
+extern int re_match_2 ();
+extern int re_match ();
+extern reg_syntax_t re_set_syntax ();
+extern void re_set_registers ();
+extern __const__ char * re_compile_pattern ();
+extern int re_compile_fastmap ();
+extern char * re_comp ();
+extern int re_exec ();
+extern int regcomp ();
+extern int regexec ();
+extern size_t regerror ();
+extern void regfree ();
+
+#endif /* STDC */
+
+
+
+#ifdef RX_WANT_RX_DEFS
+
+/* One record of counter state used by the matcher.
+ * NOTE(review): the code that fills in `tag', `val' and `cdr' is not
+ * in this part of the file; presumably `tag' identifies a counter,
+ * `val' is its current value and `cdr' links to the next record --
+ * confirm against the matcher before relying on this.
+ */
+struct rx_counter_frame
+{
+ int tag;
+ int val;
+ struct rx_counter_frame * inherited_from; /* If this is a copy. */
+ struct rx_counter_frame * cdr;
+};
+
+/* Saved matcher state for one backtracking point. */
+struct rx_backtrack_frame
+{
+ char * counter_stack_sp;
+
+ /* A frame is used to save the matcher's state when it crosses a
+ * backtracking point. The `stk_' fields correspond to variables
+ * in re_search_2 (just strip off the `stk_'). They are documented
+ * there.
+ */
+ struct rx_superstate * stk_super;
+ unsigned int stk_c;
+ struct rx_string_position stk_test_pos;
+ int stk_last_l;
+ int stk_last_r;
+ int stk_test_ret;
+
+ /* This is the list of options left to explore at the backtrack
+ * point for which this frame was created.
+ */
+ struct rx_distinct_future * df;
+ struct rx_distinct_future * first_df;
+
+#ifdef RX_DEBUG
+ /* Only present in RX_DEBUG builds. */
+ int stk_line_no;
+#endif
+};
+
+/* Header of one chunk of the chunked stacks managed by the PUSH and POP
+ * macros.  Frame storage follows the header inside the same allocation
+ * (PUSH sets `sp' to the address just past the header).
+ */
+struct rx_stack_chunk
+{
+ struct rx_stack_chunk * next_chunk; /* Chunk below this one; PUSH links new chunks in front. */
+ int bytes_left; /* Bytes still unused in this chunk. */
+ char * sp; /* Moved forward by PUSH and back by POP. */
+};
+
+/* Entry points of the outer switch in rx_search.  The value is kept in
+ * rx_search_state.outer_search_resume_pt so that a search that returned
+ * a continuation can be resumed.
+ */
+enum rx_outer_entry
+{
+ rx_outer_start,
+ rx_outer_fastmap,
+ rx_outer_test,
+ rx_outer_restore_pos
+};
+
+/* Values returned by fastmap_search. */
+enum rx_fastmap_return
+{
+ rx_fastmap_continuation, /* get_burst asked for a continuation; resume point saved. */
+ rx_fastmap_error, /* Bad resume point or get_burst error. */
+ rx_fastmap_ok, /* Stopped at a candidate position (or forward search ran out of string). */
+ rx_fastmap_fail /* Backward search ran out of string. */
+};
+
+/* Entry points of the switch in fastmap_search; stored in
+ * rx_search_state.fastmap_resume_pt.
+ */
+enum rx_fastmap_entry
+{
+ rx_fastmap_start, /* Set by init_fastmap: start a fresh scan. */
+ rx_fastmap_string_break /* Resume by asking get_burst for more string. */
+};
+
+/* NOTE(review): no use of this enum is visible in this part of the
+ * file; presumably these are the results of the match tester -- confirm
+ * against its callers.
+ */
+enum rx_test_return
+{
+ rx_test_continuation,
+ rx_test_error,
+ rx_test_fail,
+ rx_test_ok
+};
+
+/* NOTE(review): no use of this enum is visible in this part of the
+ * file; presumably internal status codes of the match tester -- confirm
+ * against the code that returns them.
+ */
+enum rx_test_internal_return
+{
+ rx_test_internal_error,
+ rx_test_found_first,
+ rx_test_line_finished
+};
+
+/* Resume points for continuations within the match tester; this is the
+ * type of rx_search_state.test_match_resume_pt.
+ */
+enum rx_test_match_entry
+{
+ rx_test_start,
+ rx_test_cache_hit_loop,
+ rx_test_backreference_check,
+ rx_test_backtrack_return
+};
+
+/* Complete state of one rx_search call.  rx_search keeps a local copy
+ * and, when it returns a continuation, copies it (together with its
+ * saved_* arguments) into the caller's save_state so the search can be
+ * resumed later.
+ */
+struct rx_search_state
+{
+ /* Two groups of registers are kept. The group with the register state
+ * of the current test match, and the group that holds the state at the end
+ * of the best known match, if any.
+ *
+ * For some patterns, there may also be registers saved on the stack.
+ */
+ unsigned num_regs; /* Includes an element for register zero. */
+ regoff_t * lparen; /* scratch space for register returns */
+ regoff_t * rparen;
+ regoff_t * best_lpspace; /* in case the user doesn't want these */
+ regoff_t * best_rpspace; /* values, we still need space to store
+ * them. Normally, this memory is unused
+ * and the space pointed to by REGS is
+ * used instead.
+ */
+
+ int last_l; /* Highest index of a valid lparen. */
+ int last_r; /* Its dual. */
+
+ /* These are only ever assigned from best_lpspace / best_rpspace or
+ * from the caller's regs->start / regs->end, all of which are
+ * regoff_t arrays, so they are declared regoff_t * as well.
+ */
+ regoff_t * best_lparen; /* This contains the best known register */
+ regoff_t * best_rparen; /* assignments.
+ * This may point to the same mem as
+ * best_lpspace, or it might point to memory
+ * passed by the caller.
+ */
+ int best_last_l; /* best_last_l:best_lparen::last_l:lparen */
+ int best_last_r;
+
+
+ unsigned char * translate;
+
+ struct rx_string_position outer_pos;
+
+ struct rx_superstate * start_super;
+ int nfa_choice;
+ int first_found; /* If true, return after finding any match. */
+ int ret_val;
+
+ /* For continuations... */
+ enum rx_outer_entry outer_search_resume_pt;
+ struct re_pattern_buffer * saved_rxb;
+ int saved_startpos;
+ int saved_range;
+ int saved_stop;
+ int saved_total_size;
+ rx_get_burst_fn saved_get_burst;
+ rx_back_check_fn saved_back_check;
+ struct re_registers * saved_regs;
+
+ /**
+ ** state for fastmap
+ **/
+ char * fastmap;
+ int fastmap_chr; /* Sentinel character, or -1 when none is installed. */
+ int fastmap_val; /* Fastmap entry the sentinel replaced. */
+
+ /* for continuations in the fastmap procedure: */
+ enum rx_fastmap_entry fastmap_resume_pt;
+
+ /**
+ ** state for test_match
+ **/
+
+ /* The current superNFA position of the matcher. */
+ struct rx_superstate * super;
+
+ /* The matcher interprets a series of instruction frames.
+ * This is the `instruction counter' for the interpretation.
+ */
+ struct rx_inx * ifr;
+
+ /* We insert a ghost character in the string to prime
+ * the nfa. test_pos.pos, test_pos.str_half, and test_pos.end_half
+ * keep track of the test-match position and string-half.
+ */
+ unsigned char c;
+
+ /* Position within the string. */
+ struct rx_string_position test_pos;
+
+ struct rx_stack_chunk * counter_stack;
+ struct rx_stack_chunk * backtrack_stack;
+ int backtrack_frame_bytes;
+ int chunk_bytes; /* Size PUSH requests for each new stack chunk. */
+ struct rx_stack_chunk * free_chunks; /* Chunks available for reuse by PUSH. */
+
+ /* To return from this function, set test_ret and
+ * `goto test_do_return'.
+ *
+ * Possible return values are:
+ * 1 --- end of string while the superNFA is still going
+ * 0 --- internal error (out of memory)
+ * -1 --- search completed by reaching the superNFA fail state
+ * -2 --- a match was found, maybe not the longest.
+ *
+ * When the search is complete (-1), best_last_r indicates whether
+ * a match was found.
+ *
+ * -2 is returned only if search_state.first_found is non-zero.
+ *
+ * if search_state.first_found is non-zero, a return of -1 indicates no match,
+ * otherwise, best_last_r has to be checked.
+ */
+ int test_ret;
+
+ int could_have_continued;
+
+#ifdef RX_DEBUG
+ int backtrack_depth;
+ /* There is a search tree with every node as set of deterministic
+ * transitions in the super nfa. For every branch of a
+ * backtrack point is an edge in the tree.
+ * This counts up a pre-order of nodes in that tree.
+ * It's saved on the search stack and printed when debugging.
+ */
+ int line_no;
+ int lines_found;
+#endif
+
+
+ /* For continuations within the match tester */
+ enum rx_test_match_entry test_match_resume_pt;
+ struct rx_inx * saved_next_tr_table;
+ struct rx_inx * saved_this_tr_table;
+ int saved_reg;
+ struct rx_backtrack_frame * saved_bf;
+
+};
+static __inline__ void init_fastmap( struct re_pattern_buffer *,
+ struct rx_search_state * );
+
+
+extern char rx_slowmap[];
+extern unsigned char rx_id_translation[];
+
+/* Prepare SEARCH_STATE for a fastmap scan over the pattern RXB.
+ *
+ * The scan uses RXB's own fastmap when it has one and rx_slowmap
+ * otherwise.  The sentinel fields are cleared and the resume point is
+ * set so that fastmap_search starts from the beginning.
+ */
+static __inline__ void
+init_fastmap( struct re_pattern_buffer * rxb,
+              struct rx_search_state * search_state )
+{
+  if (rxb->fastmap)
+    {
+      search_state->fastmap = (char *)rxb->fastmap;
+      /* When the regexp was compiled, the fastmap was computed and
+       * stored in a bitset.  If it has not been expanded yet, expand
+       * the bitset into a character array containing 1s and 0s.
+       */
+      if (!rxb->fastmap_accurate)
+        rx_blow_up_fastmap (rxb);
+    }
+  else
+    search_state->fastmap = (char *)rx_slowmap;
+
+  /* No sentinel is installed yet. */
+  search_state->fastmap_val = 0;
+  search_state->fastmap_chr = -1;
+  search_state->fastmap_resume_pt = rx_fastmap_start;
+}
+
+/* Undo the sentinel installed by fastmap_search: if a sentinel
+ * character is recorded in SEARCH_STATE, put the saved fastmap entry
+ * back.  RXB is accepted for symmetry with init_fastmap and is not used.
+ */
+static __inline__ void
+uninit_fastmap ( struct re_pattern_buffer * rxb,
+                 struct rx_search_state * search_state )
+{
+  int sentinel_chr = search_state->fastmap_chr;
+
+  /* A negative value means no sentinel is currently set. */
+  if (sentinel_chr < 0)
+    return;
+
+  search_state->fastmap[sentinel_chr] = search_state->fastmap_val;
+}
+
+/* Advance search_state->outer_pos.pos to the next position whose
+ * character is flagged in search_state->fastmap, fetching further
+ * pieces of the string through GET_BURST when a piece is exhausted.
+ *
+ * Returns a value of enum rx_fastmap_return:
+ *   rx_fastmap_ok            a candidate position was reached, or a
+ *                            forward search ran out of string;
+ *   rx_fastmap_fail          a backward search ran out of string;
+ *   rx_fastmap_continuation  get_burst asked for a continuation; the
+ *                            resume point is stored in
+ *                            search_state->fastmap_resume_pt;
+ *   rx_fastmap_error         get_burst failed or the resume point was
+ *                            not recognized.
+ *
+ * RXB is not referenced in the body.  APP_CLOSURE and STOP are only
+ * passed through to GET_BURST.
+ */
+static __inline__ int
+fastmap_search ( struct re_pattern_buffer * rxb, int stop,
+ rx_get_burst_fn get_burst, void * app_closure,
+ struct rx_search_state * search_state )
+{
+ enum rx_fastmap_entry pc;
+
+ /* This block is only ever entered through `goto return_continuation'. */
+ if (0)
+ {
+ return_continuation:
+ search_state->fastmap_resume_pt = pc;
+ return rx_fastmap_continuation;
+ }
+
+ pc = search_state->fastmap_resume_pt;
+
+ switch (pc)
+ {
+ default:
+ return rx_fastmap_error;
+ case rx_fastmap_start:
+ init_fastmap_sentinal:
+ /* For the sake of fast fastmapping, set a sentinel in the fastmap.
+ * This sentinel will trap the fastmap loop when it reaches the last
+ * valid character in a string half.
+ *
+ * This must be reset when the fastmap/search loop crosses a string
+ * boundary, and before returning to the caller. So sometimes,
+ * the fastmap loop is restarted with `continue', other times by
+ * `goto init_fastmap_sentinal'.
+ *
+ * NOTE(review): the rx_get_burst_ok path below jumps back here
+ * without first restoring the previous sentinel's fastmap entry;
+ * presumably get_burst or the caller takes care of that -- verify.
+ */
+ if (search_state->outer_pos.size)
+ {
+ /* The sentinel is the last character of this piece when scanning
+ * forward and the first one when scanning backward.
+ */
+ search_state->fastmap_chr = ((search_state->outer_pos.search_direction == 1)
+ ? *(search_state->outer_pos.end - 1)
+ : *search_state->outer_pos.string);
+ search_state->fastmap_val
+ = search_state->fastmap[search_state->fastmap_chr];
+ search_state->fastmap[search_state->fastmap_chr] = 1;
+ }
+ else
+ {
+ search_state->fastmap_chr = -1;
+ search_state->fastmap_val = 0;
+ }
+
+ if (search_state->outer_pos.pos >= search_state->outer_pos.end)
+ goto fastmap_hit_bound;
+ else
+ {
+ if (search_state->outer_pos.search_direction == 1)
+ {
+ if (search_state->fastmap_val)
+ {
+ /* The sentinel character is genuinely in the fastmap, so
+ * wherever the scan stops is a real candidate.
+ */
+ for (;;)
+ {
+ while (!search_state->fastmap[*search_state->outer_pos.pos])
+ ++search_state->outer_pos.pos;
+ return rx_fastmap_ok;
+ }
+ }
+ else
+ {
+ /* The sentinel character is not really in the fastmap:
+ * skip over its occurrences and stop at the end of the piece.
+ */
+ for (;;)
+ {
+ while (!search_state->fastmap[*search_state->outer_pos.pos])
+ ++search_state->outer_pos.pos;
+ if (*search_state->outer_pos.pos != search_state->fastmap_chr)
+ return rx_fastmap_ok;
+ else
+ {
+ ++search_state->outer_pos.pos;
+ if (search_state->outer_pos.pos == search_state->outer_pos.end)
+ goto fastmap_hit_bound;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* Backward scan: `bound' is one before the first character. */
+ __const__ unsigned char * bound;
+ bound = search_state->outer_pos.string - 1;
+ if (search_state->fastmap_val)
+ {
+ for (;;)
+ {
+ while (!search_state->fastmap[*search_state->outer_pos.pos])
+ --search_state->outer_pos.pos;
+ return rx_fastmap_ok;
+ }
+ }
+ else
+ {
+ for (;;)
+ {
+ while (!search_state->fastmap[*search_state->outer_pos.pos])
+ --search_state->outer_pos.pos;
+ /* fastmap_val is zero throughout this branch, so the second
+ * half of the test below never changes the outcome.
+ */
+ if ((*search_state->outer_pos.pos != search_state->fastmap_chr) || search_state->fastmap_val)
+ return rx_fastmap_ok;
+ else
+ {
+ --search_state->outer_pos.pos;
+ if (search_state->outer_pos.pos == bound)
+ goto fastmap_hit_bound;
+ }
+ }
+ }
+ }
+ }
+
+ case rx_fastmap_string_break:
+ fastmap_hit_bound:
+ {
+ /* If we hit a bound, it may be time to fetch another burst
+ * of string, or it may be time to return a continuation to
+ * the caller, or it might be time to fail.
+ */
+
+ int burst_state;
+ burst_state = get_burst (&search_state->outer_pos, app_closure, stop);
+ switch (burst_state)
+ {
+ default:
+ case rx_get_burst_error:
+ return rx_fastmap_error;
+ case rx_get_burst_continuation:
+ {
+ /* Resume at this case label when the caller comes back. */
+ pc = rx_fastmap_string_break;
+ goto return_continuation;
+ }
+ case rx_get_burst_ok:
+ goto init_fastmap_sentinal;
+ case rx_get_burst_no_more:
+ /* ...not a string split, simply no more string.
+ *
+ * When searching backward, running out of string
+ * is reason to quit.
+ *
+ * When searching forward, we allow the possibility
+ * of an (empty) match after the last character in the
+ * virtual string. So, fall through to the matcher
+ */
+ return ( (search_state->outer_pos.search_direction == 1)
+ ? rx_fastmap_ok
+ : rx_fastmap_fail);
+ }
+ }
+ }
+
+}
+
+
+
+#ifdef emacs
+/* The `emacs' switch turns on certain matching commands
+ * that make sense only in Emacs.
+ */
+#include "config.h"
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+#endif /* emacs */
+
+/* Setting RX_MEMDBUG is useful if you have dbmalloc. Maybe with similar
+ * packages too.
+ */
+#ifdef RX_MEMDBUG
+#include <malloc.h>
+#endif /* RX_MEMDBUG */
+
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+ * `BSTRING', as far as I know, and neither of them use this code.
+ */
+/* Map the BSD byte-string routines onto their ISO C equivalents when
+ * <string.h> is available; otherwise fall back to <strings.h>.
+ * Each macro is only defined if the name is not already a macro.
+ */
+#if HAVE_STRING_H || __STDC__
+#include <string.h>
+
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+
+/* bcopy is specified to work for overlapping regions, so it must map
+ * to memmove; memcpy has undefined behavior when the regions overlap.
+ * Note the argument order: bcopy takes (source, destination).
+ */
+#ifndef bcopy
+#define bcopy(s, d, n) memmove ((d), (s), (n))
+#endif
+
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+
+#else /* not (HAVE_STRING_H || __STDC__) */
+#include <strings.h>
+#endif /* not (HAVE_STRING_H || __STDC__) */
+
+#ifdef __STDC__
+#include <stdlib.h>
+#else /* not __STDC__ */
+char *malloc ();
+char *realloc ();
+#endif /* not __STDC__ */
+
+
+
+
+/* How many characters in the character set. */
+#define CHAR_SET_SIZE (1 << CHARBITS)
+
+#ifndef emacs
+/* Define the syntax basics for \<, \>, etc.
+ * This must be nonzero for the wordchar and notwordchar pattern
+ * commands in re_match_2.
+ */
+#ifndef Sword
+#define Sword 1
+#endif
+/* SYNTAX (c) indexes re_syntax_table directly with its argument, which
+ * is neither parenthesized nor converted; pass a simple expression in
+ * the range 0 .. CHAR_SET_SIZE - 1.
+ */
+#define SYNTAX(c) re_syntax_table[c]
+RX_DECL char re_syntax_table[CHAR_SET_SIZE];
+#endif /* not emacs */
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ * of `string1' and `string2'. If only one string, it's `string2'.
+ *
+ * Both macros read the local variable `search_state' of the function
+ * they are expanded in (AT_STRINGS_END also reads `total_size').  The
+ * current index is (test_pos.pos - test_pos.string) + test_pos.offset;
+ * it is compared with -1 for the beginning and total_size - 1 for the
+ * end.
+ */
+
+#define AT_STRINGS_BEG() \
+ ( -1 \
+ == ((search_state.test_pos.pos - search_state.test_pos.string) \
+ + search_state.test_pos.offset))
+
+#define AT_STRINGS_END() \
+ ( (total_size - 1) \
+ == ((search_state.test_pos.pos - search_state.test_pos.string) \
+ + search_state.test_pos.offset))
+
+
+/* Test if POS + 1 points to a character which is word-constituent. We have
+ * two special cases to check for: if past the end of string1, look at
+ * the first character in string2; and if before the beginning of
+ * string2, look at the last character in string1.
+ *
+ * Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG ().
+ *
+ * The character itself is obtained by calling fetch_char (POS, OFF,
+ * app_closure, stop), so the expanding function must have `fetch_char',
+ * `app_closure' and `stop' in scope; the result is word-constituent
+ * when its SYNTAX class equals Sword.
+ */
+#define LETTER_P(POS,OFF) \
+ ( SYNTAX (fetch_char(POS, OFF, app_closure, stop)) \
+ == Sword)
+
+/* Test if the character at D and the one after D differ with respect
+ * to being word-constituent.  The very beginning and very end of the
+ * string always count as a boundary.  D is evaluated twice.
+ */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d,0) != LETTER_P (d, 1))
+
+
+/* Allocation of stack chunks for PUSH / POP.
+ *
+ * With RX_SUPPORT_CONTINUATIONS the chunks must outlive the current
+ * call, so they come from malloc and are released with free.
+ * Otherwise they are obtained with alloca; such memory cannot be given
+ * back individually, so RX_STACK_FREE puts the chunk on
+ * search_state.free_chunks, from which PUSH reuses it.
+ *
+ * RX_STACK_FREE is a single statement (do { } while (0)) that evaluates
+ * MEM once, so it is safe as the body of an unbraced `if'.  It must be
+ * followed by a semicolon and needs a `search_state' variable in scope.
+ */
+#ifdef RX_SUPPORT_CONTINUATIONS
+#define RX_STACK_ALLOC(BYTES) malloc(BYTES)
+#define RX_STACK_FREE(MEM) free(MEM)
+#else
+#define RX_STACK_ALLOC(BYTES) alloca(BYTES)
+#define RX_STACK_FREE(MEM) \
+ do \
+ { \
+ struct rx_stack_chunk * rx_freed_chunk = (struct rx_stack_chunk *)(MEM); \
+ rx_freed_chunk->next_chunk = search_state.free_chunks; \
+ search_state.free_chunks = rx_freed_chunk; \
+ } \
+ while (0)
+
+#endif
+
+/* PUSH (CHUNK_VAR, BYTES): make room for one BYTES-sized frame on the
+ * chunked stack whose top chunk is CHUNK_VAR.
+ *
+ * If there is no chunk yet, or the top chunk has fewer than BYTES bytes
+ * left, a chunk is taken from search_state.free_chunks or obtained with
+ * RX_STACK_ALLOC (search_state.chunk_bytes), its `sp' is set just past
+ * the chunk header, and it is linked in front of CHUNK_VAR.  Otherwise
+ * `sp' of the current chunk is advanced by BYTES.
+ *
+ * The macro expands to a bare if/else statement (it is not wrapped in
+ * do { } while (0)) and evaluates BYTES more than once.  It relies on
+ * the expanding function providing a `search_state' variable and a
+ * `test_do_return' label: on allocation failure it sets
+ * search_state.ret_val to 0 and jumps there.
+ * NOTE(review): the comment on rx_search_state.test_ret says to set
+ * test_ret before `goto test_do_return'; confirm that ret_val is the
+ * intended field here.
+ */
+#define PUSH(CHUNK_VAR,BYTES) \
+ if (!CHUNK_VAR || (CHUNK_VAR->bytes_left < (BYTES))) \
+ { \
+ struct rx_stack_chunk * new_chunk; \
+ if (search_state.free_chunks) \
+ { \
+ new_chunk = search_state.free_chunks; \
+ search_state.free_chunks = search_state.free_chunks->next_chunk; \
+ } \
+ else \
+ { \
+ new_chunk = (struct rx_stack_chunk *)RX_STACK_ALLOC(search_state.chunk_bytes); \
+ if (!new_chunk) \
+ { \
+ search_state.ret_val = 0; \
+ goto test_do_return; \
+ } \
+ } \
+ new_chunk->sp = (char *)new_chunk + sizeof (struct rx_stack_chunk); \
+ new_chunk->bytes_left = (search_state.chunk_bytes \
+ - (BYTES) \
+ - sizeof (struct rx_stack_chunk)); \
+ new_chunk->next_chunk = CHUNK_VAR; \
+ CHUNK_VAR = new_chunk; \
+ } \
+ else \
+ (CHUNK_VAR->sp += (BYTES)), (CHUNK_VAR->bytes_left -= (BYTES))
+
+/* POP (CHUNK_VAR, BYTES): drop the top BYTES-sized frame pushed by PUSH.
+ *
+ * If `sp' is at the first frame slot of the chunk (just past the chunk
+ * header), the chunk itself is released with RX_STACK_FREE and CHUNK_VAR
+ * becomes its next_chunk; otherwise `sp' is moved back by BYTES and
+ * bytes_left grows by BYTES.
+ *
+ * CHUNK_VAR is dereferenced without a null check, so it must be
+ * non-null.  Like PUSH this expands to a bare if/else statement.
+ */
+#define POP(CHUNK_VAR,BYTES) \
+ if (CHUNK_VAR->sp == ((char *)CHUNK_VAR + sizeof(*CHUNK_VAR))) \
+ { \
+ struct rx_stack_chunk * new_chunk = CHUNK_VAR->next_chunk; \
+ RX_STACK_FREE(CHUNK_VAR); \
+ CHUNK_VAR = new_chunk; \
+ } \
+ else \
+ (CHUNK_VAR->sp -= BYTES), (CHUNK_VAR->bytes_left += BYTES)
+
+
+
+#define SRCH_TRANSLATE(C) search_state.translate[(unsigned char) (C)]
+
+
+
+
+#ifdef __STDC__
+RX_DECL __inline__ int
+rx_search (struct re_pattern_buffer * rxb,
+ int startpos,
+ int range,
+ int stop,
+ int total_size,
+ rx_get_burst_fn get_burst,
+ rx_back_check_fn back_check,
+ rx_fetch_char_fn fetch_char,
+ void * app_closure,
+ struct re_registers * regs,
+ struct rx_search_state * resume_state,
+ struct rx_search_state * save_state)
+#else
+RX_DECL __inline__ int
+rx_search (rxb, startpos, range, stop, total_size,
+ get_burst, back_check, fetch_char,
+ app_closure, regs, resume_state, save_state)
+ struct re_pattern_buffer * rxb;
+ int startpos;
+ int range;
+ int stop;
+ int total_size;
+ rx_get_burst_fn get_burst;
+ rx_back_check_fn back_check;
+ rx_fetch_char_fn fetch_char;
+ void * app_closure;
+ struct re_registers * regs;
+ struct rx_search_state * resume_state;
+ struct rx_search_state * save_state;
+#endif
+{
+ int pc;
+ int test_state;
+ struct rx_search_state search_state;
+
+ search_state.free_chunks = 0;
+ if (!resume_state)
+ pc = rx_outer_start;
+ else
+ {
+ search_state = *resume_state;
+ regs = search_state.saved_regs;
+ rxb = search_state.saved_rxb;
+ startpos = search_state.saved_startpos;
+ range = search_state.saved_range;
+ stop = search_state.saved_stop;
+ total_size = search_state.saved_total_size;
+ get_burst = search_state.saved_get_burst;
+ back_check = search_state.saved_back_check;
+ pc = search_state.outer_search_resume_pt;
+ if (0)
+ {
+ return_continuation:
+ if (save_state)
+ {
+ *save_state = search_state;
+ save_state->saved_regs = regs;
+ save_state->saved_rxb = rxb;
+ save_state->saved_startpos = startpos;
+ save_state->saved_range = range;
+ save_state->saved_stop = stop;
+ save_state->saved_total_size = total_size;
+ save_state->saved_get_burst = get_burst;
+ save_state->saved_back_check = back_check;
+ save_state->outer_search_resume_pt = pc;
+ }
+ return rx_search_continuation;
+ }
+ }
+
+ switch (pc)
+ {
+ case rx_outer_start:
+ search_state.ret_val = rx_search_fail;
+ ( search_state.lparen
+ = search_state.rparen
+ = search_state.best_lpspace
+ = search_state.best_rpspace
+ = 0);
+
+ /* figure the number of registers we may need for use in backreferences.
+ * the number here includes an element for register zero.
+ */
+ search_state.num_regs = rxb->re_nsub + 1;
+
+
+ /* check for out-of-range startpos. */
+ if ((startpos < 0) || (startpos > total_size))
+ return rx_search_fail;
+
+ /* fix up range if it might eventually take us outside the string. */
+ {
+ int endpos;
+ endpos = startpos + range;
+ if (endpos < -1)
+ range = (-1 - startpos);
+ else if (endpos > (total_size + 1))
+ range = total_size - startpos;
+ }
+
+ /* if the search isn't to be a backwards one, don't waste time in a
+ * long search for a pattern that says it is anchored.
+ */
+ if (rxb->begbuf_only && (range > 0))
+ {
+ if (startpos > 0)
+ return rx_search_fail;
+ else
+ range = 1;
+ }
+
+ /* decide whether to use internal or user-provided reg buffers. */
+ if (!regs || rxb->no_sub)
+ {
+ search_state.best_lpspace =
+ (regoff_t *)REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
+ search_state.best_rpspace =
+ (regoff_t *)REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
+ search_state.best_lparen = search_state.best_lpspace;
+ search_state.best_rparen = search_state.best_rpspace;
+ }
+ else
+ {
+ /* have the register data arrays been allocated? */
+ if (rxb->regs_allocated == REGS_UNALLOCATED)
+ { /* no. so allocate them with malloc. we need one
+ extra element beyond `search_state.num_regs' for the `-1' marker
+ gnu code uses. */
+ regs->num_regs = MAX (RE_NREGS, rxb->re_nsub + 1);
+ regs->start = ((regoff_t *)
+ malloc (regs->num_regs * sizeof ( regoff_t)));
+ regs->end = ((regoff_t *)
+ malloc (regs->num_regs * sizeof ( regoff_t)));
+ if (regs->start == 0 || regs->end == 0)
+ return rx_search_error;
+ rxb->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (rxb->regs_allocated == REGS_REALLOCATE)
+ { /* yes. if we need more elements than were already
+ allocated, reallocate them. if we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < search_state.num_regs + 1)
+ {
+ regs->num_regs = search_state.num_regs + 1;
+ regs->start = ((regoff_t *)
+ realloc (regs->start,
+ regs->num_regs * sizeof (regoff_t)));
+ regs->end = ((regoff_t *)
+ realloc (regs->end,
+ regs->num_regs * sizeof ( regoff_t)));
+ if (regs->start == 0 || regs->end == 0)
+ return rx_search_error;
+ }
+ }
+ else if (rxb->regs_allocated != REGS_FIXED)
+ return rx_search_error;
+
+ if (regs->num_regs < search_state.num_regs + 1)
+ {
+ search_state.best_lpspace =
+ ((regoff_t *)
+ REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)));
+ search_state.best_rpspace =
+ ((regoff_t *)
+ REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)));
+ search_state.best_lparen = search_state.best_lpspace;
+ search_state.best_rparen = search_state.best_rpspace;
+ }
+ else
+ {
+ search_state.best_lparen = regs->start;
+ search_state.best_rparen = regs->end;
+ }
+ }
+
+ search_state.lparen =
+ (regoff_t *) REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
+ search_state.rparen =
+ (regoff_t *) REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t));
+
+ if (! ( search_state.best_rparen
+ && search_state.best_lparen
+ && search_state.lparen && search_state.rparen))
+ return rx_search_error;
+
+ search_state.best_last_l = search_state.best_last_r = -1;
+
+ search_state.translate = (rxb->translate
+ ? rxb->translate
+ : rx_id_translation);
+
+
+
+ /*
+ * two nfa's were compiled.
+ * `0' is complete.
+ * `1' faster but gets registers wrong and ends too soon.
+ */
+ search_state.nfa_choice = (regs && !rxb->least_subs) ? '\0' : '\1';
+
+ /* we have the option to look for the best match or the first
+ * one we can find. if the user isn't asking for register information,
+ * we don't need to find the best match.
+ */
+ search_state.first_found = !regs;
+
+ if (range >= 0)
+ {
+ search_state.outer_pos.search_end = startpos + range;
+ search_state.outer_pos.search_direction = 1;
+ }
+ else
+ {
+ search_state.outer_pos.search_end = startpos + range;
+ search_state.outer_pos.search_direction = -1;
+ }
+
+ /* the vacuous search always turns up nothing. */
+ if ((search_state.outer_pos.search_direction == 1)
+ ? (startpos > search_state.outer_pos.search_end)
+ : (startpos < search_state.outer_pos.search_end))
+ return rx_search_fail;
+
+ /* now we build the starting state of the supernfa. */
+ {
+ struct rx_superset * start_contents;
+ struct rx_nfa_state_set * start_nfa_set;
+
+ /* we presume here that the nfa start state has only one
+ * possible future with no side effects.
+ */
+ start_nfa_set = rxb->start->futures->destset;
+ if ( rxb->rx.start_set
+ && (rxb->rx.start_set->starts_for == &rxb->rx))
+ start_contents = rxb->rx.start_set;
+ else
+ {
+ start_contents =
+ rx_superstate_eclosure_union (&rxb->rx,
+ rx_superset_cons (&rxb->rx, 0, 0),
+ start_nfa_set);
+
+ if (!start_contents)
+ return rx_search_fail;
+
+ start_contents->starts_for = &rxb->rx;
+ rxb->rx.start_set = start_contents;
+ }
+ if ( start_contents->superstate
+ && (start_contents->superstate->rx_id == rxb->rx.rx_id))
+ {
+ search_state.start_super = start_contents->superstate;
+ rx_lock_superstate (&rxb->rx, search_state.start_super);
+ }
+ else
+ {
+ rx_protect_superset (&rxb->rx, start_contents);
+
+ search_state.start_super = rx_superstate (&rxb->rx, start_contents);
+ if (!search_state.start_super)
+ return rx_search_fail;
+ rx_lock_superstate (&rxb->rx, search_state.start_super);
+ rx_release_superset (&rxb->rx, start_contents);
+ }
+ }
+
+
+ /* The outer_pos tracks the position within the strings
+ * as seen by the loop that calls fastmap_search.
+ *
+ * The caller supplied get_burst function actually
+ * gives us pointers to chars.
+ *
+ * Communication with the get_burst function is through an
+ * rx_string_position structure. Here, the structure for
+ * outer_pos is initialized. It is set to point to the
+ * NULL string, at an offset of STARTPOS. STARTPOS is out
+ * of range of the NULL string, so the first call to
+ * getburst will patch up the rx_string_position to point
+ * to valid characters.
+ */
+
+ ( search_state.outer_pos.string
+ = search_state.outer_pos.end
+ = 0);
+
+ search_state.outer_pos.offset = 0;
+ search_state.outer_pos.size = 0;
+ search_state.outer_pos.pos = (unsigned char *)startpos;
+ init_fastmap (rxb, &search_state);
+
+ search_state.fastmap_resume_pt = rx_fastmap_start;
+ case rx_outer_fastmap:
+ /* do { */
+ pseudo_do:
+ {
+ {
+ int fastmap_state;
+ fastmap_state = fastmap_search (rxb, stop, get_burst, app_closure,
+ &search_state);
+ switch (fastmap_state)
+ {
+ case rx_fastmap_continuation:
+ pc = rx_outer_fastmap;
+ goto return_continuation;
+ case rx_fastmap_fail:
+ goto finish;
+ case rx_fastmap_ok:
+ break;
+ }
+ }
+
+ /* now the fastmap loop has brought us to a plausible
+ * starting point for a match. so, it's time to run the
+ * nfa and see if a match occurred.
+ */
+ startpos = ( search_state.outer_pos.pos
+ - search_state.outer_pos.string
+ + search_state.outer_pos.offset);
+#if 0
+/*|*/ if ((range > 0) && (startpos == search_state.outer_pos.search_end))
+/*|*/ goto finish;
+#endif
+ }
+
+ search_state.test_match_resume_pt = rx_test_start;
+ /* do interrupted for entry point... */
+ case rx_outer_test:
+ /* ...do continued */
+ {
+ goto test_match;
+ test_returns_to_search:
+ switch (test_state)
+ {
+ case rx_test_continuation:
+ pc = rx_outer_test;
+ goto return_continuation;
+ case rx_test_error:
+ search_state.ret_val = rx_search_error;
+ goto finish;
+ case rx_test_fail:
+ break;
+ case rx_test_ok:
+ goto finish;
+ }
+ search_state.outer_pos.pos += search_state.outer_pos.search_direction;
+ startpos += search_state.outer_pos.search_direction;
+#if 0
+/*|*/ if (search_state.test_pos.pos < search_state.test_pos.end)
+/*|*/ break;
+#endif
+ }
+ /* do interrupted for entry point... */
+ case rx_outer_restore_pos:
+ {
+ int x;
+ x = get_burst (&search_state.outer_pos, app_closure, stop);
+ switch (x)
+ {
+ case rx_get_burst_continuation:
+ pc = rx_outer_restore_pos;
+ goto return_continuation;
+ case rx_get_burst_error:
+ search_state.ret_val = rx_search_error;
+ goto finish;
+ case rx_get_burst_no_more:
+ if (rxb->can_match_empty)
+ break;
+ goto finish;
+ case rx_get_burst_ok:
+ break;
+ }
+ } /* } while (...see below...) */
+
+ if ((search_state.outer_pos.search_direction == 1)
+ ? (startpos <= search_state.outer_pos.search_end)
+ : (startpos > search_state.outer_pos.search_end))
+ goto pseudo_do;
+
+
+ finish:
+ uninit_fastmap (rxb, &search_state);
+ if (search_state.start_super)
+ rx_unlock_superstate (&rxb->rx, search_state.start_super);
+
+#ifdef regex_malloc
+ if (search_state.lparen) free (search_state.lparen);
+ if (search_state.rparen) free (search_state.rparen);
+ if (search_state.best_lpspace) free (search_state.best_lpspace);
+ if (search_state.best_rpspace) free (search_state.best_rpspace);
+#endif
+ return search_state.ret_val;
+ }
+
+
+ test_match:
+ {
+ enum rx_test_match_entry test_pc;
+ int inx;
+ test_pc = search_state.test_match_resume_pt;
+ if (test_pc == rx_test_start)
+ {
+#ifdef RX_DEBUG
+ search_state.backtrack_depth = 0;
+#endif
+ search_state.last_l = search_state.last_r = 0;
+ search_state.lparen[0] = startpos;
+ search_state.super = search_state.start_super;
+ search_state.c = search_state.nfa_choice;
+ search_state.test_pos.pos = search_state.outer_pos.pos - 1;
+ search_state.test_pos.string = search_state.outer_pos.string;
+ search_state.test_pos.end = search_state.outer_pos.end;
+ search_state.test_pos.offset = search_state.outer_pos.offset;
+ search_state.test_pos.size = search_state.outer_pos.size;
+ search_state.test_pos.search_direction = 1;
+ search_state.counter_stack = 0;
+ search_state.backtrack_stack = 0;
+ search_state.backtrack_frame_bytes =
+ (sizeof (struct rx_backtrack_frame)
+ + (rxb->match_regs_on_stack
+ ? sizeof (regoff_t) * (search_state.num_regs + 1) * 2
+ : 0));
+ search_state.chunk_bytes = search_state.backtrack_frame_bytes * 64;
+ search_state.test_ret = rx_test_line_finished;
+ search_state.could_have_continued = 0;
+ }
+ /* This is while (1)...except that the body of the loop is interrupted
+ * by some alternative entry points.
+ */
+ pseudo_while_1:
+ switch (test_pc)
+ {
+ case rx_test_cache_hit_loop:
+ goto resume_continuation_1;
+ case rx_test_backreference_check:
+ goto resume_continuation_2;
+ case rx_test_backtrack_return:
+ goto resume_continuation_3;
+ case rx_test_start:
+#ifdef RX_DEBUG
+ /* There is a search tree in which every node is a set of deterministic
+ * transitions in the super nfa. Every branch of a
+ * backtrack point is an edge in the tree.
+ * This counts up a pre-order of nodes in that tree.
+ * It's saved on the search stack and printed when debugging.
+ */
+ search_state.line_no = 0;
+ search_state.lines_found = 0;
+#endif
+
+ top_of_cycle:
+ /* A superstate is basically a transition table, indexed by
+ * characters from the string being tested, and containing
+ * RX_INX (`instruction frame') structures.
+ */
+ search_state.ifr = &search_state.super->transitions [search_state.c];
+
+ recurse_test_match:
+ /* This is the point to which control is sent when the
+ * test matcher `recurses'. Before jumping here, some variables
+ * need to be saved on the stack and the next instruction frame
+ * has to be computed.
+ */
+
+ restart:
+ /* Some instructions don't advance the matcher, but just
+ * carry out some side effects and fetch a new instruction.
+ * To dispatch that new instruction, `goto restart'.
+ */
+
+ {
+ struct rx_inx * next_tr_table;
+ struct rx_inx * this_tr_table;
+
+ /* The fastest route through the loop is when the instruction
+ * is RX_NEXT_CHAR. This case is detected when SEARCH_STATE.IFR->DATA
+ * is non-zero. In that case, it points to the next
+ * superstate.
+ *
+ * This allows us to not bother fetching the bytecode.
+ */
+ next_tr_table = (struct rx_inx *)search_state.ifr->data;
+ this_tr_table = search_state.super->transitions;
+ while (next_tr_table)
+ {
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ {
+ struct rx_superset * setp;
+
+ fprintf (stderr, "%d %d>> re_next_char @ %d (%d)",
+ search_state.line_no,
+ search_state.backtrack_depth,
+ (search_state.test_pos.pos - search_state.test_pos.string
+ + search_state.test_pos.offset), search_state.c);
+
+ search_state.super =
+ ((struct rx_superstate *)
+ ((char *)this_tr_table
+ - ((unsigned long)
+ ((struct rx_superstate *)0)->transitions)));
+
+ setp = search_state.super->contents;
+ fprintf (stderr, " superstet (rx=%d, &=%x: ",
+ rxb->rx.rx_id, setp);
+ while (setp)
+ {
+ fprintf (stderr, "%d ", setp->id);
+ setp = setp->cdr;
+ }
+ fprintf (stderr, "\n");
+ }
+#endif
+ this_tr_table = next_tr_table;
+ ++search_state.test_pos.pos;
+ if (search_state.test_pos.pos == search_state.test_pos.end)
+ {
+ int burst_state;
+ try_burst_1:
+ burst_state = get_burst (&search_state.test_pos,
+ app_closure, stop);
+ switch (burst_state)
+ {
+ case rx_get_burst_continuation:
+ search_state.saved_this_tr_table = this_tr_table;
+ search_state.saved_next_tr_table = next_tr_table;
+ test_pc = rx_test_cache_hit_loop;
+ goto test_return_continuation;
+
+ resume_continuation_1:
+ /* Continuation one jumps here to do its work: */
+ search_state.saved_this_tr_table = this_tr_table;
+ search_state.saved_next_tr_table = next_tr_table;
+ goto try_burst_1;
+
+ case rx_get_burst_ok:
+ /* get_burst succeeded...keep going */
+ break;
+
+ case rx_get_burst_no_more:
+ search_state.test_ret = rx_test_line_finished;
+ search_state.could_have_continued = 1;
+ goto test_do_return;
+
+ case rx_get_burst_error:
+ /* An error... */
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ }
+ }
+ search_state.c = *search_state.test_pos.pos;
+ search_state.ifr = this_tr_table + search_state.c;
+ next_tr_table = (struct rx_inx *)search_state.ifr->data;
+ } /* Fast loop through cached transition tables */
+
+ /* Here when we ran out of cached next-char transitions.
+ * So, it will be necessary to do a more expensive
+ * dispatch on the current instruction. The superstate
+ * pointer is allowed to become invalid during next-char
+ * transitions -- now we must bring it up to date.
+ */
+ search_state.super =
+ ((struct rx_superstate *)
+ ((char *)this_tr_table
+ - ((unsigned long)
+ ((struct rx_superstate *)0)->transitions)));
+ }
+
+ /* We've encountered an instruction other than next-char.
+ * Dispatch that instruction:
+ */
+ inx = (int)search_state.ifr->inx;
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ {
+ struct rx_superset * setp = search_state.super->contents;
+
+ fprintf (stderr, "%d %d>> %s @ %d (%d)", search_state.line_no,
+ search_state.backtrack_depth,
+ inx_names[inx],
+ (search_state.test_pos.pos - search_state.test_pos.string
+ + (test_pos.half == 0 ? 0 : size1)), search_state.c);
+
+ fprintf (stderr, " superstet (rx=%d, &=%x: ",
+ rxb->rx.rx_id, setp);
+ while (setp)
+ {
+ fprintf (stderr, "%d ", setp->id);
+ setp = setp->cdr;
+ }
+ fprintf (stderr, "\n");
+ }
+#endif
+ switch ((enum rx_opcode)inx)
+ {
+ case rx_do_side_effects:
+
+ /* RX_DO_SIDE_EFFECTS occurs when we cross epsilon
+ * edges associated with parentheses, backreferencing, etc.
+ */
+ {
+ struct rx_distinct_future * df =
+ (struct rx_distinct_future *)search_state.ifr->data_2;
+ struct rx_se_list * el = df->effects;
+ /* Side effects come in lists. This walks down
+ * a list, dispatching.
+ */
+ while (el)
+ {
+ long effect;
+ effect = (long)el->car;
+ if (effect < 0)
+ {
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ {
+ struct rx_superset * setp = search_state.super->contents;
+
+ fprintf (stderr, "....%d %d>> %s\n", search_state.line_no,
+ search_state.backtrack_depth,
+ efnames[-effect]);
+ }
+#endif
+ switch ((enum re_side_effects) effect)
+
+ {
+ case re_se_pushback:
+ search_state.ifr = &df->future_frame;
+ if (!search_state.ifr->data)
+ {
+ struct rx_superstate * sup;
+ sup = search_state.super;
+ rx_lock_superstate (rx, sup);
+ if (!rx_handle_cache_miss (&rxb->rx,
+ search_state.super,
+ search_state.c,
+ (search_state.ifr
+ ->data_2)))
+ {
+ rx_unlock_superstate (rx, sup);
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ }
+ rx_unlock_superstate (rx, sup);
+ }
+ /* --search_state.test_pos.pos; */
+ search_state.c = 't';
+ search_state.super
+ = ((struct rx_superstate *)
+ ((char *)search_state.ifr->data
+ - (long)(((struct rx_superstate *)0)
+ ->transitions)));
+ goto top_of_cycle;
+ break;
+ case re_se_push0:
+ {
+ struct rx_counter_frame * old_cf
+ = (search_state.counter_stack
+ ? ((struct rx_counter_frame *)
+ search_state.counter_stack->sp)
+ : 0);
+ struct rx_counter_frame * cf;
+ PUSH (search_state.counter_stack,
+ sizeof (struct rx_counter_frame));
+ cf = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ cf->tag = re_se_iter;
+ cf->val = 0;
+ cf->inherited_from = 0;
+ cf->cdr = old_cf;
+ break;
+ }
+ case re_se_fail:
+ goto test_do_return;
+ case re_se_begbuf:
+ if (!AT_STRINGS_BEG ())
+ goto test_do_return;
+ break;
+ case re_se_endbuf:
+ if (!AT_STRINGS_END ())
+ goto test_do_return;
+ break;
+ case re_se_wordbeg:
+ if ( LETTER_P (&search_state.test_pos, 1)
+ && ( AT_STRINGS_BEG()
+ || !LETTER_P (&search_state.test_pos, 0)))
+ break;
+ else
+ goto test_do_return;
+ case re_se_wordend:
+ if ( !AT_STRINGS_BEG ()
+ && LETTER_P (&search_state.test_pos, 0)
+ && (AT_STRINGS_END ()
+ || !LETTER_P (&search_state.test_pos, 1)))
+ break;
+ else
+ goto test_do_return;
+ case re_se_wordbound:
+ if (AT_WORD_BOUNDARY (&search_state.test_pos))
+ break;
+ else
+ goto test_do_return;
+ case re_se_notwordbound:
+ if (!AT_WORD_BOUNDARY (&search_state.test_pos))
+ break;
+ else
+ goto test_do_return;
+ case re_se_hat:
+ if (AT_STRINGS_BEG ())
+ {
+ if (rxb->not_bol)
+ goto test_do_return;
+ else
+ break;
+ }
+ else
+ {
+ char pos_c = *search_state.test_pos.pos;
+ if ( (SRCH_TRANSLATE (pos_c)
+ == SRCH_TRANSLATE('\n'))
+ && rxb->newline_anchor)
+ break;
+ else
+ goto test_do_return;
+ }
+ case re_se_dollar:
+ if (AT_STRINGS_END ())
+ {
+ if (rxb->not_eol)
+ goto test_do_return;
+ else
+ break;
+ }
+ else
+ {
+ if ( ( SRCH_TRANSLATE (fetch_char
+ (&search_state.test_pos, 1,
+ app_closure, stop))
+ == SRCH_TRANSLATE ('\n'))
+ && rxb->newline_anchor)
+ break;
+ else
+ goto test_do_return;
+ }
+
+ case re_se_try:
+ /* This is the first side effect in every
+ * expression.
+ *
+ * FOR NO GOOD REASON...get rid of it...
+ */
+ break;
+
+ case re_se_pushpos:
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+ struct rx_counter_frame * old_cf
+ = (search_state.counter_stack
+ ? ((struct rx_counter_frame *)
+ search_state.counter_stack->sp)
+ : 0);
+ struct rx_counter_frame * cf;
+ PUSH(search_state.counter_stack,
+ sizeof (struct rx_counter_frame));
+ cf = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ cf->tag = re_se_pushpos;
+ cf->val = urhere;
+ cf->inherited_from = 0;
+ cf->cdr = old_cf;
+ break;
+ }
+
+ case re_se_chkpos:
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+ struct rx_counter_frame * cf
+ = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ if (cf->val == urhere)
+ goto test_do_return;
+ cf->val = urhere;
+ break;
+ }
+ break;
+
+ case re_se_poppos:
+ POP(search_state.counter_stack,
+ sizeof (struct rx_counter_frame));
+ break;
+
+
+ case re_se_at_dot:
+ case re_se_syntax:
+ case re_se_not_syntax:
+#ifdef emacs
+ /*
+ * this release lacks emacs support
+ */
+#endif
+ break;
+ case re_se_win:
+ case re_se_lparen:
+ case re_se_rparen:
+ case re_se_backref:
+ case re_se_iter:
+ case re_se_end_iter:
+ case re_se_tv:
+ case re_floogle_flap:
+ search_state.ret_val = 0;
+ goto test_do_return;
+ }
+ }
+ else
+ {
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ fprintf (stderr, "....%d %d>> %s %d %d\n", search_state.line_no,
+ search_state.backtrack_depth,
+ efnames2[rxb->se_params [effect].se],
+ rxb->se_params [effect].op1,
+ rxb->se_params [effect].op2);
+#endif
+ switch (rxb->se_params [effect].se)
+ {
+ case re_se_win:
+ /* This side effect indicates that we've
+ * found a match, though not necessarily the
+ * best match. This is a fancy assignment to
+ * register 0 unless the caller didn't
+ * care about registers. In which case,
+ * this stops the match.
+ */
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+
+ if ( (search_state.best_last_r < 0)
+ || (urhere + 1 > search_state.best_rparen[0]))
+ {
+ /* Record the best known and keep
+ * looking.
+ */
+ int x;
+ for (x = 0; x <= search_state.last_l; ++x)
+ search_state.best_lparen[x] = search_state.lparen[x];
+ search_state.best_last_l = search_state.last_l;
+ for (x = 0; x <= search_state.last_r; ++x)
+ search_state.best_rparen[x] = search_state.rparen[x];
+ search_state.best_rparen[0] = urhere + 1;
+ search_state.best_last_r = search_state.last_r;
+ }
+ /* If we're not reporting the match-length
+ * or other register info, we need look no
+ * further.
+ */
+ if (search_state.first_found)
+ {
+ search_state.test_ret = rx_test_found_first;
+ goto test_do_return;
+ }
+ }
+ break;
+ case re_se_lparen:
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+
+ int reg = rxb->se_params [effect].op1;
+#if 0
+ if (reg > search_state.last_l)
+#endif
+ {
+ search_state.lparen[reg] = urhere + 1;
+ /* In addition to making this assignment,
+ * we now know that lower numbered regs
+ * that haven't already been assigned,
+ * won't be. We make sure they're
+ * filled with -1, so they can be
+ * recognized as unassigned.
+ */
+ if (search_state.last_l < reg)
+ while (++search_state.last_l < reg)
+ search_state.lparen[search_state.last_l] = -1;
+ }
+ break;
+ }
+
+ case re_se_rparen:
+ {
+ int urhere =
+ ((int)(search_state.test_pos.pos
+ - search_state.test_pos.string)
+ + search_state.test_pos.offset);
+ int reg = rxb->se_params [effect].op1;
+ search_state.rparen[reg] = urhere + 1;
+ if (search_state.last_r < reg)
+ {
+ while (++search_state.last_r < reg)
+ search_state.rparen[search_state.last_r]
+ = -1;
+ }
+ break;
+ }
+
+ case re_se_backref:
+ {
+ int reg = rxb->se_params [effect].op1;
+ if ( reg > search_state.last_r
+ || search_state.rparen[reg] < 0)
+ goto test_do_return;
+
+ {
+ int backref_status;
+ check_backreference:
+ backref_status
+ = back_check (&search_state.test_pos,
+ search_state.lparen[reg],
+ search_state.rparen[reg],
+ search_state.translate,
+ app_closure,
+ stop);
+ switch (backref_status)
+ {
+ case rx_back_check_continuation:
+ search_state.saved_reg = reg;
+ test_pc = rx_test_backreference_check;
+ goto test_return_continuation;
+ resume_continuation_2:
+ reg = search_state.saved_reg;
+ goto check_backreference;
+ case rx_back_check_fail:
+ /* Fail */
+ goto test_do_return;
+ case rx_back_check_pass:
+ /* pass --
+ * test_pos now advanced to last
+ * char matched by backref
+ */
+ break;
+ }
+ }
+ break;
+ }
+ case re_se_iter:
+ {
+ struct rx_counter_frame * csp
+ = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ if (csp->val == rxb->se_params[effect].op2)
+ goto test_do_return;
+ else
+ ++csp->val;
+ break;
+ }
+ case re_se_end_iter:
+ {
+ struct rx_counter_frame * csp
+ = ((struct rx_counter_frame *)
+ search_state.counter_stack->sp);
+ if (csp->val < rxb->se_params[effect].op1)
+ goto test_do_return;
+ else
+ {
+ struct rx_counter_frame * source = csp;
+ while (source->inherited_from)
+ source = source->inherited_from;
+ if (!source || !source->cdr)
+ {
+ POP(search_state.counter_stack,
+ sizeof(struct rx_counter_frame));
+ }
+ else
+ {
+ source = source->cdr;
+ csp->val = source->val;
+ csp->tag = source->tag;
+ csp->cdr = 0;
+ csp->inherited_from = source;
+ }
+ }
+ break;
+ }
+ case re_se_tv:
+ /* is a noop */
+ break;
+ case re_se_try:
+ case re_se_pushback:
+ case re_se_push0:
+ case re_se_pushpos:
+ case re_se_chkpos:
+ case re_se_poppos:
+ case re_se_at_dot:
+ case re_se_syntax:
+ case re_se_not_syntax:
+ case re_se_begbuf:
+ case re_se_hat:
+ case re_se_wordbeg:
+ case re_se_wordbound:
+ case re_se_notwordbound:
+ case re_se_wordend:
+ case re_se_endbuf:
+ case re_se_dollar:
+ case re_se_fail:
+ case re_floogle_flap:
+ search_state.ret_val = 0;
+ goto test_do_return;
+ }
+ }
+ el = el->cdr;
+ }
+ /* Now the side effects are done,
+ * so get the next instruction.
+ * and move on.
+ */
+ search_state.ifr = &df->future_frame;
+ goto restart;
+ }
+
+ case rx_backtrack_point:
+ {
+ /* A backtrack point indicates that we've reached a
+ * non-determinism in the superstate NFA. This is a
+ * loop that exhaustively searches the possibilities.
+ *
+ * A backtracking strategy is used. We keep track of what
+ * registers are valid so we can erase side effects.
+ *
+ * First, make sure there is some stack space to hold
+ * our state.
+ */
+
+ struct rx_backtrack_frame * bf;
+
+ PUSH(search_state.backtrack_stack,
+ search_state.backtrack_frame_bytes);
+#ifdef RX_DEBUG_0
+ ++search_state.backtrack_depth;
+#endif
+
+ bf = ((struct rx_backtrack_frame *)
+ search_state.backtrack_stack->sp);
+ {
+ bf->stk_super = search_state.super;
+ /* We prevent the current superstate from being
+ * deleted from the superstate cache.
+ */
+ rx_lock_superstate (&rxb->rx, search_state.super);
+#ifdef RX_DEBUG_0
+ bf->stk_search_state.line_no = search_state.line_no;
+#endif
+ bf->stk_c = search_state.c;
+ bf->stk_test_pos = search_state.test_pos;
+ bf->stk_last_l = search_state.last_l;
+ bf->stk_last_r = search_state.last_r;
+ bf->df = ((struct rx_super_edge *)
+ search_state.ifr->data_2)->options;
+ bf->first_df = bf->df;
+ bf->counter_stack_sp = (search_state.counter_stack
+ ? search_state.counter_stack->sp
+ : 0);
+ bf->stk_test_ret = search_state.test_ret;
+ if (rxb->match_regs_on_stack)
+ {
+ int x;
+ regoff_t * stk =
+ (regoff_t *)((char *)bf + sizeof (*bf));
+ for (x = 0; x <= search_state.last_l; ++x)
+ stk[x] = search_state.lparen[x];
+ stk += x;
+ for (x = 0; x <= search_state.last_r; ++x)
+ stk[x] = search_state.rparen[x];
+ }
+ }
+
+ /* Here is a while loop whose body is mainly a function
+ * call and some code to handle a return from that
+ * function.
+ *
+ * From here on for the rest of `case backtrack_point' it
+ * is unsafe to assume that the search_state copies of
+ * variables saved on the backtracking stack are valid
+ * -- so read their values from the backtracking stack.
+ *
+ * This lets us use one generation fewer stack saves in
+ * the call-graph of a search.
+ */
+
+ while_non_det_options:
+#ifdef RX_DEBUG_0
+ ++search_state.lines_found;
+ if (rx_debug_trace)
+ fprintf (stderr, "@@@ %d calls %d @@@\n",
+ search_state.line_no, search_state.lines_found);
+
+ search_state.line_no = search_state.lines_found;
+#endif
+
+ if (bf->df->next_same_super_edge[0] == bf->first_df)
+ {
+ /* This is a tail-call optimization -- we don't recurse
+ * for the last of the possible futures.
+ */
+ search_state.ifr = (bf->df->effects
+ ? &bf->df->side_effects_frame
+ : &bf->df->future_frame);
+
+ rx_unlock_superstate (&rxb->rx, search_state.super);
+ POP(search_state.backtrack_stack,
+ search_state.backtrack_frame_bytes);
+#ifdef RX_DEBUG
+ --search_state.backtrack_depth;
+#endif
+ goto restart;
+ }
+ else
+ {
+ if (search_state.counter_stack)
+ {
+ struct rx_counter_frame * old_cf
+ = ((struct rx_counter_frame *)search_state.counter_stack->sp);
+ struct rx_counter_frame * cf;
+ PUSH(search_state.counter_stack, sizeof (struct rx_counter_frame));
+ cf = ((struct rx_counter_frame *)search_state.counter_stack->sp);
+ cf->tag = old_cf->tag;
+ cf->val = old_cf->val;
+ cf->inherited_from = old_cf;
+ cf->cdr = 0;
+ }
+ /* `Call' this test-match block */
+ search_state.ifr = (bf->df->effects
+ ? &bf->df->side_effects_frame
+ : &bf->df->future_frame);
+ goto recurse_test_match;
+ }
+
+ /* Returns in this block are accomplished by
+ * goto test_do_return. There are two cases.
+ * If there is some search-stack left,
+ * then it is a return from a `recursive' call.
+ * If there is no search-stack left, then
+ * we should return to the fastmap/search loop.
+ */
+
+ test_do_return:
+
+ if (!search_state.backtrack_stack)
+ {
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ fprintf (stderr, "!!! %d bails returning %d !!!\n",
+ search_state.line_no, search_state.test_ret);
+#endif
+
+ /* No more search-stack -- this test is done. */
+ if (search_state.test_ret != rx_test_internal_error)
+ goto return_from_test_match;
+ else
+ goto error_in_testing_match;
+ }
+
+ /* Returning from a recursive call to
+ * the test match block:
+ */
+
+ bf = ((struct rx_backtrack_frame *)
+ search_state.backtrack_stack->sp);
+#ifdef RX_DEBUG_0
+ if (rx_debug_trace)
+ fprintf (stderr, "+++ %d returns %d (to %d)+++\n",
+ search_state.line_no,
+ search_state.test_ret,
+ bf->stk_search_state.line_no);
+#endif
+
+ while (search_state.counter_stack
+ && (!bf->counter_stack_sp
+ || (bf->counter_stack_sp
+ != search_state.counter_stack->sp)))
+ {
+ POP(search_state.counter_stack,
+ sizeof (struct rx_counter_frame));
+ }
+
+ if (search_state.test_ret == rx_test_internal_error)
+ {
+ POP (search_state.backtrack_stack,
+ search_state.backtrack_frame_bytes);
+ goto test_do_return;
+ }
+
+ /* If a non-longest match was found and that is good
+ * enough, return immediately.
+ */
+ if ( (search_state.test_ret == rx_test_found_first)
+ && search_state.first_found)
+ {
+ rx_unlock_superstate (&rxb->rx, bf->stk_super);
+ POP (search_state.backtrack_stack,
+ search_state.backtrack_frame_bytes);
+ goto test_do_return;
+ }
+
+ search_state.test_ret = bf->stk_test_ret;
+ search_state.last_l = bf->stk_last_l;
+ search_state.last_r = bf->stk_last_r;
+ bf->df = bf->df->next_same_super_edge[0];
+ search_state.super = bf->stk_super;
+ search_state.c = bf->stk_c;
+#ifdef RX_DEBUG_0
+ search_state.line_no = bf->stk_search_state.line_no;
+#endif
+
+ if (rxb->match_regs_on_stack)
+ {
+ int x;
+ regoff_t * stk =
+ (regoff_t *)((char *)bf + sizeof (*bf));
+ for (x = 0; x <= search_state.last_l; ++x)
+ search_state.lparen[x] = stk[x];
+ stk += x;
+ for (x = 0; x <= search_state.last_r; ++x)
+ search_state.rparen[x] = stk[x];
+ }
+
+ if ((search_state.test_ret != rx_test_line_finished) &&
+ (search_state.test_ret != rx_test_internal_error))
+ {
+ int x;
+ try_burst_2:
+ x = get_burst (&bf->stk_test_pos, app_closure, stop);
+ switch (x)
+ {
+ case rx_get_burst_continuation:
+ search_state.saved_bf = bf;
+ test_pc = rx_test_backtrack_return;
+ goto test_return_continuation;
+ resume_continuation_3:
+ bf = search_state.saved_bf;
+ goto try_burst_2;
+ case rx_get_burst_no_more:
+ /* Since we've been here before, it is some kind of
+ * error that we can't return.
+ */
+ case rx_get_burst_error:
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ case rx_get_burst_ok:
+ break;
+ }
+ }
+ search_state.test_pos = bf->stk_test_pos;
+ goto while_non_det_options;
+ }
+
+
+ case rx_cache_miss:
+ /* Because the superstate NFA is lazily constructed,
+ * and in fact may erode from underneath us, we sometimes
+ * have to construct the next instruction the hard way.
+ * This invokes one step in the lazy-conversion.
+ */
+ search_state.ifr = rx_handle_cache_miss (&rxb->rx,
+ search_state.super,
+ search_state.c,
+ search_state.ifr->data_2);
+ if (!search_state.ifr)
+ {
+ search_state.test_ret = rx_test_internal_error;
+ goto test_do_return;
+ }
+ goto restart;
+
+ case rx_backtrack:
+ /* RX_BACKTRACK means that we've reached the empty
+ * superstate, indicating that match can't succeed
+ * from this point.
+ */
+ goto test_do_return;
+
+ case rx_next_char:
+ case rx_error_inx:
+ case rx_num_instructions:
+ search_state.ret_val = 0;
+ goto test_do_return;
+ }
+ goto pseudo_while_1;
+ }
+
+ /* Healthy exits from the test-match loop do a
+ * `goto return_from_test_match'. On the other hand,
+ * we might end up here.
+ */
+ error_in_testing_match:
+ test_state = rx_test_error;
+ goto test_returns_to_search;
+
+ /***** fastmap/search loop body
+ * considering the results testing for a match
+ */
+
+ return_from_test_match:
+
+ if (search_state.best_last_l >= 0)
+ {
+ if (regs && (regs->start != search_state.best_lparen))
+ {
+ bcopy (search_state.best_lparen, regs->start,
+ regs->num_regs * sizeof (int));
+ bcopy (search_state.best_rparen, regs->end,
+ regs->num_regs * sizeof (int));
+ }
+ if (regs && !rxb->no_sub)
+ {
+ int q;
+ int bound = (regs->num_regs < search_state.num_regs
+ ? regs->num_regs
+ : search_state.num_regs);
+ regoff_t * s = regs->start;
+ regoff_t * e = regs->end;
+ for (q = search_state.best_last_l + 1; q < bound; ++q)
+ s[q] = e[q] = -1;
+ }
+ search_state.ret_val = search_state.best_lparen[0];
+ test_state = rx_test_ok;
+ goto test_returns_to_search;
+ }
+ else
+ {
+ test_state = rx_test_fail;
+ goto test_returns_to_search;
+ }
+
+ test_return_continuation:
+ search_state.test_match_resume_pt = test_pc;
+ test_state = rx_test_continuation;
+ goto test_returns_to_search;
+ }
+}
+
+
+
+#endif /* RX_WANT_RX_DEFS */
+
+
+
+#else /* RX_WANT_SE_DEFS */
+ /* Integers are used to represent side effects.
+ *
+ * Simple side effects are given negative integer names by these enums.
+ *
+ * Non-negative names are reserved for complex effects.
+ *
+ * Complex effects are those that take arguments. For example,
+ * a register assignment associated with a group is complex because
+ * it requires an argument to tell which group is being matched.
+ *
+ * The integer name of a complex effect is an index into rxb->se_params. NOTE(review): the leading 0/1 argument of each entry is interpreted by the includer's RX_DEF_SE / RX_DEF_CPLX_SE definition, which is not visible here -- confirm its meaning there.
+ */
+
+ RX_DEF_SE(1, re_se_try, = -1) /* Epsilon from start state; the matcher treats it as a no-op */
+
+ RX_DEF_SE(0, re_se_pushback, = re_se_try - 1) /* matcher re-dispatches from the future frame, resolving a cache miss first if needed */
+ RX_DEF_SE(0, re_se_push0, = re_se_pushback -1) /* matcher pushes a counter frame whose value is 0 */
+ RX_DEF_SE(0, re_se_pushpos, = re_se_push0 - 1) /* matcher pushes a counter frame holding the current position */
+ RX_DEF_SE(0, re_se_chkpos, = re_se_pushpos -1) /* matcher fails if the position equals the top counter value, else stores the position there */
+ RX_DEF_SE(0, re_se_poppos, = re_se_chkpos - 1) /* matcher pops the top counter frame */
+
+ RX_DEF_SE(1, re_se_at_dot, = re_se_poppos - 1) /* Emacs only */
+ RX_DEF_SE(0, re_se_syntax, = re_se_at_dot - 1) /* Emacs only */
+ RX_DEF_SE(0, re_se_not_syntax, = re_se_syntax - 1) /* Emacs only */
+
+ RX_DEF_SE(1, re_se_begbuf, = re_se_not_syntax - 1) /* match beginning of buffer */
+ RX_DEF_SE(1, re_se_hat, = re_se_begbuf - 1) /* match beginning of line */
+
+ RX_DEF_SE(1, re_se_wordbeg, = re_se_hat - 1) /* succeeds only where a word starts (LETTER_P tests in the matcher) */
+ RX_DEF_SE(1, re_se_wordbound, = re_se_wordbeg - 1) /* succeeds only when AT_WORD_BOUNDARY holds */
+ RX_DEF_SE(1, re_se_notwordbound, = re_se_wordbound - 1) /* succeeds only when AT_WORD_BOUNDARY does not hold */
+
+ RX_DEF_SE(1, re_se_wordend, = re_se_notwordbound - 1) /* succeeds only where a word ends (LETTER_P tests in the matcher) */
+ RX_DEF_SE(1, re_se_endbuf, = re_se_wordend - 1) /* succeeds only when AT_STRINGS_END holds */
+
+ /* This fails except at the end of a line.
+ * It deserves to go here since it is typically one of the last steps
+ * in a match.
+ */
+ RX_DEF_SE(1, re_se_dollar, = re_se_endbuf - 1)
+
+ /* Simple effects: */
+ RX_DEF_SE(1, re_se_fail, = re_se_dollar - 1) /* matcher unconditionally abandons the current path */
+
+ /* Complex effects. These are used in the 'se' field of
+ * a struct re_se_params. Indexes into the se array
+ * are stored as instructions on nfa edges.
+ */
+ RX_DEF_CPLX_SE(1, re_se_win, = 0) /* a match was reached; the matcher records it when it beats the best so far */
+ RX_DEF_CPLX_SE(1, re_se_lparen, = re_se_win + 1) /* matcher stores the current position + 1 in lparen[op1] */
+ RX_DEF_CPLX_SE(1, re_se_rparen, = re_se_lparen + 1) /* matcher stores the current position + 1 in rparen[op1] */
+ RX_DEF_CPLX_SE(0, re_se_backref, = re_se_rparen + 1) /* matcher checks a backreference to register op1 via back_check */
+ RX_DEF_CPLX_SE(0, re_se_iter, = re_se_backref + 1) /* matcher fails if the top counter equals op2, else increments it */
+ RX_DEF_CPLX_SE(0, re_se_end_iter, = re_se_iter + 1) /* matcher fails if the top counter is below op1, else retires or re-inherits the counter */
+ RX_DEF_CPLX_SE(0, re_se_tv, = re_se_end_iter + 1) /* no-op in the matcher */
+
+#endif
+
+#if RX_WANT_SE_DEFS != 1
+__END_DECLS
+#endif
+
+#endif
diff --git a/include/regexp.h b/include/regexp.h
index 73d6bf412..174e10b75 100644
--- a/include/regexp.h
+++ b/include/regexp.h
@@ -1,21 +1,224 @@
/*
- * Definitions etc. for regexp(3) routines.
+ * regexp.h -- old-style regexp compile and step (emulated with POSIX regex)
+ * Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
*
- * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
- * not the System V one.
- */
-#define NSUBEXP 10
-typedef struct regexp {
- char *startp[NSUBEXP];
- char *endp[NSUBEXP];
- char regstart; /* Internal use only. */
- char reganch; /* Internal use only. */
- char *regmust; /* Internal use only. */
- int regmlen; /* Internal use only. */
- char program[1]; /* Unwarranted chumminess with compiler. */
-} regexp;
-
-extern regexp *regcomp();
-extern int regexec();
-extern void regsub();
-extern void regerror();
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ */
+
+/*
+ * Think really hard before you intentionally include this file.
+ * You should really be using the POSIX regex interface instead.
+ * This emulation file is intended solely for compiling old code.
+ *
+ * A program that uses this file must define six macros: INIT,
+ * GETC, PEEKC, UNGETC, RETURN, and ERROR. This interface is
+ * so arcane that VMS hackers point at it in ridicule.
+ */
+
+#ifndef _REGEXP_H
+#define _REGEXP_H
+
+#include <sys/types.h> /* regex.h needs size_t */
+#include <regex.h> /* POSIX.2 regexp routines */
+#include <stdlib.h> /* for malloc, realloc and free */
+
+/*
+ * These three advertised external variables record state information
+ * for compile and step. They are so gross, I'm choking as I write this.
+ */
+char *loc1; /* the beginning of a match */
+char *loc2; /* the end of a match */
+int circf; /* current pattern begins with '^' */
+
+/*
+ * These are the other variables mentioned in the regexp.h manpage.
+ * Since we don't emulate them (whatever they do), we want errors if
+ * they are referenced. Therefore they are commented out here.
+ */
+#if 0
+char *locs;
+int sed;
+int nbra;
+#endif
+
+/*
+ * We need to stuff a regex_t into an arbitrary buffer so align it.
+ * GCC makes this easy. For the others we have to guess.
+ */
+#ifdef __GNUC__
+#define __REGEX_T_ALIGN (__alignof__(regex_t))
+#else /* !__GNUC__ */
+#define __REGEX_T_ALIGN 8
+#endif /* !__GNUC__ */
+
+/*
+ * Round the address P up to the next multiple of __REGEX_T_ALIGN and
+ * yield it as a regex_t pointer.  P is parenthesized so that an
+ * expression argument such as `buf + 1' is cast as a whole rather than
+ * only its first operand.
+ */
+#define __regex_t_align(p)						\
+	((regex_t *) ((((unsigned long) (p)) + __REGEX_T_ALIGN - 1)	\
+		      / __REGEX_T_ALIGN * __REGEX_T_ALIGN))
+
+/*
+ * We just slurp the whole pattern into a string and then compile
+ * it `normally'.  With this implementation we never use the PEEKC
+ * macro.  Please feel free to die laughing when we translate
+ * error symbols into hard-coded numbers.
+ *
+ * The pattern is read with GETC() until the character EOF is seen, or
+ * until a NUL or newline is read (those two are pushed back with
+ * UNGETC()).  A leading '^' is not stored in the pattern; it is recorded
+ * in `circf' instead.  The regex_t is placed at the first suitably
+ * aligned address inside EXPBUF, whose end is given by ENDBUF.
+ *
+ * On failure ERROR(code) is invoked with an old-style numeric code
+ * (41 for an empty pattern, 50 for lack of space, otherwise whatever the
+ * regcomp() error maps to below) and, should ERROR come back, 0 is
+ * returned.  On success the result is handed to RETURN().
+ */
+char *
+compile(char *instring, char *expbuf, char *endbuf, int eof)
+{
+	int __c;
+	int __len;
+	char *__buf;
+	char *__newbuf;
+	int __buflen;
+	int __error;
+	regex_t *__preg;
+	INIT;
+
+	__buflen = 128;
+	__buf = malloc(__buflen);
+	if (!__buf) {
+		ERROR(50);
+		return 0;
+	}
+	__len = 0;
+	circf = 0;
+	for (;;) {
+		__c = GETC();
+		if (__c == eof)
+			break;
+		if (__c == '\0' || __c == '\n') {
+			UNGETC(__c);
+			break;
+		}
+		if (__len + 2 > __buflen) {
+			/* Grow through a temporary so that the old buffer
+			 * can still be released if realloc fails. */
+			__newbuf = realloc(__buf, __buflen * 2);
+			if (!__newbuf) {
+				free(__buf);
+				ERROR(50);
+				return 0;
+			}
+			__buf = __newbuf;
+			__buflen *= 2;
+		}
+		if (__len == 0 && !circf && __c == '^')
+			circf = 1;
+		else
+			__buf[__len++] = __c;
+	}
+	if (__len == 0 && !circf) {
+		free(__buf);
+		ERROR(41);
+		return 0;
+	}
+	__buf[__len] = '\0';
+	/* Measure the room left from the aligned address, since that is
+	 * where the regex_t actually lives; alignment padding at the front
+	 * of EXPBUF does not count. */
+	__preg = __regex_t_align(expbuf);
+	if (endbuf <= (char *) __preg + sizeof(regex_t)) {
+		free(__buf);
+		ERROR(50);
+		return 0;
+	}
+	__preg->buffer = (char *) (__preg + 1);
+	__preg->allocated = endbuf - (char *) __preg->buffer;
+	__error = regcomp(__preg, __buf, REG_NEWLINE);
+	free(__buf);
+	/* Translate the POSIX error into the historical numeric code. */
+	switch (__error) {
+	case 0:
+		break;
+	case REG_BADRPT:
+		__error = 36; /* poor fit */
+		break;
+	case REG_BADBR:
+		__error = 16;
+		break;
+	case REG_EBRACE:
+		__error = 44; /* poor fit */
+		break;
+	case REG_EBRACK:
+		__error = 49;
+		break;
+	case REG_ERANGE:
+		__error = 36; /* poor fit */
+		break;
+	case REG_ECTYPE:
+		__error = 36; /* poor fit */
+		break;
+	case REG_EPAREN:
+		__error = 42;
+		break;
+	case REG_ESUBREG:
+		__error = 36; /* poor fit */
+		break;
+	case REG_EEND:
+		__error = 36; /* poor fit */
+		break;
+	case REG_EESCAPE:
+		__error = 36;
+		break;
+	case REG_BADPAT:
+		__error = 36; /* poor fit */
+		break;
+	case REG_ESIZE:
+		__error = 50;
+		break;
+	case REG_ESPACE:
+		__error = 50;
+		break;
+	default:
+		__error = 36; /* as good as any */
+		break;
+	}
+	if (__error) {
+		ERROR(__error);
+		return 0;
+	}
+#ifdef _RX_H
+	RETURN((__preg->buffer + __preg->rx.allocated - __preg->rx.reserved));
+#else
+	RETURN((__preg->buffer + __preg->used));
+#endif
+}
+
+/*
+ * Note how we carefully emulate the gross `circf' hack.  Otherwise,
+ * this just looks like an ordinary matching call that records the
+ * starting and ending match positions.
+ *
+ * Returns 1 and sets loc1/loc2 to the start and end of the match within
+ * STRING when the expression held in EXPBUF matches, and 0 otherwise.
+ * While `circf' is set, only a match starting at offset 0 counts.
+ */
+int
+step(char *string, char *expbuf)
+{
+	int __result;
+	regmatch_t __pmatch[1];
+
+	__result = regexec(__regex_t_align(expbuf), string, 1, __pmatch, 0);
+	/* __pmatch is only filled in by a successful regexec(), so it must
+	 * not be examined after a failure. */
+	if (__result == 0 && circf && __pmatch[0].rm_so != 0)
+		__result = REG_NOMATCH;
+	if (__result == 0) {
+		loc1 = string + __pmatch[0].rm_so;
+		loc2 = string + __pmatch[0].rm_eo;
+	}
+	return __result == 0;
+}
+
+/*
+ * advance() differs from step() only in that the match has to begin at
+ * the very start of STRING -- a detail the man page hides rather well.
+ * We get that effect by forcing `circf' on around a call to step() and
+ * putting the caller's value back afterwards.
+ */
+int
+advance(char *string, char *expbuf)
+{
+	const int __saved_circf = circf;
+	int __matched;
+
+	circf = 1;
+	__matched = step(string, expbuf);
+	circf = __saved_circf;
+	return __matched;
+}
+
+#endif /* _REGEXP_H */
diff --git a/include/stdlib.h b/include/stdlib.h
index daff04b07..51ce80ba2 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -61,10 +61,29 @@ extern int setenv __P ((__const char *__name, __const char *__value,
extern int system __P ((__const char *__command));
-extern int qsort __P ((char *base, int num, int size, int (*cmp)()));
-
extern char * gcvt __P ((float number, size_t ndigit, char * buf));
+#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
+/* Return the canonical absolute name of file NAME. The last file name
+ * component need not exist, and may be a symlink to a nonexistent file.
+ * If RESOLVED is null, the result is malloc'd; otherwise, if the canonical
+ * name is PATH_MAX chars or more, returns null with `errno' set to
+ * ENAMETOOLONG; if the name fits in fewer than PATH_MAX chars, returns the
+ * name in RESOLVED. */
+extern char *realpath __P ((__const char *__restrict __name,
+ char *__restrict __resolved));
+#endif
+
+
+/* Shorthand for type of comparison functions. */
+typedef int (*__compar_fn_t) __P ((__const __ptr_t, __const __ptr_t));
+typedef __compar_fn_t comparison_fn_t;
+/* Sort NMEMB elements of BASE, of SIZE bytes each,
+ using COMPAR to perform the comparisons. */
+extern void qsort __P ((__ptr_t __base, size_t __nmemb, size_t __size,
+ __compar_fn_t __compar));
+
+
#define atof(x) strtod((x),(char**)0)
#define atoi(x) (int)strtol((x),(char**)0,10)
#define atol(x) strtol((x),(char**)0,10)
@@ -84,4 +103,6 @@ typedef struct
long int rem; /* Remainder. */
} ldiv_t;
+
+
#endif /* __STDLIB_H */
diff --git a/include/sys/kd.h b/include/sys/kd.h
new file mode 100644
index 000000000..b37586abf
--- /dev/null
+++ b/include/sys/kd.h
@@ -0,0 +1,29 @@
+/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _SYS_KD_H
+#define _SYS_KD_H 1
+
+/* Make sure the <linux/types.h> header is not loaded. */
+#ifndef _LINUX_TYPES_H
+# define _LINUX_TYPES_H 1
+#endif
+
+#include <linux/kd.h>
+
+#endif /* sys/kd.h */
diff --git a/include/sys/klog.h b/include/sys/klog.h
new file mode 100644
index 000000000..62243e9ec
--- /dev/null
+++ b/include/sys/klog.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 1996 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _SYS_KLOG_H
+
+#define _SYS_KLOG_H 1
+#include <features.h>
+
+__BEGIN_DECLS
+
+/* Control the kernel's logging facility. This corresponds exactly to
+ the kernel's syslog system call, but that name is easily confused
+ with the user-level syslog facility, which is something completely
+ different. */
+extern int klogctl __P ((int __type, char *__bufp, int __len));
+
+__END_DECLS
+
+#endif /* _SYS_KLOG_H */
diff --git a/include/sys/mtio.h b/include/sys/mtio.h
new file mode 100644
index 000000000..7498dfa78
--- /dev/null
+++ b/include/sys/mtio.h
@@ -0,0 +1,277 @@
+/* Structures and definitions for magnetic tape I/O control commands.
+ Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* Written by H. Bergman <hennus@cybercomm.nl>. */
+
+#ifndef _SYS_MTIO_H
+#define _SYS_MTIO_H 1
+
+/* Get necessary definitions from system and kernel headers. */
+#include <sys/types.h>
+#include <sys/ioctl.h>
+
+
+/* Structure for MTIOCTOP - magnetic tape operation command. */
+struct mtop
+ {
+ short int mt_op; /* Operations defined below. */
+ int mt_count; /* How many of them. */
+ };
+#define _IOT_mtop /* Hurd ioctl type field. */ \
+ _IOT (_IOTS (short), 1, _IOTS (int), 1, 0, 0)
+
+/* Magnetic Tape operations [Not all operations supported by all drivers]. */
+#define MTRESET 0 /* +reset drive in case of problems. */
+#define MTFSF 1 /* Forward space over FileMark,
+ * position at first record of next file. */
+#define MTBSF 2 /* Backward space FileMark (position before FM). */
+#define MTFSR 3 /* Forward space record. */
+#define MTBSR 4 /* Backward space record. */
+#define MTWEOF 5 /* Write an end-of-file record (mark). */
+#define MTREW 6 /* Rewind. */
+#define MTOFFL 7 /* Rewind and put the drive offline (eject?). */
+#define MTNOP 8 /* No op, set status only (read with MTIOCGET). */
+#define MTRETEN 9 /* Retension tape. */
+#define MTBSFM 10 /* +backward space FileMark, position at FM. */
+#define MTFSFM 11 /* +forward space FileMark, position at FM. */
+#define MTEOM 12 /* Goto end of recorded media (for appending files).
+ MTEOM positions after the last FM, ready for
+ appending another file. */
+#define MTERASE 13 /* Erase tape -- be careful! */
+
+#define MTRAS1 14 /* Run self test 1 (nondestructive). */
+#define MTRAS2 15 /* Run self test 2 (destructive). */
+#define MTRAS3 16 /* Reserved for self test 3. */
+
+#define MTSETBLK 20 /* Set block length (SCSI). */
+#define MTSETDENSITY 21 /* Set tape density (SCSI). */
+#define MTSEEK 22 /* Seek to block (Tandberg, etc.). */
+#define MTTELL 23 /* Tell block (Tandberg, etc.). */
+#define MTSETDRVBUFFER 24 /* Set the drive buffering according to SCSI-2.
+ Ordinary buffered operation with code 1. */
+#define MTFSS 25 /* Space forward over setmarks. */
+#define MTBSS 26 /* Space backward over setmarks. */
+#define MTWSM 27 /* Write setmarks. */
+
+#define MTLOCK 28 /* Lock the drive door. */
+#define MTUNLOCK 29 /* Unlock the drive door. */
+#define MTLOAD 30 /* Execute the SCSI load command. */
+#define MTUNLOAD 31 /* Execute the SCSI unload command. */
+#define MTCOMPRESSION 32/* Control compression with SCSI mode page 15. */
+#define MTSETPART 33 /* Change the active tape partition. */
+#define MTMKPART 34 /* Format the tape with one or two partitions. */
+
+/* structure for MTIOCGET - mag tape get status command */
+
+struct mtget
+ {
+ long int mt_type; /* Type of magtape device. */
+ long int mt_resid; /* Residual count: (not sure)
+ number of bytes ignored, or
+ number of files not skipped, or
+ number of records not skipped. */
+ /* The following registers are device dependent. */
+ long int mt_dsreg; /* Status register. */
+ long int mt_gstat; /* Generic (device independent) status. */
+ long int mt_erreg; /* Error register. */
+ /* The next two fields are not always used. */
+ __daddr_t mt_fileno; /* Number of current file on tape. */
+ __daddr_t mt_blkno; /* Current block number. */
+ };
+#define _IOT_mtget /* Hurd ioctl type field. */ \
+ _IOT (_IOTS (long), 7, 0, 0, 0, 0)
+
+
+/* Constants for mt_type. Not all of these are supported, and
+ these are not all of the ones that are supported. */
+#define MT_ISUNKNOWN 0x01
+#define MT_ISQIC02 0x02 /* Generic QIC-02 tape streamer. */
+#define MT_ISWT5150 0x03 /* Wangtek 5150EQ, QIC-150, QIC-02. */
+#define MT_ISARCHIVE_5945L2 0x04 /* Archive 5945L-2, QIC-24, QIC-02?. */
+#define MT_ISCMSJ500 0x05 /* CMS Jumbo 500 (QIC-02?). */
+#define MT_ISTDC3610 0x06 /* Tandberg TDC 3610, QIC-24. */
+#define MT_ISARCHIVE_VP60I 0x07 /* Archive VP60i, QIC-02. */
+#define MT_ISARCHIVE_2150L 0x08 /* Archive Viper 2150L. */
+#define MT_ISARCHIVE_2060L 0x09 /* Archive Viper 2060L. */
+#define MT_ISARCHIVESC499 0x0A /* Archive SC-499 QIC-36 controller. */
+#define MT_ISQIC02_ALL_FEATURES 0x0F /* Generic QIC-02 with all features. */
+#define MT_ISWT5099EEN24 0x11 /* Wangtek 5099-een24, 60MB, QIC-24. */
+#define MT_ISTEAC_MT2ST 0x12 /* Teac MT-2ST 155mb drive,
+ Teac DC-1 card (Wangtek type). */
+#define MT_ISEVEREX_FT40A 0x32 /* Everex FT40A (QIC-40). */
+#define MT_ISDDS1 0x51 /* DDS device without partitions. */
+#define MT_ISDDS2 0x52 /* DDS device with partitions. */
+#define MT_ISSCSI1 0x71 /* Generic ANSI SCSI-1 tape unit. */
+#define MT_ISSCSI2 0x72 /* Generic ANSI SCSI-2 tape unit. */
+
+/* QIC-40/80/3010/3020 ftape supported drives.
+ 20bit vendor ID + 0x800000 (see vendors.h in ftape distribution). */
+#define MT_ISFTAPE_UNKNOWN 0x800000 /* obsolete */
+#define MT_ISFTAPE_FLAG 0x800000
+
+struct mt_tape_info
+ {
+ long int t_type; /* Device type id (mt_type). */
+ char *t_name; /* Descriptive name. */
+ };
+
+#define MT_TAPE_INFO \
+ { \
+ {MT_ISUNKNOWN, "Unknown type of tape device"}, \
+ {MT_ISQIC02, "Generic QIC-02 tape streamer"}, \
+ {MT_ISWT5150, "Wangtek 5150, QIC-150"}, \
+ {MT_ISARCHIVE_5945L2, "Archive 5945L-2"}, \
+ {MT_ISCMSJ500, "CMS Jumbo 500"}, \
+ {MT_ISTDC3610, "Tandberg TDC 3610, QIC-24"}, \
+ {MT_ISARCHIVE_VP60I, "Archive VP60i, QIC-02"}, \
+ {MT_ISARCHIVE_2150L, "Archive Viper 2150L"}, \
+ {MT_ISARCHIVE_2060L, "Archive Viper 2060L"}, \
+ {MT_ISARCHIVESC499, "Archive SC-499 QIC-36 controller"}, \
+ {MT_ISQIC02_ALL_FEATURES, "Generic QIC-02 tape, all features"}, \
+ {MT_ISWT5099EEN24, "Wangtek 5099-een24, 60MB"}, \
+ {MT_ISTEAC_MT2ST, "Teac MT-2ST 155mb data cassette drive"}, \
+ {MT_ISEVEREX_FT40A, "Everex FT40A, QIC-40"}, \
+ {MT_ISSCSI1, "Generic SCSI-1 tape"}, \
+ {MT_ISSCSI2, "Generic SCSI-2 tape"}, \
+ {0, NULL} \
+ }
+
+
+/* Structure for MTIOCPOS - mag tape get position command. */
+
+struct mtpos
+ {
+ long int mt_blkno; /* Current block number. */
+ };
+#define _IOT_mtpos /* Hurd ioctl type field. */ \
+ _IOT_SIMPLE (long)
+
+
+/* Structure for MTIOCGETCONFIG/MTIOCSETCONFIG primarily intended
+ as an interim solution for QIC-02 until DDI is fully implemented. */
+struct mtconfiginfo
+ {
+ long int mt_type; /* Drive type. */
+ long int ifc_type; /* Interface card type. */
+ unsigned short int irqnr; /* IRQ number to use. */
+ unsigned short int dmanr; /* DMA channel to use. */
+ unsigned short int port; /* IO port base address. */
+
+ unsigned long int debug; /* Debugging flags. */
+
+ unsigned have_dens:1;
+ unsigned have_bsf:1;
+ unsigned have_fsr:1;
+ unsigned have_bsr:1;
+ unsigned have_eod:1;
+ unsigned have_seek:1;
+ unsigned have_tell:1;
+ unsigned have_ras1:1;
+ unsigned have_ras2:1;
+ unsigned have_ras3:1;
+ unsigned have_qfa:1;
+
+ unsigned pad1:5;
+ char reserved[10];
+ };
+#define _IOT_mtconfiginfo /* Hurd ioctl type field. */ \
+ _IOT (_IOTS (long), 2, _IOTS (short), 3, _IOTS (long), 1) /* XXX wrong */
+
+
+/* Magnetic tape I/O control commands. */
+#define MTIOCTOP _IOW('m', 1, struct mtop) /* Do a mag tape op. */
+#define MTIOCGET _IOR('m', 2, struct mtget) /* Get tape status. */
+#define MTIOCPOS _IOR('m', 3, struct mtpos) /* Get tape position.*/
+
+/* The next two are used by the QIC-02 driver for runtime reconfiguration.
+ See tpqic02.h for struct mtconfiginfo. */
+#define MTIOCGETCONFIG _IOR('m', 4, struct mtconfiginfo) /* Get tape config.*/
+#define MTIOCSETCONFIG _IOW('m', 5, struct mtconfiginfo) /* Set tape config.*/
+
+/* Generic Mag Tape (device independent) status macros for examining
+ mt_gstat -- HP-UX compatible.
+ There is room for more generic status bits here, but I don't
+ know which of them are reserved. At least three or so should
+ be added to make this really useful. */
+#define GMT_EOF(x) ((x) & 0x80000000)
+#define GMT_BOT(x) ((x) & 0x40000000)
+#define GMT_EOT(x) ((x) & 0x20000000)
+#define GMT_SM(x) ((x) & 0x10000000) /* DDS setmark */
+#define GMT_EOD(x) ((x) & 0x08000000) /* DDS EOD */
+#define GMT_WR_PROT(x) ((x) & 0x04000000)
+/* #define GMT_ ? ((x) & 0x02000000) */
+#define GMT_ONLINE(x) ((x) & 0x01000000)
+#define GMT_D_6250(x) ((x) & 0x00800000)
+#define GMT_D_1600(x) ((x) & 0x00400000)
+#define GMT_D_800(x) ((x) & 0x00200000)
+/* #define GMT_ ? ((x) & 0x00100000) */
+/* #define GMT_ ? ((x) & 0x00080000) */
+#define GMT_DR_OPEN(x) ((x) & 0x00040000) /* Door open (no tape). */
+/* #define GMT_ ? ((x) & 0x00020000) */
+#define GMT_IM_REP_EN(x) ((x) & 0x00010000) /* Immediate report mode.*/
+/* 16 generic status bits unused. */
+
+
+/* SCSI-tape specific definitions. Bitfield shifts in the status */
+#define MT_ST_BLKSIZE_SHIFT 0
+#define MT_ST_BLKSIZE_MASK 0xffffff
+#define MT_ST_DENSITY_SHIFT 24
+#define MT_ST_DENSITY_MASK 0xff000000
+
+#define MT_ST_SOFTERR_SHIFT 0
+#define MT_ST_SOFTERR_MASK 0xffff
+
+/* Bitfields for the MTSETDRVBUFFER ioctl. */
+#define MT_ST_OPTIONS 0xf0000000
+#define MT_ST_BOOLEANS 0x10000000
+#define MT_ST_SETBOOLEANS 0x30000000
+#define MT_ST_CLEARBOOLEANS 0x40000000
+#define MT_ST_WRITE_THRESHOLD 0x20000000
+#define MT_ST_DEF_BLKSIZE 0x50000000
+#define MT_ST_DEF_OPTIONS 0x60000000
+
+#define MT_ST_BUFFER_WRITES 0x1
+#define MT_ST_ASYNC_WRITES 0x2
+#define MT_ST_READ_AHEAD 0x4
+#define MT_ST_DEBUGGING 0x8
+#define MT_ST_TWO_FM 0x10
+#define MT_ST_FAST_MTEOM 0x20
+#define MT_ST_AUTO_LOCK 0x40
+#define MT_ST_DEF_WRITES 0x80
+#define MT_ST_CAN_BSR 0x100
+#define MT_ST_NO_BLKLIMS 0x200
+#define MT_ST_CAN_PARTITIONS 0x400
+#define MT_ST_SCSI2LOGICAL 0x800
+
+/* The mode parameters to be controlled. Parameter chosen with bits 20-28. */
+#define MT_ST_CLEAR_DEFAULT 0xfffff
+#define MT_ST_DEF_DENSITY (MT_ST_DEF_OPTIONS | 0x100000)
+#define MT_ST_DEF_COMPRESSION (MT_ST_DEF_OPTIONS | 0x200000)
+#define MT_ST_DEF_DRVBUFFER (MT_ST_DEF_OPTIONS | 0x300000)
+
+/* The offset for the arguments for the special HP changer load command. */
+#define MT_ST_HPLOADER_OFFSET 10000
+
+
+/* Specify default tape device. */
+#ifndef DEFTAPE
+# define DEFTAPE "/dev/tape"
+#endif
+
+#endif /* mtio.h */
diff --git a/libc/misc/regex/Makefile b/libc/misc/regex/Makefile
new file mode 100644
index 000000000..c6c8d8e52
--- /dev/null
+++ b/libc/misc/regex/Makefile
@@ -0,0 +1,17 @@
+TOPDIR=../
+include $(TOPDIR)Rules.make
+
+LIBC=../libc.a
+
+OBJ=rx.o
+
+all: $(LIBC)
+
+$(LIBC): $(LIBC)($(OBJ))
+
+$(LIBC)(rx.o): rx.c
+ $(CC) $(CFLAGS) -DL_$* $< -c -o $*.o
+ $(AR) $(ARFLAGS) $@ $*.o
+
+clean:
+ rm -f libc.a *.o core mon.out timer.t.h dMakefile dtr try timer
diff --git a/libc/misc/regex/rx.c b/libc/misc/regex/rx.c
new file mode 100644
index 000000000..8e85782f2
--- /dev/null
+++ b/libc/misc/regex/rx.c
@@ -0,0 +1,7522 @@
+/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the librx library.
+
+Librx is free software; you can redistribute it and/or modify it under
+the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+Librx is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with this software; see the file COPYING.LIB. If not,
+write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
+02139, USA. */
+
+/* NOTE!!! AIX is so losing it requires this to be the first thing in the
+ * file.
+ * Do not put ANYTHING before it!
+ */
+#if !defined (__GNUC__) && defined (_AIX)
+ #pragma alloca
+#endif
+
+/* To make linux happy? */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#ifndef isgraph
+#define isgraph(c) (isprint (c) && !isspace (c))
+#endif
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+
+#include <sys/types.h>
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+#ifndef __GCC__ /* NOTE(review): GCC predefines __GNUC__ (tested elsewhere in
+                 * this file), not __GCC__, so this branch is presumably always
+                 * taken and __inline__ is always defined away -- confirm
+                 * whether that is intended before changing it. */
+#undef __inline__
+#define __inline__
+#endif
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+/* Memory management and stuff for emacs. */
+
+#define CHARBITS 8
+/* Resize M to S bytes with realloc, or allocate S fresh bytes with malloc
+ * when M is null.  The arguments are parenthesized so that expression
+ * arguments expand safely; note that M is still evaluated twice. */
+#define remalloc(M, S) ((M) ? realloc ((M), (S)) : malloc (S))
+
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ * use `alloca' instead of `malloc' for the backtracking stack.
+ *
+ * Emacs will die miserably if we don't do this.
+ */
+
+#ifdef REGEX_MALLOC
+#define REGEX_ALLOCATE malloc
+#else /* not REGEX_MALLOC */
+#define REGEX_ALLOCATE alloca
+#endif /* not REGEX_MALLOC */
+
+
+#ifdef RX_WANT_RX_DEFS
+#define RX_DECL extern
+#define RX_DEF_QUAL
+#else
+#define RX_WANT_RX_DEFS
+#define RX_DECL static
+#define RX_DEF_QUAL static
+#endif
+
+#include <regex.h>
+#undef RX_DECL
+#define RX_DECL RX_DEF_QUAL
+
+
+/*
+ * Prototypes.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_hash_item
+ *rx_hash_find (struct rx_hash *, unsigned long,
+ void *, struct rx_hash_rules *);
+RX_DECL struct rx_hash_item /* NOTE(review): repeats the rx_hash_find prototype just above verbatim; harmless, but possibly another function was meant -- confirm */
+ *rx_hash_find (struct rx_hash *, unsigned long,
+ void *, struct rx_hash_rules *);
+RX_DECL struct rx_hash_item
+ *rx_hash_store (struct rx_hash *, unsigned long,
+ void *, struct rx_hash_rules *);
+RX_DECL void rx_hash_free (struct rx_hash_item *,
+ struct rx_hash_rules *);
+RX_DECL void rx_free_hash_table (struct rx_hash *, rx_hash_freefn,
+ struct rx_hash_rules *);
+RX_DECL rx_Bitset
+ rx_cset (struct rx *);
+RX_DECL rx_Bitset
+ rx_copy_cset (struct rx *, rx_Bitset);
+RX_DECL void rx_free_cset (struct rx *, rx_Bitset);
+static struct rx_hash_item
+ *compiler_hash_item_alloc (struct rx_hash_rules *, void *);
+static struct rx_hash
+ *compiler_hash_alloc (struct rx_hash_rules *);
+static void compiler_free_hash (struct rx_hash *,
+ struct rx_hash_rules *);
+static void compiler_free_hash_item (struct rx_hash_item *,
+ struct rx_hash_rules *);
+RX_DECL struct rexp_node
+ *rexp_node (struct rx *, enum rexp_node_type);
+RX_DECL struct rexp_node
+ *rx_mk_r_cset (struct rx *, rx_Bitset);
+RX_DECL struct rexp_node
+ *rx_mk_r_concat (struct rx *, struct rexp_node *,
+ struct rexp_node *);
+RX_DECL struct rexp_node
+ *rx_mk_r_alternate (struct rx *, struct rexp_node *,
+ struct rexp_node *);
+RX_DECL struct rexp_node /* NOTE(review): repeats the rx_mk_r_alternate prototype just above verbatim; harmless, but possibly another function was meant -- confirm */
+ *rx_mk_r_alternate (struct rx *, struct rexp_node *,
+ struct rexp_node *);
+RX_DECL struct rexp_node
+ *rx_mk_r_opt (struct rx *, struct rexp_node *);
+RX_DECL struct rexp_node
+ *rx_mk_r_star (struct rx *, struct rexp_node *);
+RX_DECL struct rexp_node
+ *rx_mk_r_2phase_star (struct rx *, struct rexp_node *,
+ struct rexp_node *);
+RX_DECL struct rexp_node
+ *rx_mk_r_side_effect (struct rx *, rx_side_effect);
+RX_DECL struct rexp_node
+ *rx_mk_r_data (struct rx *, void *);
+RX_DECL void rx_free_rexp (struct rx *, struct rexp_node *);
+RX_DECL struct rexp_node
+ *rx_copy_rexp (struct rx *, struct rexp_node *);
+RX_DECL struct rx_nfa_state
+ *rx_nfa_state (struct rx *);
+RX_DECL void rx_free_nfa_state (struct rx_nfa_state *);
+RX_DECL struct rx_nfa_state
+ *rx_id_to_nfa_state (struct rx *, int);
+RX_DECL struct rx_nfa_edge
+ *rx_nfa_edge (struct rx *, enum rx_nfa_etype,
+ struct rx_nfa_state *,
+ struct rx_nfa_state *);
+RX_DECL void rx_free_nfa_edge (struct rx_nfa_edge *);
+static struct rx_possible_future
+ *rx_possible_future (struct rx *, struct rx_se_list *);
+static void rx_free_possible_future (struct rx_possible_future *);
+RX_DECL void rx_free_nfa (struct rx *);
+RX_DECL int rx_build_nfa (struct rx *, struct rexp_node *,
+ struct rx_nfa_state **,
+ struct rx_nfa_state **);
+RX_DECL void rx_name_nfa_states (struct rx *);
+static int se_list_cmp (void *, void *);
+static int se_list_equal (void *, void *);
+static struct rx_se_list
+ *hash_cons_se_prog (struct rx *, struct rx_hash *,
+ void *, struct rx_se_list *);
+static struct rx_se_list
+ *hash_se_prog (struct rx *, struct rx_hash *,
+ struct rx_se_list *);
+static int nfa_set_cmp (void *, void *);
+static int nfa_set_equal (void *, void *);
+static struct rx_nfa_state_set
+ *nfa_set_cons (struct rx *, struct rx_hash *,
+ struct rx_nfa_state *,
+ struct rx_nfa_state_set *);
+static struct rx_nfa_state_set
+ *nfa_set_enjoin (struct rx *, struct rx_hash *,
+ struct rx_nfa_state *,
+ struct rx_nfa_state_set *);
+#endif
+
+#ifndef emacs
+
+#ifdef SYNTAX_TABLE
+extern char *re_syntax_table;
+#else /* not SYNTAX_TABLE */
+
+#ifndef RX_WANT_RX_DEFS
+RX_DECL char re_syntax_table[CHAR_SET_SIZE];
+#endif
+
+/*
+ * Fill in re_syntax_table on the first call: the characters 'a'-'z',
+ * 'A'-'Z', '0'-'9' and '_' are marked Sword and every other entry is
+ * left zero.  Later calls return immediately.
+ */
+#ifdef __STDC__
+static void
+init_syntax_once (void)
+#else
+static void
+init_syntax_once ()
+#endif
+{
+  register int c;
+  static int done = 0;
+
+  if (done)
+    return;
+
+  /* memset is declared by <string.h>, which this file includes; the
+   * legacy bzero would need <strings.h>. */
+  memset (re_syntax_table, 0, sizeof re_syntax_table);
+
+  for (c = 'a'; c <= 'z'; c++)
+    re_syntax_table[c] = Sword;
+
+  for (c = 'A'; c <= 'Z'; c++)
+    re_syntax_table[c] = Sword;
+
+  for (c = '0'; c <= '9'; c++)
+    re_syntax_table[c] = Sword;
+
+  re_syntax_table['_'] = Sword;
+
+  done = 1;
+}
+#endif /* not SYNTAX_TABLE */
+#endif /* not emacs */
+
+/* Compile with `-DRX_DEBUG' and use the following flags.
+ *
+ * Debugging flags:
+ * rx_debug_compile - print information as a regexp is compiled
+ * rx_debug_trace - print information as a regexp is executed
+ */
+
+#ifdef RX_DEBUG
+
+int rx_debug_compile = 0;
+int rx_debug_trace = 0;
+static struct re_pattern_buffer * dbug_rxb = 0;
+
+
+/*
+ * More Prototypes
+ */
+#ifdef __STDC__
+typedef void (*side_effect_printer) (struct rx *, void *, FILE *);
+static void print_cset (struct rx *, rx_Bitset, FILE *);
+static void print_rexp (struct rx *, struct rexp_node *, int,
+ side_effect_printer, FILE *);
+static void print_nfa (struct rx *, struct rx_nfa_state *,
+ side_effect_printer, FILE *);
+static void re_seprint (struct rx *, void *, FILE *);
+void print_compiled_pattern (struct re_pattern_buffer *);
+void print_fastmap (char *);
+#else
+typedef void (*side_effect_printer) ();
+static void print_cset ();
+#endif
+
+/*
+ * Debugging aid: write an indented dump of the expression tree rooted at
+ * NODE to FP.  A node is printed at column DEPTH and its children three
+ * columns further in; SEPRINT is called to render side-effect nodes.
+ * A null NODE prints nothing.
+ */
+#ifdef __STDC__
+static void
+print_rexp (struct rx *rx,
+	    struct rexp_node *node, int depth,
+	    side_effect_printer seprint, FILE * fp)
+#else
+static void
+print_rexp (rx, node, depth, seprint, fp)
+     struct rx *rx;
+     struct rexp_node *node;
+     int depth;
+     side_effect_printer seprint;
+     FILE * fp;
+#endif
+{
+  char *label;
+  int child_depth = depth + 3;
+
+  if (!node)
+    return;
+
+  switch (node->type)
+    {
+    case r_cset:
+      fprintf (fp, "%*s", depth, "");
+      print_cset (rx, node->params.cset, fp);
+      fputc ('\n', fp);
+      break;
+
+    case r_opt:
+    case r_star:
+      /* Unary operators: only the left child is printed. */
+      label = (node->type == r_opt) ? "opt" : "star";
+      fprintf (fp, "%*s%s\n", depth, "", label);
+      print_rexp (rx, node->params.pair.left, child_depth, seprint, fp);
+      break;
+
+    case r_2phase_star:
+      /* Note the order: right child first, then left. */
+      fprintf (fp, "%*s2phase star\n", depth, "");
+      print_rexp (rx, node->params.pair.right, child_depth, seprint, fp);
+      print_rexp (rx, node->params.pair.left, child_depth, seprint, fp);
+      break;
+
+    case r_alternate:
+    case r_concat:
+      label = (node->type == r_alternate) ? "alt" : "concat";
+      fprintf (fp, "%*s%s\n", depth, "", label);
+      print_rexp (rx, node->params.pair.left, child_depth, seprint, fp);
+      print_rexp (rx, node->params.pair.right, child_depth, seprint, fp);
+      break;
+
+    case r_side_effect:
+      fprintf (fp, "%*sSide effect: ", depth, "");
+      seprint (rx, node->params.side_effect, fp);
+      fputc ('\n', fp);
+      break;
+    }
+}
+
+/*
+ * Debugging aid: dump the list of NFA states starting at N to FP.  For
+ * every state this prints its id (tagged "final" or "start" where that
+ * applies), each outgoing edge with its destination id and label, and
+ * each entry of the state's `futures' list as a set of destination ids
+ * followed by the side effects attached to it.  SEPRINT renders the
+ * side effects.
+ */
+#ifdef __STDC__
+static void
+print_nfa (struct rx * rx,
+	   struct rx_nfa_state * n,
+	   side_effect_printer seprint, FILE * fp)
+#else
+static void
+print_nfa (rx, n, seprint, fp)
+     struct rx * rx;
+     struct rx_nfa_state * n;
+     side_effect_printer seprint;
+     FILE * fp;
+#endif
+{
+  struct rx_nfa_edge *e;
+  struct rx_possible_future *ec;
+  struct rx_nfa_state_set * s;
+  struct rx_se_list * l;
+
+  for (; n; n = n->next)
+    {
+      fprintf (fp, "node %d %s\n", n->id,
+	       n->is_final ? "final" : (n->is_start ? "start" : ""));
+
+      /* Outgoing edges. */
+      for (e = n->edges; e; e = e->next)
+	{
+	  fprintf (fp, " edge to %d, ", e->dest->id);
+	  switch (e->type)
+	    {
+	    case ne_epsilon:
+	      fprintf (fp, "epsilon\n");
+	      break;
+	    case ne_side_effect:
+	      fprintf (fp, "side effect ");
+	      seprint (rx, e->params.side_effect, fp);
+	      fputc ('\n', fp);
+	      break;
+	    case ne_cset:
+	      fprintf (fp, "cset ");
+	      print_cset (rx, e->params.cset, fp);
+	      fputc ('\n', fp);
+	      break;
+	    }
+	}
+
+      /* Entries of the `futures' list: destination set, then effects. */
+      for (ec = n->futures; ec; ec = ec->next)
+	{
+	  fprintf (fp, " eclosure to {");
+	  for (s = ec->destset; s; s = s->cdr)
+	    fprintf (fp, "%d ", s->car->id);
+	  fprintf (fp, "} (");
+	  for (l = ec->effects; l; l = l->cdr)
+	    {
+	      seprint (rx, l->car, fp);
+	      fputc (' ', fp);
+	    }
+	  fprintf (fp, ")\n");
+	}
+    }
+}
+
+/* Printable names for side effects, used only by the debugging output.
+ * re_seprint indexes this table with the negated value of a negative
+ * side-effect code, so slot 0 ("bogon") is a placeholder.
+ */
+static char * efnames [] =
+{
+ "bogon",
+ "re_se_try",
+ "re_se_pushback",
+ "re_se_push0",
+ "re_se_pushpos",
+ "re_se_chkpos",
+ "re_se_poppos",
+ "re_se_at_dot",
+ "re_se_syntax",
+ "re_se_not_syntax",
+ "re_se_begbuf",
+ "re_se_hat",
+ "re_se_wordbeg",
+ "re_se_wordbound",
+ "re_se_notwordbound",
+ "re_se_wordend",
+ "re_se_endbuf",
+ "re_se_dollar",
+ "re_se_fail",
+};
+
+/* Printable names for parameterized side effects; re_seprint indexes
+ * this table with the `se' field of a struct re_se_params.
+ */
+static char * efnames2[] =
+{
+ "re_se_win",
+ "re_se_lparen",
+ "re_se_rparen",
+ "re_se_backref",
+ "re_se_iter",
+ "re_se_end_iter",
+ "re_se_tv"
+};
+
+/* Printable instruction names.  NOTE(review): presumably these follow
+ * the order of the instruction enumeration -- confirm against its
+ * definition, which is not visible here.
+ */
+static char * inx_names[] =
+{
+ "rx_backtrack_point",
+ "rx_do_side_effects",
+ "rx_cache_miss",
+ "rx_next_char",
+ "rx_backtrack",
+ "rx_error_inx",
+ "rx_num_instructions"
+};
+
+
+/* Print a readable form of the side effect EFFECT on FP.
+ *
+ * EFFECT is an integer code carried in a pointer.  A negative code is
+ * printed by name from efnames (indexed by the negated code).  Any other
+ * code is looked up in dbug_rxb->se_params when dbug_rxb is set, and is
+ * otherwise printed as a bare number.  RX is unused.
+ */
+#ifdef __STDC__
+static void
+re_seprint (struct rx * rx, void * effect, FILE * fp)
+#else
+static void
+re_seprint (rx, effect, fp)
+     struct rx * rx;
+     void * effect;
+     FILE * fp;
+#endif
+{
+  /* Convert through long, not int, so the pointer-sized value is not
+   * truncated where pointers are wider than int.
+   */
+  long code = (long) effect;
+  long n_names = (long) (sizeof (efnames) / sizeof (efnames[0]));
+
+  if (code < 0)
+    {
+      /* Guard the table lookup: an unexpected code must not read past
+       * the end of efnames.
+       */
+      if (code > -n_names)
+        fputs (efnames[-code], fp);
+      else
+        fprintf (fp, "[unknown side effect %ld]", code);
+    }
+  else if (dbug_rxb)
+    {
+      struct re_se_params * p = &dbug_rxb->se_params[code];
+      fprintf (fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2);
+    }
+  else
+    fprintf (fp, "[complex op # %ld]", code);
+}
+
+/* These are so the regex.c regression tests will compile. */
+/* Intentionally empty stub: does nothing with RXB. */
+void
+print_compiled_pattern (rxb)
+ struct re_pattern_buffer * rxb;
+{
+}
+
+/* Intentionally empty stub: does nothing with FM. */
+void
+print_fastmap (fm)
+ char * fm;
+{
+}
+
+#endif /* RX_DEBUG */
+
+
+
+/* This page: Bitsets. Completely uninteresting. */
+
+/* Forward declarations for the bitset operations defined below.
+ * In each, the int argument is the set size in bits.
+ */
+RX_DECL int rx_bitset_is_equal (int, rx_Bitset, rx_Bitset);
+RX_DECL int rx_bitset_is_subset (int, rx_Bitset, rx_Bitset);
+RX_DECL int rx_bitset_empty (int, rx_Bitset);
+RX_DECL void rx_bitset_null (int, rx_Bitset);
+RX_DECL void rx_bitset_universe (int, rx_Bitset);
+RX_DECL void rx_bitset_complement (int, rx_Bitset);
+RX_DECL void rx_bitset_assign (int, rx_Bitset, rx_Bitset);
+RX_DECL void rx_bitset_union (int, rx_Bitset, rx_Bitset);
+RX_DECL void rx_bitset_intersection (int, rx_Bitset, rx_Bitset);
+RX_DECL void rx_bitset_difference (int, rx_Bitset, rx_Bitset);
+RX_DECL void rx_bitset_revdifference (int, rx_Bitset, rx_Bitset);
+RX_DECL void rx_bitset_xor (int, rx_Bitset, rx_Bitset);
+RX_DECL unsigned long
+ rx_bitset_hash (int, rx_Bitset);
+
+/* Return 1 if the SIZE-bit sets A and B have identical contents,
+ * 0 otherwise.  Neither set is modified, so A and B may be the same
+ * object.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL int
+rx_bitset_is_equal (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int x;
+
+  /* A plain bounded scan.  The earlier sentinel trick stored ~a[0] into
+   * b[0] to stop the loop, which never terminated when a == b.
+   */
+  for (x = rx_bitset_numb_subsets (size) - 1; x >= 0; --x)
+    if (a[x] != b[x])
+      return 0;
+
+  return 1;
+}
+
+/* Return 1 if every member of the SIZE-bit set A is also a member of B,
+ * 0 otherwise.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL int
+rx_bitset_is_subset (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  /* Start from the subset count itself so the post-decrement visits
+   * indexes count-1 .. 0; starting at count-1 skipped the last word.
+   */
+  int x = rx_bitset_numb_subsets (size);
+
+  while (x-- > 0)
+    if ((a[x] & b[x]) != a[x])
+      return 0;
+
+  return 1;
+}
+
+
+/* Return 1 if the SIZE-bit set SET has no members, 0 otherwise.
+ * SET is not modified.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_bitset_empty (int size, rx_Bitset set)
+#else
+RX_DECL int
+rx_bitset_empty (size, set)
+     int size;
+     rx_Bitset set;
+#endif
+{
+  int x;
+
+  /* Every subset word must be zero.  The earlier version scanned the
+   * words but then answered from word 0 alone, so a set whose only
+   * members lived in a higher word was reported as empty.
+   */
+  for (x = rx_bitset_numb_subsets (size) - 1; x >= 0; --x)
+    if (set[x])
+      return 0;
+
+  return 1;
+}
+
+/* Make B the empty set by zeroing rx_sizeof_bitset(SIZE) bytes of it. */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_null (int size, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_null (size, b)
+ int size;
+ rx_Bitset b;
+#endif
+{
+ bzero (b, rx_sizeof_bitset(size));
+}
+
+
+/* Turn on every bit in every subset word of the SIZE-bit set B. */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_universe (int size, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_universe (size, b)
+     int size;
+     rx_Bitset b;
+#endif
+{
+  int remaining;
+
+  for (remaining = rx_bitset_numb_subsets (size); remaining; --remaining)
+    {
+      *b = ~(RX_subset)0;
+      ++b;
+    }
+}
+
+
+/* Invert every subset word of the SIZE-bit set B in place. */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_complement (int size, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_complement (size, b)
+     int size;
+     rx_Bitset b;
+#endif
+{
+  int remaining;
+
+  for (remaining = rx_bitset_numb_subsets (size); remaining; --remaining, ++b)
+    *b = ~*b;
+}
+
+
+/* Copy the SIZE-bit set B into A, one subset word at a time, working
+ * from the highest word down.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_assign (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int word = rx_bitset_numb_subsets (size);
+
+  while (--word >= 0)
+    a[word] = b[word];
+}
+
+/* A := A | B for SIZE-bit sets, working from the highest word down. */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_union (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_union (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int word = rx_bitset_numb_subsets (size);
+
+  while (--word >= 0)
+    a[word] = a[word] | b[word];
+}
+
+
+/* A := A & B for SIZE-bit sets, working from the highest word down. */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_intersection (int size,
+                        rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_intersection (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int word = rx_bitset_numb_subsets (size);
+
+  while (--word >= 0)
+    a[word] = a[word] & b[word];
+}
+
+
+/* A := A & ~B for SIZE-bit sets (remove B's members from A), working
+ * from the highest word down.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_difference (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int word = rx_bitset_numb_subsets (size);
+
+  while (--word >= 0)
+    a[word] = a[word] & ~ b[word];
+}
+
+
+/* A := ~A & B for SIZE-bit sets (B's members that were not in A),
+ * working from the highest word down.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_revdifference (int size,
+                         rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_revdifference (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int word = rx_bitset_numb_subsets (size);
+
+  while (--word >= 0)
+    a[word] = ~a[word] & b[word];
+}
+
+/* A := A ^ B for SIZE-bit sets, working from the highest word down. */
+#ifdef __STDC__
+RX_DECL void
+rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b)
+#else
+RX_DECL void
+rx_bitset_xor (size, a, b)
+     int size;
+     rx_Bitset a;
+     rx_Bitset b;
+#endif
+{
+  int word = rx_bitset_numb_subsets (size);
+
+  while (--word >= 0)
+    a[word] = a[word] ^ b[word];
+}
+
+
+/* Compute a hash of the SIZE-bit set B by xor-ing together the value
+ * rx_bitset_subset_val yields for each subset word.  The accumulator is
+ * seeded with the address of this function.
+ */
+#ifdef __STDC__
+RX_DECL unsigned long
+rx_bitset_hash (int size, rx_Bitset b)
+#else
+RX_DECL unsigned long
+rx_bitset_hash (size, b)
+     int size;
+     rx_Bitset b;
+#endif
+{
+  unsigned long acc = (unsigned long)rx_bitset_hash;
+  int word = rx_bitset_numb_subsets (size);
+
+  while (--word >= 0)
+    acc = acc ^ rx_bitset_subset_val(b, word);
+
+  return acc;
+}
+
+/* Single-bit masks: entry N has only bit N set (0x1 << N).  The
+ * initializer lists 32 entries, covering bits 0 through 31.
+ */
+RX_DECL RX_subset rx_subset_singletons [RX_subset_bits] =
+{
+ 0x1,
+ 0x2,
+ 0x4,
+ 0x8,
+ 0x10,
+ 0x20,
+ 0x40,
+ 0x80,
+ 0x100,
+ 0x200,
+ 0x400,
+ 0x800,
+ 0x1000,
+ 0x2000,
+ 0x4000,
+ 0x8000,
+ 0x10000,
+ 0x20000,
+ 0x40000,
+ 0x80000,
+ 0x100000,
+ 0x200000,
+ 0x400000,
+ 0x800000,
+ 0x1000000,
+ 0x2000000,
+ 0x4000000,
+ 0x8000000,
+ 0x10000000,
+ 0x20000000,
+ 0x40000000,
+ 0x80000000
+};
+
+#ifdef RX_DEBUG
+
+/* Debugging aid: print the members of the character set CSET on FP,
+ * enclosed in square brackets.  Printable members are written as
+ * themselves, the rest as a backslash followed by an octal number.
+ * Members are tested for codes 0 .. rx->local_cset_size - 1.
+ */
+#ifdef __STDC__
+static void
+print_cset (struct rx *rx, rx_Bitset cset, FILE * fp)
+#else
+static void
+print_cset (rx, cset, fp)
+     struct rx *rx;
+     rx_Bitset cset;
+     FILE * fp;
+#endif
+{
+  int ch = 0;
+
+  fputc ('[', fp);
+  while (ch < rx->local_cset_size)
+    {
+      if (RX_bitset_member (cset, ch))
+        {
+          if (!isprint(ch))
+            fprintf (fp, "\\0%o ", ch);
+          else
+            fputc (ch, fp);
+        }
+      ++ch;
+    }
+  fputc (']', fp);
+}
+
+#endif /* RX_DEBUG */
+
+
+
+/* Bit masks applied to a hash value before it is reduced modulo 13 to
+ * pick a bucket.  The hash-table routines below index this array by
+ * nesting depth, so each level of child table uses a different mask.
+ */
+static unsigned long rx_hash_masks[4] =
+{
+ 0x12488421,
+ 0x96699669,
+ 0xbe7dd7eb,
+ 0xffffffff
+};
+
+
+/* Hash tables */
+/* Look VALUE up in TABLE without inserting it.
+ *
+ * HASH selects a bucket at each level, using the mask for that level.
+ * Child tables are followed until a bucket with no child is reached,
+ * and that bucket's chain is searched with RULES->eq.  Returns the
+ * matching item, or 0 when there is none.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_hash_item *
+rx_hash_find (struct rx_hash * table,
+              unsigned long hash,
+              void * value,
+              struct rx_hash_rules * rules)
+#else
+RX_DECL struct rx_hash_item *
+rx_hash_find (table, hash, value, rules)
+     struct rx_hash * table;
+     unsigned long hash;
+     void * value;
+     struct rx_hash_rules * rules;
+#endif
+{
+  rx_hash_eq same = rules->eq;
+  int level = 0;
+  long mask = rx_hash_masks [0];
+  int bucket = (hash & mask) % 13;
+  struct rx_hash_item * it;
+
+  /* Descend through child tables, re-masking the hash at each level. */
+  while (table->children [bucket])
+    {
+      table = table->children [bucket];
+      level = level + 1;
+      mask = rx_hash_masks[level];
+      bucket = (hash & mask) % 13;
+    }
+
+  /* Linear search of the chain in the bucket finally reached. */
+  for (it = table->buckets[bucket]; it; it = it->next_same_hash)
+    {
+      if (same (it->data, value))
+        return it;
+    }
+
+  return 0;
+}
+
+/* Find VALUE in TABLE, adding a new item for it when no existing item
+ * compares equal under RULES->eq.
+ *
+ * Returns the existing or newly created item, or 0 when
+ * RULES->hash_item_alloc fails.  A bucket that already holds 4 or more
+ * items, and is fewer than 3 levels deep, is first split into a child
+ * table; if the child table cannot be allocated the item is simply
+ * added to the crowded bucket.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_hash_item *
+rx_hash_store (struct rx_hash * table,
+ unsigned long hash,
+ void * value,
+ struct rx_hash_rules * rules)
+#else
+RX_DECL struct rx_hash_item *
+rx_hash_store (table, hash, value, rules)
+ struct rx_hash * table;
+ unsigned long hash;
+ void * value;
+ struct rx_hash_rules * rules;
+#endif
+{
+ rx_hash_eq eq = rules->eq;
+ int maskc = 0;
+ long mask = rx_hash_masks[0];
+ int bucket = (hash & mask) % 13;
+ int depth = 0;
+
+ /* Descend to the deepest table responsible for this hash value. */
+ while (table->children [bucket])
+ {
+ table = table->children [bucket];
+ ++maskc;
+ mask = rx_hash_masks[maskc];
+ bucket = (hash & mask) % 13;
+ ++depth;
+ }
+
+ /* An equal item already present is returned as is. */
+ {
+ struct rx_hash_item * it = table->buckets[bucket];
+ while (it)
+ if (eq (it->data, value))
+ return it;
+ else
+ it = it->next_same_hash;
+ }
+
+ {
+ if ( (depth < 3)
+ && (table->bucket_size [bucket] >= 4))
+ {
+ struct rx_hash * newtab = ((struct rx_hash *)
+ rules->hash_alloc (rules));
+ if (!newtab)
+ goto add_to_bucket;
+ bzero (newtab, sizeof (*newtab));
+ newtab->parent = table;
+ {
+ /* Move every item of the crowded bucket into the child table,
+ * rebucketing each one with the next level's mask. */
+ struct rx_hash_item * them = table->buckets[bucket];
+ unsigned long newmask = rx_hash_masks[maskc + 1];
+ while (them)
+ {
+ struct rx_hash_item * save = them->next_same_hash;
+ int new_buck = (them->hash & newmask) % 13;
+ them->next_same_hash = newtab->buckets[new_buck];
+ newtab->buckets[new_buck] = them;
+ them->table = newtab;
+ them = save;
+ ++newtab->bucket_size[new_buck];
+ ++newtab->refs;
+ }
+ /* The parent drops the moved items from its count and adds one. */
+ table->refs = (table->refs - table->bucket_size[bucket] + 1);
+ table->bucket_size[bucket] = 0;
+ table->buckets[bucket] = 0;
+ table->children[bucket] = newtab;
+ table = newtab;
+ bucket = (hash & newmask) % 13;
+ }
+ }
+ }
+ add_to_bucket:
+ {
+ /* Push a new item on the front of the chosen bucket's chain. */
+ struct rx_hash_item * it = ((struct rx_hash_item *)
+ rules->hash_item_alloc (rules, value));
+ if (!it)
+ return 0;
+ it->hash = hash;
+ it->table = table;
+ /* DATA and BINDING are to be set in hash_item_alloc */
+ it->next_same_hash = table->buckets [bucket];
+ table->buckets[bucket] = it;
+ ++table->bucket_size [bucket];
+ ++table->refs;
+ return it;
+ }
+}
+
+
+/* Remove the item IT from the table that holds it and release it with
+ * RULES->free_hash_item.  Does nothing when IT is 0.
+ *
+ * Any table whose reference count drops to zero as a result is unhooked
+ * from its parent and released with RULES->free_hash; the top-level
+ * table (depth 0) is never released here.  IT is expected to be on the
+ * chain of the bucket its hash selects: the unlink loop has no
+ * end-of-chain check.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules)
+#else
+RX_DECL void
+rx_hash_free (it, rules)
+ struct rx_hash_item * it;
+ struct rx_hash_rules * rules;
+#endif
+{
+ if (it)
+ {
+ struct rx_hash * table = it->table;
+ unsigned long hash = it->hash;
+ /* Nesting depth of TABLE, found by counting parent links (at most 3). */
+ int depth = (table->parent
+ ? (table->parent->parent
+ ? (table->parent->parent->parent
+ ? 3
+ : 2)
+ : 1)
+ : 0);
+ int bucket = (hash & rx_hash_masks [depth]) % 13;
+ struct rx_hash_item ** pos = &table->buckets [bucket];
+
+ /* Find the link that points at IT and splice IT out of the chain. */
+ while (*pos != it)
+ pos = &(*pos)->next_same_hash;
+ *pos = it->next_same_hash;
+ rules->free_hash_item (it, rules);
+ --table->bucket_size[bucket];
+ --table->refs;
+ /* Release tables that have become empty, walking up toward the root. */
+ while (!table->refs && depth)
+ {
+ struct rx_hash * save = table;
+ table = table->parent;
+ --depth;
+ bucket = (hash & rx_hash_masks [depth]) % 13;
+ --table->refs;
+ table->children[bucket] = 0;
+ rules->free_hash (save, rules);
+ }
+ }
+}
+
+/* Release everything stored under TAB, but not TAB itself.
+ *
+ * For each of the 13 slots: a child table is emptied recursively and
+ * then released with RULES->free_hash; otherwise every item on the
+ * slot's chain is passed to FREEFN and then released with
+ * RULES->free_hash_item.  TAB's own slots are not cleared.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
+                    struct rx_hash_rules * rules)
+#else
+RX_DECL void
+rx_free_hash_table (tab, freefn, rules)
+     struct rx_hash * tab;
+     rx_hash_freefn freefn;
+     struct rx_hash_rules * rules;
+#endif
+{
+  int slot;
+
+  for (slot = 0; slot < 13; ++slot)
+    {
+      struct rx_hash_item * item;
+
+      if (tab->children[slot])
+        {
+          rx_free_hash_table (tab->children[slot], freefn, rules);
+          rules->free_hash (tab->children[slot], rules);
+          continue;
+        }
+
+      item = tab->buckets[slot];
+      while (item)
+        {
+          /* Fetch the successor before the item is released. */
+          struct rx_hash_item * following = item->next_same_hash;
+          freefn (item);
+          rules->free_hash_item (item, rules);
+          item = following;
+        }
+    }
+}
+
+
+
+/* Utilities for manipulating bitset representations of character sets. */
+
+/* Allocate a new, empty character set sized for RX->local_cset_size.
+ * Returns 0 when malloc fails.
+ */
+#ifdef __STDC__
+RX_DECL rx_Bitset
+rx_cset (struct rx *rx)
+#else
+RX_DECL rx_Bitset
+rx_cset (rx)
+     struct rx *rx;
+#endif
+{
+  rx_Bitset fresh = (rx_Bitset) malloc (rx_sizeof_bitset (rx->local_cset_size));
+
+  if (!fresh)
+    return fresh;
+
+  rx_bitset_null (rx->local_cset_size, fresh);
+  return fresh;
+}
+
+
+/* Return a newly allocated copy of the character set A, or 0 when the
+ * allocation fails.  The copy is made by taking the union of A with a
+ * new empty set.
+ */
+#ifdef __STDC__
+RX_DECL rx_Bitset
+rx_copy_cset (struct rx *rx, rx_Bitset a)
+#else
+RX_DECL rx_Bitset
+rx_copy_cset (rx, a)
+     struct rx *rx;
+     rx_Bitset a;
+#endif
+{
+  rx_Bitset dup = rx_cset (rx);
+
+  if (!dup)
+    return dup;
+
+  rx_bitset_union (rx->local_cset_size, dup, a);
+  return dup;
+}
+
+
+/* Release the character set C with free.  A null C is ignored.
+ * RX is unused.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_cset (struct rx * rx, rx_Bitset c)
+#else
+RX_DECL void
+rx_free_cset (rx, c)
+     struct rx * rx;
+     rx_Bitset c;
+#endif
+{
+  if (!c)
+    return;
+
+  free ((char *)c);
+}
+
+
+/* Hash table memory allocation policy for the regexp compiler */
+
+/* hash_alloc policy: obtain storage for one struct rx_hash from malloc.
+ * The storage is returned uninitialized (0 on failure); RULES is unused.
+ */
+#ifdef __STDC__
+static struct rx_hash *
+compiler_hash_alloc (struct rx_hash_rules * rules)
+#else
+static struct rx_hash *
+compiler_hash_alloc (rules)
+ struct rx_hash_rules * rules;
+#endif
+{
+ return (struct rx_hash *)malloc (sizeof (struct rx_hash));
+}
+
+
+/* hash_item_alloc policy: malloc one hash item, point its `data' at
+ * VALUE and clear its `binding'.  The remaining fields are left for the
+ * caller to fill in.  Returns 0 when malloc fails.  RULES is unused.
+ */
+#ifdef __STDC__
+static struct rx_hash_item *
+compiler_hash_item_alloc (struct rx_hash_rules * rules, void * value)
+#else
+static struct rx_hash_item *
+compiler_hash_item_alloc (rules, value)
+     struct rx_hash_rules * rules;
+     void * value;
+#endif
+{
+  struct rx_hash_item * item = (struct rx_hash_item *)malloc (sizeof (*item));
+
+  if (!item)
+    return item;
+
+  item->binding = 0;
+  item->data = value;
+  return item;
+}
+
+/* free_hash policy: release the table TAB with free.  RULES is unused. */
+#ifdef __STDC__
+static void
+compiler_free_hash (struct rx_hash * tab,
+ struct rx_hash_rules * rules)
+#else
+static void
+compiler_free_hash (tab, rules)
+ struct rx_hash * tab;
+ struct rx_hash_rules * rules;
+#endif
+{
+ free ((char *)tab);
+}
+
+
+/* free_hash_item policy: release ITEM with free.  Whatever ITEM's data
+ * points at is not touched.  RULES is unused.
+ */
+#ifdef __STDC__
+static void
+compiler_free_hash_item (struct rx_hash_item * item,
+ struct rx_hash_rules * rules)
+#else
+static void
+compiler_free_hash_item (item, rules)
+ struct rx_hash_item * item;
+ struct rx_hash_rules * rules;
+#endif
+{
+ free ((char *)item);
+}
+
+
+/* This page: REXP_NODE (expression tree) structures. */
+
+/* Allocate an expression-tree node of kind TYPE with every other byte
+ * zeroed.  Returns 0 when malloc fails.  RX is unused.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rexp_node (struct rx *rx,
+           enum rexp_node_type type)
+#else
+RX_DECL struct rexp_node *
+rexp_node (rx, type)
+     struct rx *rx;
+     enum rexp_node_type type;
+#endif
+{
+  struct rexp_node *fresh = (struct rexp_node *)malloc (sizeof (*fresh));
+
+  if (!fresh)
+    return fresh;
+
+  bzero (fresh, sizeof (*fresh));
+  fresh->type = type;
+  return fresh;
+}
+
+
+/* free_rexp_node assumes that the bitset passed to rx_mk_r_cset
+ * can be freed using rx_free_cset.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_cset (struct rx * rx,
+              rx_Bitset b)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_cset (rx, b)
+     struct rx * rx;
+     rx_Bitset b;
+#endif
+{
+  /* Build an r_cset node that refers to B itself (B is not copied).
+   * Returns 0 when the node cannot be allocated.
+   */
+  struct rexp_node * leaf = rexp_node (rx, r_cset);
+
+  if (!leaf)
+    return leaf;
+
+  leaf->params.cset = b;
+  return leaf;
+}
+
+/* Build an r_concat node with A as its left operand and B as its right
+ * operand.  Returns 0 when the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_concat (struct rx * rx,
+                struct rexp_node * a,
+                struct rexp_node * b)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_concat (rx, a, b)
+     struct rx * rx;
+     struct rexp_node * a;
+     struct rexp_node * b;
+#endif
+{
+  struct rexp_node * seq = rexp_node (rx, r_concat);
+
+  if (!seq)
+    return seq;
+
+  seq->params.pair.left = a;
+  seq->params.pair.right = b;
+  return seq;
+}
+
+
+/* Build an r_alternate node with A as its left operand and B as its
+ * right operand.  Returns 0 when the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_alternate (struct rx * rx,
+                   struct rexp_node * a,
+                   struct rexp_node * b)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_alternate (rx, a, b)
+     struct rx * rx;
+     struct rexp_node * a;
+     struct rexp_node * b;
+#endif
+{
+  struct rexp_node * alt = rexp_node (rx, r_alternate);
+
+  if (!alt)
+    return alt;
+
+  alt->params.pair.left = a;
+  alt->params.pair.right = b;
+  return alt;
+}
+
+
+/* Build an r_opt node whose left operand is A; the right operand is
+ * cleared.  Returns 0 when the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_opt (struct rx * rx,
+             struct rexp_node * a)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_opt (rx, a)
+     struct rx * rx;
+     struct rexp_node * a;
+#endif
+{
+  struct rexp_node * opt = rexp_node (rx, r_opt);
+
+  if (!opt)
+    return opt;
+
+  opt->params.pair.left = a;
+  opt->params.pair.right = 0;
+  return opt;
+}
+
+/* Build an r_star node whose left operand is A; the right operand is
+ * cleared.  Returns 0 when the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_star (struct rx * rx,
+              struct rexp_node * a)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_star (rx, a)
+     struct rx * rx;
+     struct rexp_node * a;
+#endif
+{
+  struct rexp_node * star = rexp_node (rx, r_star);
+
+  if (!star)
+    return star;
+
+  star->params.pair.left = a;
+  star->params.pair.right = 0;
+  return star;
+}
+
+
+/* Build an r_2phase_star node with A as its left operand and B as its
+ * right operand.  Returns 0 when the node cannot be allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_2phase_star (struct rx * rx,
+                     struct rexp_node * a,
+                     struct rexp_node * b)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_2phase_star (rx, a, b)
+     struct rx * rx;
+     struct rexp_node * a;
+     struct rexp_node * b;
+#endif
+{
+  struct rexp_node * star2 = rexp_node (rx, r_2phase_star);
+
+  if (!star2)
+    return star2;
+
+  star2->params.pair.left = a;
+  star2->params.pair.right = b;
+  return star2;
+}
+
+/* Build an r_side_effect node carrying the side effect A.  The pair's
+ * right slot is cleared as well.  Returns 0 when the node cannot be
+ * allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_side_effect (struct rx * rx,
+                     rx_side_effect a)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_side_effect (rx, a)
+     struct rx * rx;
+     rx_side_effect a;
+#endif
+{
+  struct rexp_node * se = rexp_node (rx, r_side_effect);
+
+  if (!se)
+    return se;
+
+  /* Statement order is kept: both stores go into the same `params'. */
+  se->params.side_effect = a;
+  se->params.pair.right = 0;
+  return se;
+}
+
+
+/* Build an r_data node that keeps the pointer A in the pair's left
+ * slot; the right slot is cleared.  Returns 0 when the node cannot be
+ * allocated.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_mk_r_data (struct rx * rx,
+              void * a)
+#else
+RX_DECL struct rexp_node *
+rx_mk_r_data (rx, a)
+     struct rx * rx;
+     void * a;
+#endif
+{
+  struct rexp_node * holder = rexp_node (rx, r_data);
+
+  if (!holder)
+    return holder;
+
+  holder->params.pair.left = a;
+  holder->params.pair.right = 0;
+  return holder;
+}
+
+/* Recursively release the expression tree rooted at NODE.  A null NODE
+ * is ignored.  An r_cset node also releases its character set; the
+ * operator nodes release both children first.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_rexp (struct rx * rx, struct rexp_node * node)
+#else
+RX_DECL void
+rx_free_rexp (rx, node)
+     struct rx * rx;
+     struct rexp_node * node;
+#endif
+{
+  if (!node)
+    return;
+
+  switch (node->type)
+    {
+    case r_concat:
+    case r_alternate:
+    case r_2phase_star:
+    case r_opt:
+    case r_star:
+      rx_free_rexp (rx, node->params.pair.left);
+      rx_free_rexp (rx, node->params.pair.right);
+      break;
+
+    case r_cset:
+      if (node->params.cset)
+        rx_free_cset (rx, node->params.cset);
+      break;
+
+    case r_side_effect:
+      /* Nothing owned beyond the node itself. */
+      break;
+
+    case r_data:
+      /* This shouldn't occur. */
+      break;
+    }
+
+  free ((char *)node);
+}
+
+/* Return a deep copy of the expression tree rooted at NODE.
+ *
+ * Returns 0 when NODE is 0 or when any allocation fails; on failure the
+ * part of the copy already built is released.  Character sets are
+ * duplicated, side effects are copied by value, and an r_data node is
+ * copied as a bare node of the same type.
+ */
+#ifdef __STDC__
+RX_DECL struct rexp_node *
+rx_copy_rexp (struct rx *rx,
+              struct rexp_node *node)
+#else
+RX_DECL struct rexp_node *
+rx_copy_rexp (rx, node)
+     struct rx *rx;
+     struct rexp_node *node;
+#endif
+{
+  struct rexp_node *dup;
+
+  if (!node)
+    return 0;
+
+  dup = rexp_node (rx, node->type);
+  if (!dup)
+    return 0;
+
+  switch (node->type)
+    {
+    case r_side_effect:
+      dup->params.side_effect = node->params.side_effect;
+      break;
+
+    case r_cset:
+      dup->params.cset = rx_copy_cset (rx, node->params.cset);
+      if (!dup->params.cset)
+        {
+          rx_free_rexp (rx, dup);
+          return 0;
+        }
+      break;
+
+    case r_concat:
+    case r_alternate:
+    case r_opt:
+    case r_2phase_star:
+    case r_star:
+      {
+        int left_lost;
+        int right_lost;
+
+        dup->params.pair.left =
+          rx_copy_rexp (rx, node->params.pair.left);
+        dup->params.pair.right =
+          rx_copy_rexp (rx, node->params.pair.right);
+
+        /* A child that existed in the original but is missing from the
+         * copy means a nested allocation failed.
+         */
+        left_lost = node->params.pair.left && !dup->params.pair.left;
+        right_lost = node->params.pair.right && !dup->params.pair.right;
+        if (left_lost || right_lost)
+          {
+            rx_free_rexp (rx, dup);
+            return 0;
+          }
+      }
+      break;
+
+    case r_data:
+      /* shouldn't happen */
+      break;
+    }
+
+  return dup;
+}
+
+
+
+/* This page: functions to build and destroy graphs that describe nfa's */
+
+/* Constructs a new nfa node. */
+#ifdef __STDC__
+RX_DECL struct rx_nfa_state *
+rx_nfa_state (struct rx *rx)
+#else
+RX_DECL struct rx_nfa_state *
+rx_nfa_state (rx)
+     struct rx *rx;
+#endif
+{
+  /* The new state is zero-filled and pushed on the front of
+   * rx->nfa_states.  Returns 0 when malloc fails.
+   */
+  struct rx_nfa_state * fresh = (struct rx_nfa_state *)malloc (sizeof (*fresh));
+
+  if (fresh)
+    {
+      bzero (fresh, sizeof (*fresh));
+      fresh->next = rx->nfa_states;
+      rx->nfa_states = fresh;
+    }
+  return fresh;
+}
+
+
+/* Release the storage of the nfa state N with free.  N is not unlinked
+ * from any list here, and its edges and futures are not touched.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_nfa_state (struct rx_nfa_state * n)
+#else
+RX_DECL void
+rx_free_nfa_state (n)
+ struct rx_nfa_state * n;
+#endif
+{
+ free ((char *)n);
+}
+
+
+/* This looks up an nfa node, given a numeric id. Numeric id's are
+ * assigned after the nfa has been built.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_nfa_state *
+rx_id_to_nfa_state (struct rx * rx,
+                    int id)
+#else
+RX_DECL struct rx_nfa_state *
+rx_id_to_nfa_state (rx, id)
+     struct rx * rx;
+     int id;
+#endif
+{
+  /* Linear walk of rx->nfa_states; yields 0 when no state has this id. */
+  struct rx_nfa_state * cur = rx->nfa_states;
+
+  while (cur && cur->id != id)
+    cur = cur->next;
+
+  return cur;
+}
+
+
+/* This adds an edge between two nodes, but doesn't initialize the
+ * edge label.
+ */
+
+#ifdef __STDC__
+RX_DECL struct rx_nfa_edge *
+rx_nfa_edge (struct rx *rx,
+             enum rx_nfa_etype type,
+             struct rx_nfa_state *start,
+             struct rx_nfa_state *dest)
+#else
+RX_DECL struct rx_nfa_edge *
+rx_nfa_edge (rx, type, start, dest)
+     struct rx *rx;
+     enum rx_nfa_etype type;
+     struct rx_nfa_state *start;
+     struct rx_nfa_state *dest;
+#endif
+{
+  /* The edge is pushed on the front of START's edge list.  Its `params'
+   * are left uninitialized for the caller to fill in.  Returns 0 when
+   * malloc fails.
+   */
+  struct rx_nfa_edge *edge = (struct rx_nfa_edge *)malloc (sizeof (*edge));
+
+  if (edge)
+    {
+      edge->type = type;
+      edge->dest = dest;
+      edge->next = start->edges;
+      start->edges = edge;
+    }
+  return edge;
+}
+
+
+/* Release the storage of the edge E with free.  E is not unlinked from
+ * its state's edge list, and any cset it refers to is not released.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_nfa_edge (struct rx_nfa_edge * e)
+#else
+RX_DECL void
+rx_free_nfa_edge (e)
+ struct rx_nfa_edge * e;
+#endif
+{
+ free ((char *)e);
+}
+
+
+/* This constructs a POSSIBLE_FUTURE, which is a kind of epsilon-closure
+ * of an NFA. These are added to an nfa automatically by eclose_nfa.
+ */
+
+#ifdef __STDC__
+static struct rx_possible_future *
+rx_possible_future (struct rx * rx,
+                    struct rx_se_list * effects)
+#else
+static struct rx_possible_future *
+rx_possible_future (rx, effects)
+     struct rx * rx;
+     struct rx_se_list * effects;
+#endif
+{
+  /* The new record starts with an empty destination set, no successor,
+   * and EFFECTS as its side-effect list.  Returns 0 when malloc fails.
+   * RX is unused.
+   */
+  struct rx_possible_future *fut =
+    (struct rx_possible_future *) malloc (sizeof (*fut));
+
+  if (fut)
+    {
+      fut->effects = effects;
+      fut->destset = 0;
+      fut->next = 0;
+    }
+  return fut;
+}
+
+
+/* Release the storage of PF with free.  The destination set and effect
+ * list that PF points at are not released here.
+ */
+#ifdef __STDC__
+static void
+rx_free_possible_future (struct rx_possible_future * pf)
+#else
+static void
+rx_free_possible_future (pf)
+ struct rx_possible_future * pf;
+#endif
+{
+ free ((char *)pf);
+}
+
+
+/* Release the whole nfa held in RX: every state on rx->nfa_states
+ * together with its edges (and the cset of each ne_cset edge) and its
+ * possible-future records.  On return rx->nfa_states is 0.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_free_nfa (struct rx *rx)
+#else
+RX_DECL void
+rx_free_nfa (rx)
+     struct rx *rx;
+#endif
+{
+  struct rx_nfa_state * state;
+
+  while ((state = rx->nfa_states) != 0)
+    {
+      struct rx_nfa_edge * edge;
+      struct rx_possible_future * fut;
+
+      /* Pop and release the edges one by one. */
+      while ((edge = state->edges) != 0)
+        {
+          if (edge->type == ne_cset)
+            rx_free_cset (rx, edge->params.cset);
+          state->edges = edge->next;
+          rx_free_nfa_edge (edge);
+        }
+
+      /* Release the partial epsilon closures of this state. */
+      fut = state->futures;
+      while (fut)
+        {
+          struct rx_possible_future * following = fut->next;
+          rx_free_possible_future (fut);
+          fut = following;
+        }
+
+      /* Finally unlink and release the state itself. */
+      rx->nfa_states = state->next;
+      rx_free_nfa_state (state);
+    }
+}
+
+
+
+/* This page: translating a pattern expression into an nfa and doing the
+ * static part of the nfa->super-nfa translation.
+ */
+
+/* This is the thompson regexp->nfa algorithm.
+ * It is modified to allow for `side-effect epsilons.' Those are
+ * edges that are taken whenever a similar epsilon edge would be,
+ * but which imply that some side effect occurs when the edge
+ * is taken.
+ *
+ * Side effects are used to model parts of the pattern language
+ * that are not regular (in the formal sense).
+ */
+
+/* Add to RX's nfa a sub-graph that recognizes REXP.
+ *
+ * *START and *END are the entry and exit states of the sub-graph.
+ * Either may be preset by the caller; one that is 0 is allocated here
+ * and stored back.  A null REXP makes *END the same state as *START.
+ *
+ * Returns 1 on success and 0 on failure (allocation failure or an
+ * r_data node).  On failure the states and edges already built remain
+ * linked into RX, so rx_free_nfa can still release them.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_build_nfa (struct rx *rx,
+              struct rexp_node *rexp,
+              struct rx_nfa_state **start,
+              struct rx_nfa_state **end)
+#else
+RX_DECL int
+rx_build_nfa (rx, rexp, start, end)
+     struct rx *rx;
+     struct rexp_node *rexp;
+     struct rx_nfa_state **start;
+     struct rx_nfa_state **end;
+#endif
+{
+  struct rx_nfa_edge *edge;
+
+  /* Start & end nodes may have been allocated by the caller. */
+  *start = *start ? *start : rx_nfa_state (rx);
+
+  if (!*start)
+    return 0;
+
+  if (!rexp)
+    {
+      *end = *start;
+      return 1;
+    }
+
+  *end = *end ? *end : rx_nfa_state (rx);
+
+  /* *START is deliberately not freed here.  It is linked on
+   * rx->nfa_states (and may belong to the caller), so freeing it would
+   * leave a dangling pointer on that list.
+   */
+  if (!*end)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_data:
+      return 0;
+
+    case r_cset:
+      edge = rx_nfa_edge (rx, ne_cset, *start, *end);
+      if (!edge)
+        return 0;
+      edge->params.cset = rx_copy_cset (rx, rexp->params.cset);
+      if (!edge->params.cset)
+        {
+          /* rx_nfa_edge pushed EDGE on the front of (*start)->edges;
+           * unlink it before freeing so the list stays valid.
+           */
+          (*start)->edges = edge->next;
+          rx_free_nfa_edge (edge);
+          return 0;
+        }
+      return 1;
+
+    case r_opt:
+      /* The operand, plus an epsilon edge that bypasses it. */
+      return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
+              && rx_nfa_edge (rx, ne_epsilon, *start, *end));
+
+    case r_star:
+      {
+        struct rx_nfa_state * star_start = 0;
+        struct rx_nfa_state * star_end = 0;
+        return (rx_build_nfa (rx, rexp->params.pair.left,
+                              &star_start, &star_end)
+                && star_start
+                && star_end
+                && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
+                && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
+                && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
+
+                && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
+      }
+
+    case r_2phase_star:
+      {
+        struct rx_nfa_state * star_start = 0;
+        struct rx_nfa_state * star_end = 0;
+        struct rx_nfa_state * loop_exp_start = 0;
+        struct rx_nfa_state * loop_exp_end = 0;
+
+        return (rx_build_nfa (rx, rexp->params.pair.left,
+                              &star_start, &star_end)
+                && rx_build_nfa (rx, rexp->params.pair.right,
+                                 &loop_exp_start, &loop_exp_end)
+                && star_start
+                && star_end
+                && loop_exp_end
+                && loop_exp_start
+                && rx_nfa_edge (rx, ne_epsilon, star_start, *end)
+                && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
+                && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
+
+                && rx_nfa_edge (rx, ne_epsilon, star_end, loop_exp_start)
+                && rx_nfa_edge (rx, ne_epsilon, loop_exp_end, star_start));
+      }
+
+
+    case r_concat:
+      {
+        /* The left operand's end state doubles as the right's start. */
+        struct rx_nfa_state *shared = 0;
+        return
+          (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
+           && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
+      }
+
+    case r_alternate:
+      {
+        struct rx_nfa_state *ls = 0;
+        struct rx_nfa_state *le = 0;
+        struct rx_nfa_state *rs = 0;
+        struct rx_nfa_state *re = 0;
+        return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
+                && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
+                && rx_nfa_edge (rx, ne_epsilon, *start, ls)
+                && rx_nfa_edge (rx, ne_epsilon, *start, rs)
+                && rx_nfa_edge (rx, ne_epsilon, le, *end)
+                && rx_nfa_edge (rx, ne_epsilon, re, *end));
+      }
+
+    case r_side_effect:
+      edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
+      if (!edge)
+        return 0;
+      edge->params.side_effect = rexp->params.side_effect;
+      return 1;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+/* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon
+ * transitions. Only these nodes can occur in super-states.
+ * All nodes are given an integer id.
+ * The id is non-negative if the node has non-epsilon out-transitions, negative
+ * otherwise (this is because we want the non-negative ids to be used as
+ * array indexes in a few places).
+ */
+
+#ifdef __STDC__
+RX_DECL void
+rx_name_nfa_states (struct rx *rx)
+#else
+RX_DECL void
+rx_name_nfa_states (rx)
+     struct rx *rx;
+#endif
+{
+  struct rx_nfa_state *state;
+
+  rx->nodec = 0;
+  rx->epsnodec = -1;
+
+  for (state = rx->nfa_states; state; state = state->next)
+    {
+      struct rx_nfa_edge *edge;
+      int has_cset_edge = 0;
+
+      if (state->is_start)
+        state->eclosure_needed = 1;
+
+      /* Does this state have at least one character-set edge? */
+      for (edge = state->edges; edge; edge = edge->next)
+        if (edge->type == ne_cset)
+          {
+            has_cset_edge = 1;
+            break;
+          }
+
+      if (!has_cset_edge)
+        {
+          /* No ne_cset edge found: next negative id. */
+          state->id = rx->epsnodec--;
+          continue;
+        }
+
+      /* Next non-negative id; every destination of this state's edges
+       * is marked as needing an epsilon closure.
+       */
+      state->id = rx->nodec++;
+      for (edge = state->edges; edge; edge = edge->next)
+        edge->dest->eclosure_needed = 1;
+    }
+
+  /* Turn the negative running counter into a positive number. */
+  rx->epsnodec = -rx->epsnodec;
+}
+
+
+/* This page: data structures for the static part of the nfa->supernfa
+ * translation.
+ *
+ * There are side effect lists -- lists of side effects occurring
+ * along an uninterrupted, acyclic path of side-effect epsilon edges.
+ * Such paths are collapsed to single edges in the course of computing
+ * epsilon closures. Such single edges are labeled with a list of all
+ * the side effects entailed in crossing them. Like lists of side
+ * effects are made == by the constructors below.
+ *
+ * There are also nfa state sets. These are used to hold a list of all
+ * states reachable from a starting state for a given type of transition
+ * and side effect list. These are also hash-consed.
+ */
+
+/* The next several functions compare, construct, etc. lists of side
+ * effects. See ECLOSE_NFA (below) for details.
+ */
+
+/* Ordering of rx_se_list
+ * (-1, 0, 1 return value convention).
+ */
+
+#ifdef __STDC__
+static int
+se_list_cmp (void * va, void * vb)
+#else
+static int
+se_list_cmp (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  /* Walk both lists in step.  Identical tails (including two empty
+   * lists) compare equal; an exhausted list sorts before a longer one;
+   * otherwise the first differing `car' decides, with the smaller car
+   * yielding 1 and the larger yielding -1.
+   */
+  struct rx_se_list * a = (struct rx_se_list *)va;
+  struct rx_se_list * b = (struct rx_se_list *)vb;
+
+  while (a != b)
+    {
+      if (!a)
+        return -1;
+      if (!b)
+        return 1;
+      if ((long)a->car < (long)b->car)
+        return 1;
+      if ((long)a->car > (long)b->car)
+        return -1;
+      a = a->cdr;
+      b = b->cdr;
+    }
+
+  return 0;
+}
+
+
+/* Equality predicate for side-effect lists: nonzero exactly when
+ * se_list_cmp reports the two lists as equal.
+ */
+#ifdef __STDC__
+static int
+se_list_equal (void * va, void * vb)
+#else
+static int
+se_list_equal (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  return se_list_cmp (va, vb) == 0;
+}
+
+/* Hash-table policy for tables of side-effect lists: se_list_equal as
+ * the equality test plus the malloc/free based compiler_* allocators.
+ * NOTE(review): the initializer is positional; the field order of
+ * struct rx_hash_rules is not visible here -- confirm against its
+ * declaration.
+ */
+static struct rx_hash_rules se_list_hash_rules =
+{
+ se_list_equal,
+ compiler_hash_alloc,
+ compiler_free_hash,
+ compiler_hash_item_alloc,
+ compiler_free_hash_item
+};
+
+
+/* Allocate a list cell whose car is SE and whose cdr is LIST.
+ * Returns 0 when malloc fails.  RX is unused.
+ */
+#ifdef __STDC__
+static struct rx_se_list *
+side_effect_cons (struct rx * rx,
+                  void * se, struct rx_se_list * list)
+#else
+static struct rx_se_list *
+side_effect_cons (rx, se, list)
+     struct rx * rx;
+     void * se;
+     struct rx_se_list * list;
+#endif
+{
+  struct rx_se_list * cell = ((struct rx_se_list *) malloc (sizeof (*cell)));
+
+  if (cell)
+    {
+      cell->cdr = list;
+      cell->car = se;
+    }
+  return cell;
+}
+
+
+/* Hash-consing constructor for side-effect lists.
+ *
+ * Returns the unique cell stored in MEMO whose car is CAR and whose cdr
+ * is CDR, creating and recording it on first use.  Returns 0 when an
+ * allocation fails; in that case MEMO is left without an entry for the
+ * cell.  RX is unused.
+ */
+#ifdef __STDC__
+static struct rx_se_list *
+hash_cons_se_prog (struct rx * rx,
+                   struct rx_hash * memo,
+                   void * car, struct rx_se_list * cdr)
+#else
+static struct rx_se_list *
+hash_cons_se_prog (rx, memo, car, cdr)
+     struct rx * rx;
+     struct rx_hash * memo;
+     void * car;
+     struct rx_se_list * cdr;
+#endif
+{
+  long hash = (long)car ^ (long)cdr;
+  struct rx_se_list template;
+
+  template.car = car;
+  template.cdr = cdr;
+  {
+    struct rx_hash_item * it = rx_hash_store (memo, hash,
+                                              (void *)&template,
+                                              &se_list_hash_rules);
+    if (!it)
+      return 0;
+    /* Data still pointing at the stack template marks a new item:
+     * give it a heap copy of its own.
+     */
+    if (it->data == (void *)&template)
+      {
+        struct rx_se_list * consed;
+        consed = (struct rx_se_list *) malloc (sizeof (*consed));
+        if (! consed)
+          {
+            /* The item is already linked into MEMO, so it must be
+             * removed through rx_hash_free.  A bare free would leave
+             * a dangling pointer in the bucket chain.
+             */
+            rx_hash_free (it, &se_list_hash_rules);
+            return 0;
+          }
+        *consed = template;
+        it->data = (void *)consed;
+      }
+    return (struct rx_se_list *)it->data;
+  }
+}
+
+
+/* Build the hash-consed counterpart of the list PROG in MEMO.
+ *
+ * Each element of PROG is consed onto the front of the result as it is
+ * visited, so the result holds PROG's elements in reverse order.
+ * Returns 0 when PROG is empty or when a cell cannot be created.
+ */
+#ifdef __STDC__
+static struct rx_se_list *
+hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
+#else
+static struct rx_se_list *
+hash_se_prog (rx, memo, prog)
+     struct rx * rx;
+     struct rx_hash * memo;
+     struct rx_se_list * prog;
+#endif
+{
+  struct rx_se_list * built = 0;
+
+  for (; prog; prog = prog->cdr)
+    {
+      built = hash_cons_se_prog (rx, memo, prog->car, built);
+      if (!built)
+        return 0;
+    }
+
+  return built;
+}
+
+/* Order two nfa state sets (-1, 0, 1 return value convention).
+ *
+ * Both lists are walked in step.  Identical tails compare equal; an
+ * exhausted list sorts before a longer one; otherwise the first pair of
+ * differing state ids decides, with the smaller id yielding 1 and the
+ * larger yielding -1.
+ */
+#ifdef __STDC__
+static int
+nfa_set_cmp (void * va, void * vb)
+#else
+static int
+nfa_set_cmp (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
+  struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
+
+  while (a != b)
+    {
+      if (!a)
+        return -1;
+      if (!b)
+        return 1;
+      if (a->car->id < b->car->id)
+        return 1;
+      if (a->car->id > b->car->id)
+        return -1;
+      a = a->cdr;
+      b = b->cdr;
+    }
+
+  return 0;
+}
+
+/* Equality predicate for nfa state sets: nonzero exactly when
+ * nfa_set_cmp reports the two sets as equal.
+ */
+#ifdef __STDC__
+static int
+nfa_set_equal (void * va, void * vb)
+#else
+static int
+nfa_set_equal (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  return nfa_set_cmp (va, vb) == 0;
+}
+
+/* Hash-table policy for tables of nfa state sets: nfa_set_equal as the
+ * equality test plus the malloc/free based compiler_* allocators.
+ * NOTE(review): the initializer is positional; the field order of
+ * struct rx_hash_rules is not visible here -- confirm against its
+ * declaration.
+ */
+static struct rx_hash_rules nfa_set_hash_rules =
+{
+ nfa_set_equal,
+ compiler_hash_alloc,
+ compiler_free_hash,
+ compiler_hash_item_alloc,
+ compiler_free_hash_item
+};
+
+
+/* Hash-consing constructor for nfa state sets.
+ *
+ * Returns the unique cell stored in MEMO whose car is STATE and whose
+ * cdr is SET, creating and recording it on first use.  Returns 0 when
+ * an allocation fails; in that case MEMO is left without an entry for
+ * the cell.  RX is unused.
+ */
+#ifdef __STDC__
+static struct rx_nfa_state_set *
+nfa_set_cons (struct rx * rx,
+              struct rx_hash * memo, struct rx_nfa_state * state,
+              struct rx_nfa_state_set * set)
+#else
+static struct rx_nfa_state_set *
+nfa_set_cons (rx, memo, state, set)
+     struct rx * rx;
+     struct rx_hash * memo;
+     struct rx_nfa_state * state;
+     struct rx_nfa_state_set * set;
+#endif
+{
+  struct rx_nfa_state_set template;
+  struct rx_hash_item * node;
+  template.car = state;
+  template.cdr = set;
+  node = rx_hash_store (memo,
+                        (((long)state) >> 8) ^ (long)set,
+                        &template, &nfa_set_hash_rules);
+  if (!node)
+    return 0;
+  /* Data still pointing at the stack template marks a new item:
+   * give it a heap copy of its own.
+   */
+  if (node->data == &template)
+    {
+      struct rx_nfa_state_set * l;
+      l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
+      if (!l)
+        {
+          /* Take the half-made item back out of MEMO rather than
+           * leaving an entry that has no data behind.
+           */
+          rx_hash_free (node, &nfa_set_hash_rules);
+          return 0;
+        }
+      *l = template;
+      node->data = (void *) l;
+    }
+  return (struct rx_nfa_state_set *)node->data;
+}
+
+/* Insert STATE into SET, a list of NFA states kept in ascending order
+ * of state id, and return the resulting memoized list.  If a state with
+ * the same id is already present, SET itself is returned.  Cells in
+ * front of the insertion point are rebuilt with nfa_set_cons.
+ *
+ * Returns 0 on failure.  Inserting a state can never produce an empty
+ * set, so a 0 from the recursive call always means nfa_set_cons failed;
+ * it is propagated instead of being consed into a truncated set (or
+ * mistaken for "tail unchanged" when SET's tail is empty).
+ */
+#ifdef __STDC__
+static struct rx_nfa_state_set *
+nfa_set_enjoin (struct rx * rx,
+                struct rx_hash * memo, struct rx_nfa_state * state,
+                struct rx_nfa_state_set * set)
+#else
+static struct rx_nfa_state_set *
+nfa_set_enjoin (rx, memo, state, set)
+     struct rx * rx;
+     struct rx_hash * memo;
+     struct rx_nfa_state * state;
+     struct rx_nfa_state_set * set;
+#endif
+{
+  if (!set || state->id < set->car->id)
+    return nfa_set_cons (rx, memo, state, set);
+  if (state->id == set->car->id)
+    return set;
+  else
+    {
+      struct rx_nfa_state_set * newcdr
+        = nfa_set_enjoin (rx, memo, state, set->cdr);
+      if (!newcdr)
+        return 0;
+      /* Only rebuild this cell if the tail actually changed. */
+      if (newcdr != set->cdr)
+        set = nfa_set_cons (rx, memo, set->car, newcdr);
+      return set;
+    }
+}
+
+
+
+/* This page: computing epsilon closures. The closures aren't total.
+ * Each node's closures are partitioned according to the side effects entailed
+ * along the epsilon edges. Return true on success.
+ */
+
+/* Traversal state threaded through eclose_node. */
+struct eclose_frame
+{
+ /* Side effects crossed on the path currently being explored.
+ * eclose_node pushes onto the front before following a side-effect
+ * edge and pops again afterwards.
+ */
+ struct rx_se_list *prog_backwards;
+};
+static int eclose_node (struct rx *, struct rx_nfa_state *,
+ struct rx_nfa_state *,
+ struct eclose_frame *);
+RX_DECL int rx_eclose_nfa (struct rx *);
+RX_DECL void rx_delete_epsilon_transitions
+ (struct rx *);
+static int nfacmp (void *, void *);
+static int count_hash_nodes (struct rx_hash *);
+static void nfa_set_freer (struct rx_hash_item *);
+RX_DECL int rx_compactify_nfa (struct rx *, void **,
+ unsigned long *);
+static char *rx_cache_malloc (struct rx_cache *, int);
+static void rx_cache_free (struct rx_cache *,
+ struct rx_freelist **, char *);
+static void install_transition (struct rx_superstate *,
+ struct rx_inx *, rx_Bitset);
+static int qlen (struct rx_superstate *);
+static void check_cache (struct rx_cache *);
+static void semifree_superstate (struct rx_cache *);
+static void refresh_semifree_superstate
+ (struct rx_cache *,
+ struct rx_superstate *);
+static void rx_refresh_this_superstate
+ (struct rx_cache *,
+ struct rx_superstate *);
+static void release_superset_low (struct rx_cache *,
+ struct rx_superset *);
+RX_DECL void rx_release_superset (struct rx *, struct rx_superset *);
+static int rx_really_free_superstate (struct rx_cache *);
+static char *rx_cache_get (struct rx_cache *,
+ struct rx_freelist **);
+static char *rx_cache_malloc_or_get (struct rx_cache *,
+ struct rx_freelist **, int);
+static char *rx_cache_get_superstate (struct rx_cache *);
+static int supersetcmp (void *, void *);
+static struct rx_hash_item
+ *superset_allocator (struct rx_hash_rules *, void *);
+static struct rx_hash
+ *super_hash_allocator (struct rx_hash_rules *);
+static void super_hash_liberator (struct rx_hash *,
+ struct rx_hash_rules *);
+static void superset_hash_item_liberator
+ (struct rx_hash_item *,
+ struct rx_hash_rules *);
+static int bytes_for_cache_size (int, int);
+static void rx_morecore (struct rx_cache *);
+RX_DECL struct rx_superset
+ *rx_superset_cons (struct rx *, struct rx_nfa_state *,
+ struct rx_superset *);
+RX_DECL struct rx_superset
+ *rx_superstate_eclosure_union
+ (struct rx *, struct rx_superset *,
+ struct rx_nfa_state_set *);
+static struct rx_distinct_future
+ *include_futures (struct rx *,
+ struct rx_distinct_future *,
+ struct rx_nfa_state *,
+ struct rx_superstate *);
+RX_DECL struct rx_superstate
+ *rx_superstate (struct rx *, struct rx_superset *);
+static int solve_destination (struct rx *,
+ struct rx_distinct_future *);
+static int compute_super_edge (struct rx *,
+ struct rx_distinct_future **,
+ rx_Bitset, struct rx_superstate *,
+ unsigned char);
+static struct rx_super_edge
+ *rx_super_edge (struct rx *, struct rx_superstate *,
+ rx_Bitset,
+ struct rx_distinct_future *);
+static void install_partial_transition
+ (struct rx_superstate *,
+ struct rx_inx *, RX_subset, int);
+RX_DECL struct rx_inx
+ *rx_handle_cache_miss (struct rx *, struct rx_superstate *,
+ unsigned char, void *);
+static boolean
+ at_begline_loc_p (__const__ char *, __const__ char *,
+ reg_syntax_t);
+static boolean
+ at_endline_loc_p (__const__ char *, __const__ char *,
+ int);
+static rx_Bitset
+ inverse_translation (struct re_pattern_buffer *, char *,
+ rx_Bitset, unsigned char *, int);
+
+
+/* Add to OUTNODE's list of possible futures everything reachable from
+ * NODE along epsilon and side-effect edges.  FRAME carries the side
+ * effects crossed so far on the current path.  node->mark guards
+ * against revisiting a node that is already on the current path.
+ *
+ * Returns 1 on success and 0 on failure.  On failure the marks set along
+ * the current path are left in place (as before), but every side-effect
+ * cell pushed onto FRAME by this call has been popped and freed.
+ */
+#ifdef __STDC__
+static int
+eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
+             struct rx_nfa_state *node, struct eclose_frame *frame)
+#else
+static int
+eclose_node (rx, outnode, node, frame)
+     struct rx *rx;
+     struct rx_nfa_state *outnode;
+     struct rx_nfa_state *node;
+     struct eclose_frame *frame;
+#endif
+{
+  struct rx_nfa_edge *e = node->edges;
+
+  /* For each node, we follow all epsilon paths to build the closure.
+   * The closure omits nodes that have only epsilon edges.
+   * The closure is split into partial closures -- all the states in
+   * a partial closure are reached by crossing the same list
+   * of side effects (though not necessarily the same path).
+   */
+  if (node->mark)
+    return 1;
+  node->mark = 1;
+
+  if (node->id >= 0 || node->is_final)
+    {
+      struct rx_possible_future **ec;
+      struct rx_se_list * prog_in_order
+        = ((struct rx_se_list *)hash_se_prog (rx,
+                                              &rx->se_list_memo,
+                                              frame->prog_backwards));
+      int cmp = 0;
+
+      /* Find the future with this side-effect list, or the place in
+       * the list where it has to be inserted.
+       */
+      ec = &outnode->futures;
+      while (*ec)
+        {
+          cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
+          if (cmp <= 0)
+            break;
+          ec = &(*ec)->next;
+        }
+      if (!*ec || (cmp < 0))
+        {
+          /* Check the allocation BEFORE linking it in, so a failure
+           * neither dereferences null nor drops the rest of the list.
+           */
+          struct rx_possible_future * fresh
+            = rx_possible_future (rx, prog_in_order);
+          if (!fresh)
+            return 0;
+          fresh->next = *ec;
+          *ec = fresh;
+        }
+      if (node->id >= 0)
+        {
+          (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
+                                           node, (*ec)->destset);
+          if (!(*ec)->destset)
+            return 0;
+        }
+    }
+
+  while (e)
+    {
+      switch (e->type)
+        {
+        case ne_epsilon:
+          if (!eclose_node (rx, outnode, e->dest, frame))
+            return 0;
+          break;
+        case ne_side_effect:
+          {
+            struct rx_se_list * pushed
+              = side_effect_cons (rx,
+                                  e->params.side_effect,
+                                  frame->prog_backwards);
+            int ok;
+
+            /* On failure leave the frame's stack untouched so that
+             * callers further up can still pop their own cells.
+             */
+            if (!pushed)
+              return 0;
+            frame->prog_backwards = pushed;
+            ok = eclose_node (rx, outnode, e->dest, frame);
+            /* Pop and free our cell whether or not the recursion
+             * succeeded.
+             */
+            frame->prog_backwards = pushed->cdr;
+            free ((char *)pushed);
+            if (!ok)
+              return 0;
+            break;
+          }
+        default:
+          break;
+        }
+      e = e->next;
+    }
+  node->mark = 0;
+  return 1;
+}
+
+/* Compute the epsilon closures of RX's NFA.  Gives RX a fresh rx_id
+ * from a function-local counter, zeroes both memo tables, clears every
+ * state's futures, and runs eclose_node on each state that has
+ * eclosure_needed set.  Returns 1 on success, 0 as soon as eclose_node
+ * fails.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_eclose_nfa (struct rx *rx)
+#else
+RX_DECL int
+rx_eclose_nfa (rx)
+     struct rx *rx;
+#endif
+{
+  static int next_rx_id = 0;
+  struct eclose_frame frame;
+  struct rx_nfa_state *state;
+
+  frame.prog_backwards = 0;
+  rx->rx_id = next_rx_id++;
+  bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
+  bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
+
+  for (state = rx->nfa_states; state; state = state->next)
+    {
+      state->futures = 0;
+      if (!state->eclosure_needed)
+        continue;
+      if (!eclose_node (rx, state, state, &frame))
+        return 0;
+    }
+  return 1;
+}
+
+
+/* This deletes epsilon edges from an NFA. After running eclose_node,
+ * we have no more need for these edges. They are removed to simplify
+ * further operations on the NFA.
+ */
+
+/* Unlink and free every epsilon and side-effect edge of every state in
+ * RX's NFA.  Edges of any other type are left in place, in their
+ * original order.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_delete_epsilon_transitions (struct rx *rx)
+#else
+RX_DECL void
+rx_delete_epsilon_transitions (rx)
+     struct rx *rx;
+#endif
+{
+  struct rx_nfa_state *state;
+
+  for (state = rx->nfa_states; state; state = state->next)
+    {
+      /* LINK always addresses the pointer that leads to the edge under
+       * inspection, so unlinking needs no separate "previous" edge.
+       */
+      struct rx_nfa_edge **link = &state->edges;
+      while (*link)
+        {
+          struct rx_nfa_edge *edge = *link;
+          if (edge->type == ne_epsilon || edge->type == ne_side_effect)
+            {
+              *link = edge->next;
+              rx_free_nfa_edge (edge);
+            }
+          else
+            link = &edge->next;
+        }
+    }
+}
+
+
+/* This page: storing the nfa in a contiguous region of memory for
+ * subsequent conversion to a super-nfa.
+ */
+
+/* This is for qsort on an array of nfa_states. The order
+ * is based on state ids and goes
+ * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0)
+ * This way, positive ids double as array indices.
+ */
+
+/* qsort comparison over an array of pointers to NFA states.  States
+ * with non-negative ids sort before states with negative ids; within
+ * each group the order is ascending by id.  Returns 0 only when both
+ * array slots hold the same state pointer.
+ */
+#ifdef __STDC__
+static int
+nfacmp (void * va, void * vb)
+#else
+static int
+nfacmp (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  struct rx_nfa_state *left = *(struct rx_nfa_state **)va;
+  struct rx_nfa_state *right = *(struct rx_nfa_state **)vb;
+
+  if (left == right)
+    return 0;
+  /* Different sign groups: the negative id goes last. */
+  if ((left->id < 0) != (right->id < 0))
+    return (left->id < 0) ? 1 : -1;
+  return (left->id < right->id) ? -1 : 1;
+}
+
+/* Count the entries recorded in hash table ST.  For each of the 13
+ * slots: if the slot has a child table, recurse into it; otherwise add
+ * that slot's bucket_size.
+ */
+#ifdef __STDC__
+static int
+count_hash_nodes (struct rx_hash * st)
+#else
+static int
+count_hash_nodes (st)
+     struct rx_hash * st;
+#endif
+{
+  int total = 0;
+  int slot;
+
+  for (slot = 0; slot < 13; ++slot)
+    {
+      if (st->children[slot])
+        total += count_hash_nodes (st->children[slot]);
+      else
+        total += st->bucket_size[slot];
+    }
+  return total;
+}
+
+
+/* Per-item callback passed to rx_free_hash_table for the side-effect
+ * list memo: frees the storage NODE's data points to.
+ */
+#ifdef __STDC__
+static void
+se_memo_freer (struct rx_hash_item * node)
+#else
+static void
+se_memo_freer (node)
+ struct rx_hash_item * node;
+#endif
+{
+ free ((char *)node->data);
+}
+
+
+/* Per-item callback passed to rx_free_hash_table for the NFA state-set
+ * memo: frees the storage NODE's data points to.
+ */
+#ifdef __STDC__
+static void
+nfa_set_freer (struct rx_hash_item * node)
+#else
+static void
+nfa_set_freer (node)
+ struct rx_hash_item * node;
+#endif
+{
+ free ((char *)node->data);
+}
+
+
+/* This copies an entire NFA into a single malloced block of memory.
+ * Mostly this is for compatibility with regex.c, though it is convenient
+ * to have the nfa nodes in an array.
+ */
+
+/* Copy RX's whole NFA into the single block *MEM (capacity *SIZE bytes).
+ * If the block is too small it is grown with remalloc and *SIZE is
+ * updated.  Within the block the pieces are laid out in this order:
+ * states, edges, side-effect list cells, possible futures, state-set
+ * cells, and finally one character-set bitset per edge.
+ * On success the two memo tables and the old NFA are freed,
+ * rx->nfa_states is pointed at the copy, and 1 is returned.
+ * Returns 0 if either allocation fails.
+ */
+#ifdef __STDC__
+RX_DECL int
+rx_compactify_nfa (struct rx *rx,
+ void **mem, unsigned long *size)
+#else
+RX_DECL int
+rx_compactify_nfa (rx, mem, size)
+ struct rx *rx;
+ void **mem;
+ unsigned long *size;
+#endif
+{
+ int total_nodec;
+ struct rx_nfa_state *n;
+ int edgec = 0;
+ int eclosec = 0;
+ int se_list_consc = count_hash_nodes (&rx->se_list_memo);
+ int nfa_setc = count_hash_nodes (&rx->set_list_memo);
+ unsigned long total_size;
+
+ /* This takes place in two stages. First, the total size of the
+ * nfa is computed, then structures are copied.
+ */
+ n = rx->nfa_states;
+ total_nodec = 0;
+ while (n)
+ {
+ struct rx_nfa_edge *e = n->edges;
+ struct rx_possible_future *ec = n->futures;
+ ++total_nodec;
+ while (e)
+ {
+ ++edgec;
+ e = e->next;
+ }
+ while (ec)
+ {
+ ++eclosec;
+ ec = ec->next;
+ }
+ n = n->next;
+ }
+
+ total_size = (total_nodec * sizeof (struct rx_nfa_state)
+ + edgec * rx_sizeof_bitset (rx->local_cset_size)
+ + edgec * sizeof (struct rx_nfa_edge)
+ + nfa_setc * sizeof (struct rx_nfa_state_set)
+ + eclosec * sizeof (struct rx_possible_future)
+ + se_list_consc * sizeof (struct rx_se_list)
+ + rx->reserved);
+
+ if (total_size > *size)
+ {
+ /* NOTE(review): *mem is overwritten with the result, so on failure
+ * the caller no longer holds the old pointer -- whether the old
+ * block is leaked depends on remalloc; confirm.
+ */
+ *mem = remalloc (*mem, total_size);
+ if (*mem)
+ *size = total_size;
+ else
+ return 0;
+ }
+ /* Now we've allocated the memory; this copies the NFA. */
+ {
+ /* The scratch array is static: it is kept and reused across calls
+ * and never freed here.
+ */
+ static struct rx_nfa_state **scratch = 0;
+ static int scratch_alloc = 0;
+ /* Carve the block into consecutive regions, one per kind of object. */
+ struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem;
+ struct rx_nfa_state *new_state = state_base;
+ struct rx_nfa_edge *new_edge =
+ (struct rx_nfa_edge *)
+ ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state));
+ struct rx_se_list * new_se_list =
+ (struct rx_se_list *)
+ ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge));
+ struct rx_possible_future *new_close =
+ ((struct rx_possible_future *)
+ ((char *) new_se_list
+ + se_list_consc * sizeof (struct rx_se_list)));
+ struct rx_nfa_state_set * new_nfa_set =
+ ((struct rx_nfa_state_set *)
+ ((char *)new_close + eclosec * sizeof (struct rx_possible_future)));
+ char *new_bitset =
+ ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set));
+ int x;
+ struct rx_nfa_state *n;
+
+ if (scratch_alloc < total_nodec)
+ {
+ scratch = ((struct rx_nfa_state **)
+ remalloc (scratch, total_nodec * sizeof (*scratch)));
+ if (scratch)
+ scratch_alloc = total_nodec;
+ else
+ {
+ scratch_alloc = 0;
+ return 0;
+ }
+ }
+
+ for (x = 0, n = rx->nfa_states; n; n = n->next)
+ scratch[x++] = n;
+
+ /* Sort with nfacmp: non-negative ids first in ascending order, then
+ * the negative ids in ascending order.
+ */
+ qsort (scratch, total_nodec, sizeof (struct rx_nfa_state *),
+ (__compar_fn_t)nfacmp);
+
+ for (x = 0; x < total_nodec; ++x)
+ {
+ struct rx_possible_future *eclose = scratch[x]->futures;
+ struct rx_nfa_edge *edge = scratch[x]->edges;
+ struct rx_nfa_state *cn = new_state++;
+ cn->futures = 0;
+ cn->edges = 0;
+ cn->next = (x == total_nodec - 1) ? 0 : (cn + 1);
+ cn->id = scratch[x]->id;
+ cn->is_final = scratch[x]->is_final;
+ cn->is_start = scratch[x]->is_start;
+ cn->mark = 0;
+ while (edge)
+ {
+ /* Position of the destination in the sorted array: a non-negative
+ * id is used directly, a negative id counts back from the end.
+ */
+ int indx = (edge->dest->id < 0
+ ? (total_nodec + edge->dest->id)
+ : edge->dest->id);
+ struct rx_nfa_edge *e = new_edge++;
+ rx_Bitset cset = (rx_Bitset) new_bitset;
+ new_bitset += rx_sizeof_bitset (rx->local_cset_size);
+ rx_bitset_null (rx->local_cset_size, cset);
+ rx_bitset_union (rx->local_cset_size, cset, edge->params.cset);
+ /* Each copied edge is pushed on the front, so the copy's edge list
+ * is in reverse order relative to the source.
+ */
+ e->next = cn->edges;
+ cn->edges = e;
+ e->type = edge->type;
+ e->dest = state_base + indx;
+ e->params.cset = cset;
+ edge = edge->next;
+ }
+ while (eclose)
+ {
+ struct rx_possible_future *ec = new_close++;
+ struct rx_hash_item * sp;
+ struct rx_se_list ** sepos;
+ struct rx_se_list * sesrc;
+ struct rx_nfa_state_set * destlst;
+ struct rx_nfa_state_set ** destpos;
+ ec->next = cn->futures;
+ cn->futures = ec;
+ /* Copy the side-effect list.  The memo item's binding records the
+ * copy of a cell once it has been made; on meeting a cell that
+ * already has a binding, the existing copy is linked in and the
+ * walk stops.
+ * NOTE(review): the result of rx_hash_find is dereferenced without
+ * a null check.
+ */
+ for (sepos = &ec->effects, sesrc = eclose->effects;
+ sesrc;
+ sesrc = sesrc->cdr, sepos = &(*sepos)->cdr)
+ {
+ sp = rx_hash_find (&rx->se_list_memo,
+ (long)sesrc->car ^ (long)sesrc->cdr,
+ sesrc, &se_list_hash_rules);
+ if (sp->binding)
+ {
+ sesrc = (struct rx_se_list *)sp->binding;
+ break;
+ }
+ *new_se_list = *sesrc;
+ sp->binding = (void *)new_se_list;
+ *sepos = new_se_list;
+ ++new_se_list;
+ }
+ /* Either 0 (end of list) or the already-copied tail. */
+ *sepos = sesrc;
+ /* Same scheme for the destination state set; each copied cell's car
+ * is redirected to the copied state, indexed by its id.
+ */
+ for (destpos = &ec->destset, destlst = eclose->destset;
+ destlst;
+ destpos = &(*destpos)->cdr, destlst = destlst->cdr)
+ {
+ sp = rx_hash_find (&rx->set_list_memo,
+ ((((long)destlst->car) >> 8)
+ ^ (long)destlst->cdr),
+ destlst, &nfa_set_hash_rules);
+ if (sp->binding)
+ {
+ destlst = (struct rx_nfa_state_set *)sp->binding;
+ break;
+ }
+ *new_nfa_set = *destlst;
+ new_nfa_set->car = state_base + destlst->car->id;
+ sp->binding = (void *)new_nfa_set;
+ *destpos = new_nfa_set;
+ ++new_nfa_set;
+ }
+ *destpos = destlst;
+ eclose = eclose->next;
+ }
+ }
+ }
+ /* The copy is complete: drop the memo tables and the original NFA. */
+ rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
+ bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
+ rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
+ bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
+
+ rx_free_nfa (rx);
+ rx->nfa_states = (struct rx_nfa_state *)*mem;
+ return 1;
+}
+
+
+/* The functions in the next several pages define the lazy-NFA-conversion used
+ * by matchers. The input to this construction is an NFA such as
+ * is built by compactify_nfa (rx.c). The output is the superNFA.
+ */
+
+/* Match engines can use arbitrary values for opcodes. So, the parse tree
+ * is built using instructions names (enum rx_opcode), but the superstate
+ * nfa is populated with mystery opcodes (void *).
+ *
+ * For convenience, here is an id table. The opcodes are == to their inxs
+ *
+ * The labels in re_search_2 would make good values for instructions.
+ */
+
+void * rx_id_instruction_table[rx_num_instructions] =
+{
+ (void *) rx_backtrack_point,
+ (void *) rx_do_side_effects,
+ (void *) rx_cache_miss,
+ (void *) rx_next_char,
+ (void *) rx_backtrack,
+ (void *) rx_error_inx
+};
+
+
+
+/* Memory mgt. for superstate graphs. */
+
+/* Carve BYTES bytes out of CACHE's block list and return their address,
+ * or 0 if no block can satisfy the request.  No alignment or rounding
+ * is applied to BYTES.
+ */
+#ifdef __STDC__
+static char *
+rx_cache_malloc (struct rx_cache * cache, int bytes)
+#else
+static char *
+rx_cache_malloc (cache, bytes)
+ struct rx_cache * cache;
+ int bytes;
+#endif
+{
+ /* While the current block is too small, move on to the next one,
+ * asking morecore for a new block when the list runs out.  Whatever
+ * was left in a skipped block is not used again by this function.
+ */
+ while (cache->bytes_left < bytes)
+ {
+ if (cache->memory_pos)
+ cache->memory_pos = cache->memory_pos->next;
+ if (!cache->memory_pos)
+ {
+ cache->morecore (cache);
+ /* morecore signals failure by leaving memory_pos null. */
+ if (!cache->memory_pos)
+ return 0;
+ }
+ cache->bytes_left = cache->memory_pos->bytes;
+ /* Usable storage begins right after the block header. */
+ cache->memory_addr = ((char *)cache->memory_pos
+ + sizeof (struct rx_blocklist));
+ }
+ cache->bytes_left -= bytes;
+ {
+ char * addr = cache->memory_addr;
+ cache->memory_addr += bytes;
+ return addr;
+ }
+}
+
+/* Push the chunk MEM onto the front of *FREELIST.  The chunk's own
+ * storage is reused for the list link.  CACHE is not used.
+ */
+#ifdef __STDC__
+static void
+rx_cache_free (struct rx_cache * cache,
+               struct rx_freelist ** freelist, char * mem)
+#else
+static void
+rx_cache_free (cache, freelist, mem)
+     struct rx_cache * cache;
+     struct rx_freelist ** freelist;
+     char * mem;
+#endif
+{
+  struct rx_freelist * cell = (struct rx_freelist *)mem;
+
+  cell->next = *freelist;
+  *freelist = cell;
+}
+
+/* The partially instantiated superstate graph has a transition
+ * table at every node. There is one entry for every character.
+ * This fills in the transition for a set.
+ */
+/* For every character whose bit is set in TRCSET, copy *ANSWER into
+ * SUPER's transition table entry for that character.
+ * The loop is hard-wired to 256 characters handled 32 at a time, one
+ * RX_subset word of TRCSET per group of 32.
+ * NOTE(review): this assumes RX_subset holds at least 32 bits and that
+ * the transition table has 256 entries -- confirm.
+ */
+#ifdef __STDC__
+static void
+install_transition (struct rx_superstate *super,
+ struct rx_inx *answer, rx_Bitset trcset)
+#else
+static void
+install_transition (super, answer, trcset)
+ struct rx_superstate *super;
+ struct rx_inx *answer;
+ rx_Bitset trcset;
+#endif
+{
+ struct rx_inx * transitions = super->transitions;
+ int chr;
+ for (chr = 0; chr < 256; )
+ /* An all-zero word means none of its 32 characters is in the set. */
+ if (!*trcset)
+ {
+ ++trcset;
+ chr += 32;
+ }
+ else
+ {
+ RX_subset sub = *trcset;
+ RX_subset mask = 1;
+ int bound = chr + 32;
+ while (chr < bound)
+ {
+ if (sub & mask)
+ transitions [chr] = *answer;
+ ++chr;
+ mask <<= 1;
+ }
+ ++trcset;
+ }
+}
+
+/* Number of superstates on the circular next_recyclable ring that Q
+ * belongs to; 0 when Q is null.
+ */
+#ifdef __STDC__
+static int
+qlen (struct rx_superstate * q)
+#else
+static int
+qlen (q)
+     struct rx_superstate * q;
+#endif
+{
+  struct rx_superstate * cursor;
+  int length;
+
+  if (!q)
+    return 0;
+
+  /* Q itself counts as one; walk until the ring comes back to it. */
+  length = 1;
+  cursor = q->next_recyclable;
+  while (cursor != q)
+    {
+      ++length;
+      cursor = cursor->next_recyclable;
+    }
+  return length;
+}
+
+/* Consistency check: the semifree counter must equal the length of the
+ * semifree ring, and the remaining superstates must equal the length of
+ * the lru ring.  On a mismatch the function calls itself with a null
+ * cache pointer, which is then dereferenced -- presumably a deliberate
+ * way to crash at the point of detection.
+ */
+#ifdef __STDC__
+static void
+check_cache (struct rx_cache * cache)
+#else
+static void
+check_cache (cache)
+ struct rx_cache * cache;
+#endif
+{
+ struct rx_cache * you_fucked_up = 0;
+ int total = cache->superstates;
+ int semi = cache->semifree_superstates;
+ if (semi != qlen (cache->semifree_superstate))
+ check_cache (you_fucked_up);
+ if ((total - semi) != qlen (cache->lru_superstate))
+ check_cache (you_fucked_up);
+}
+
+/* When a superstate is old and neglected, it can enter a
+ * semi-free state. A semi-free state is slated to die.
+ * Incoming transitions to a semi-free state are re-written
+ * to cause an (interpreted) fault when they are taken.
+ * The fault handler revives the semi-free state, patches
+ * incoming transitions back to normal, and continues.
+ *
+ * The idea is basically to free in two stages, aborting
+ * between the two if the state turns out to be useful again.
+ * When a free is aborted, the rescued superstate is placed
+ * in the most-favored slot to maximize the time until it
+ * is next semi-freed.
+ */
+
+/* Move one unlocked superstate from the lru ring to the semifree ring
+ * and rewrite the transitions that lead to it as cache-miss
+ * instructions.  Does nothing if every superstate is already semifree
+ * or every remaining one is locked.
+ */
+#ifdef __STDC__
+static void
+semifree_superstate (struct rx_cache * cache)
+#else
+static void
+semifree_superstate (cache)
+ struct rx_cache * cache;
+#endif
+{
+ /* Counts superstates that cannot be chosen: those already semifree
+ * plus the locked ones skipped below.
+ */
+ int disqualified = cache->semifree_superstates;
+ if (disqualified == cache->superstates)
+ return;
+ /* Rotate the lru ring past locked superstates. */
+ while (cache->lru_superstate->locks)
+ {
+ cache->lru_superstate = cache->lru_superstate->next_recyclable;
+ ++disqualified;
+ if (disqualified == cache->superstates)
+ return;
+ }
+ {
+ struct rx_superstate * it = cache->lru_superstate;
+ /* Unlink IT from the lru ring ... */
+ it->next_recyclable->prev_recyclable = it->prev_recyclable;
+ it->prev_recyclable->next_recyclable = it->next_recyclable;
+ cache->lru_superstate = (it == it->next_recyclable
+ ? 0
+ : it->next_recyclable);
+ /* ... and link it into the semifree ring, just before its head. */
+ if (!cache->semifree_superstate)
+ {
+ cache->semifree_superstate = it;
+ it->next_recyclable = it;
+ it->prev_recyclable = it;
+ }
+ else
+ {
+ it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
+ it->next_recyclable = cache->semifree_superstate;
+ it->prev_recyclable->next_recyclable = it;
+ it->next_recyclable->prev_recyclable = it;
+ }
+ {
+ struct rx_distinct_future *df;
+ it->is_semifree = 1;
+ ++cache->semifree_superstates;
+ df = it->transition_refs;
+ if (df)
+ {
+ /* Break the circular same-destination ring so it can be walked as
+ * a null-terminated list; it is closed again after the loop.
+ */
+ df->prev_same_dest->next_same_dest = 0;
+ for (df = it->transition_refs; df; df = df->next_same_dest)
+ {
+ df->future_frame.inx = cache->instruction_table[rx_cache_miss];
+ df->future_frame.data = 0;
+ df->future_frame.data_2 = (void *) df;
+ /* If there are any NEXT-CHAR instruction frames that
+ * refer to this state, we convert them to CACHE-MISS frames.
+ */
+ if (!df->effects
+ && (df->edge->options->next_same_super_edge[0]
+ == df->edge->options))
+ install_transition (df->present, &df->future_frame,
+ df->edge->cset);
+ }
+ df = it->transition_refs;
+ df->prev_same_dest->next_same_dest = df;
+ }
+ }
+ }
+}
+
+/* Revive the semifree superstate SUPER: turn the transitions that lead
+ * to it back into next-char instructions, unlink it from the semifree
+ * ring, link it into the lru ring just before the current lru head,
+ * and update the semifree bookkeeping.
+ */
+#ifdef __STDC__
+static void
+refresh_semifree_superstate (struct rx_cache * cache,
+ struct rx_superstate * super)
+#else
+static void
+refresh_semifree_superstate (cache, super)
+ struct rx_cache * cache;
+ struct rx_superstate * super;
+#endif
+{
+ struct rx_distinct_future *df;
+
+ if (super->transition_refs)
+ {
+ /* Temporarily break the circular same-destination ring so it can be
+ * walked as a null-terminated list; it is closed again below.
+ */
+ super->transition_refs->prev_same_dest->next_same_dest = 0;
+ for (df = super->transition_refs; df; df = df->next_same_dest)
+ {
+ df->future_frame.inx = cache->instruction_table[rx_next_char];
+ df->future_frame.data = (void *) super->transitions;
+ /* CACHE-MISS instruction frames that refer to this state,
+ * must be converted to NEXT-CHAR frames.
+ */
+ if (!df->effects
+ && (df->edge->options->next_same_super_edge[0]
+ == df->edge->options))
+ install_transition (df->present, &df->future_frame,
+ df->edge->cset);
+ }
+ super->transition_refs->prev_same_dest->next_same_dest
+ = super->transition_refs;
+ }
+ /* If SUPER heads the semifree ring, pick a new head (or none if SUPER
+ * is the only member) before unlinking it.
+ */
+ if (cache->semifree_superstate == super)
+ cache->semifree_superstate = (super->prev_recyclable == super
+ ? 0
+ : super->prev_recyclable);
+ super->next_recyclable->prev_recyclable = super->prev_recyclable;
+ super->prev_recyclable->next_recyclable = super->next_recyclable;
+
+ if (!cache->lru_superstate)
+ (cache->lru_superstate
+ = super->next_recyclable
+ = super->prev_recyclable
+ = super);
+ else
+ {
+ super->next_recyclable = cache->lru_superstate;
+ super->prev_recyclable = cache->lru_superstate->prev_recyclable;
+ super->next_recyclable->prev_recyclable = super;
+ super->prev_recyclable->next_recyclable = super;
+ }
+ super->is_semifree = 0;
+ --cache->semifree_superstates;
+}
+
+/* Move SUPERSTATE to the position just before the lru head in the lru
+ * ring.  A semifree superstate is first revived with
+ * refresh_semifree_superstate.
+ */
+#ifdef __STDC__
+static void
+rx_refresh_this_superstate (struct rx_cache * cache, struct rx_superstate * superstate)
+#else
+static void
+rx_refresh_this_superstate (cache, superstate)
+ struct rx_cache * cache;
+ struct rx_superstate * superstate;
+#endif
+{
+ if (superstate->is_semifree)
+ refresh_semifree_superstate (cache, superstate);
+ /* It is the head: advancing the head leaves it just before the new
+ * head, with no relinking needed.
+ */
+ else if (cache->lru_superstate == superstate)
+ cache->lru_superstate = superstate->next_recyclable;
+ /* Already just before the head: nothing to do.  Otherwise unlink it
+ * and relink it just before the head.
+ */
+ else if (superstate != cache->lru_superstate->prev_recyclable)
+ {
+ superstate->next_recyclable->prev_recyclable
+ = superstate->prev_recyclable;
+ superstate->prev_recyclable->next_recyclable
+ = superstate->next_recyclable;
+ superstate->next_recyclable = cache->lru_superstate;
+ superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
+ superstate->next_recyclable->prev_recyclable = superstate;
+ superstate->prev_recyclable->next_recyclable = superstate;
+ }
+}
+
+/* Drop one reference to SET.  When the count reaches zero: release the
+ * reference SET holds on its cdr (recursively), clear starts_for,
+ * remove SET's item from the cache's superset hash table, and put
+ * SET's storage on the free_supersets list.
+ */
+#ifdef __STDC__
+static void
+release_superset_low (struct rx_cache * cache,
+ struct rx_superset *set)
+#else
+static void
+release_superset_low (cache, set)
+ struct rx_cache * cache;
+ struct rx_superset *set;
+#endif
+{
+ if (!--set->refs)
+ {
+ if (set->cdr)
+ release_superset_low (cache, set->cdr);
+
+ set->starts_for = 0;
+
+ /* Look the item up under the key car ^ id ^ cdr and free it. */
+ rx_hash_free
+ (rx_hash_find
+ (&cache->superset_table,
+ (unsigned long)set->car ^ set->id ^ (unsigned long)set->cdr,
+ (void *)set,
+ &cache->superset_hash_rules),
+ &cache->superset_hash_rules);
+ rx_cache_free (cache, &cache->free_supersets, (char *)set);
+ }
+}
+
+/* Drop one reference to SET, using the cache that belongs to RX.
+ * Thin wrapper around release_superset_low.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_release_superset (struct rx *rx,
+ struct rx_superset *set)
+#else
+RX_DECL void
+rx_release_superset (rx, set)
+ struct rx *rx;
+ struct rx_superset *set;
+#endif
+{
+ release_superset_low (rx->cache, set);
+}
+
+/* This tries to add a new superstate to the superstate freelist.
+ * It might, as a result, free some edge pieces or hash tables.
+ * If nothing can be freed because too many locks are being held, fail.
+ */
+
+/* Free one superstate: pick a victim (the head of the semifree ring if
+ * there is one, otherwise the first unlocked superstate on the lru
+ * ring), turn every transition into it into a cache miss, free its
+ * outgoing edges and their distinct futures, release its superset, and
+ * put its storage on the free_superstates list.
+ * Returns 1 if a superstate was freed, 0 if there are none or all
+ * candidates are locked.
+ */
+#ifdef __STDC__
+static int
+rx_really_free_superstate (struct rx_cache * cache)
+#else
+static int
+rx_really_free_superstate (cache)
+ struct rx_cache * cache;
+#endif
+{
+ int locked_superstates = 0;
+ struct rx_superstate * it;
+
+ if (!cache->superstates)
+ return 0;
+
+ {
+ /* This is a total guess. The idea is that we should expect as
+ * many misses as we've recently experienced. I.e., cache->misses
+ * should be the same as cache->semifree_superstates.
+ */
+ while ((cache->hits + cache->misses) > cache->superstates_allowed)
+ {
+ cache->hits >>= 1;
+ cache->misses >>= 1;
+ }
+ /* If the semifree share of superstates is below the miss share of
+ * recent lookups, semifree two more.
+ */
+ if ( ((cache->hits + cache->misses) * cache->semifree_superstates)
+ < (cache->superstates * cache->misses))
+ {
+ semifree_superstate (cache);
+ semifree_superstate (cache);
+ }
+ }
+
+ /* Locked superstates may not be freed: revive any that sit at the head
+ * of the semifree ring.
+ */
+ while (cache->semifree_superstate && cache->semifree_superstate->locks)
+ {
+ refresh_semifree_superstate (cache, cache->semifree_superstate);
+ ++locked_superstates;
+ if (locked_superstates == cache->superstates)
+ return 0;
+ }
+
+ if (cache->semifree_superstate)
+ {
+ /* Victim: the head of the semifree ring. */
+ it = cache->semifree_superstate;
+ it->next_recyclable->prev_recyclable = it->prev_recyclable;
+ it->prev_recyclable->next_recyclable = it->next_recyclable;
+ cache->semifree_superstate = ((it == it->next_recyclable)
+ ? 0
+ : it->next_recyclable);
+ --cache->semifree_superstates;
+ }
+ else
+ {
+ /* Victim: the first unlocked superstate on the lru ring. */
+ while (cache->lru_superstate->locks)
+ {
+ cache->lru_superstate = cache->lru_superstate->next_recyclable;
+ ++locked_superstates;
+ if (locked_superstates == cache->superstates)
+ return 0;
+ }
+ it = cache->lru_superstate;
+ it->next_recyclable->prev_recyclable = it->prev_recyclable;
+ it->prev_recyclable->next_recyclable = it->next_recyclable;
+ cache->lru_superstate = ((it == it->next_recyclable)
+ ? 0
+ : it->next_recyclable);
+ }
+
+ /* Every distinct future that leads to IT becomes a cache miss with no
+ * destination.  The same-destination ring is opened for the walk and
+ * closed again afterwards.
+ */
+ if (it->transition_refs)
+ {
+ struct rx_distinct_future *df;
+ for (df = it->transition_refs,
+ df->prev_same_dest->next_same_dest = 0;
+ df;
+ df = df->next_same_dest)
+ {
+ df->future_frame.inx = cache->instruction_table[rx_cache_miss];
+ df->future_frame.data = 0;
+ df->future_frame.data_2 = (void *) df;
+ df->future = 0;
+ }
+ it->transition_refs->prev_same_dest->next_same_dest =
+ it->transition_refs;
+ }
+ /* Free IT's outgoing edges together with their distinct futures. */
+ {
+ struct rx_super_edge *tc = it->edges;
+ while (tc)
+ {
+ struct rx_distinct_future * df;
+ struct rx_super_edge *tct = tc->next;
+ df = tc->options;
+ /* Open the ring of futures on this edge so it ends in null. */
+ df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
+ while (df)
+ {
+ struct rx_distinct_future *dft = df;
+ df = df->next_same_super_edge[0];
+
+
+ /* If the destination's transition_refs points at this future,
+ * advance it, or clear it when this was the only one.
+ */
+ if (dft->future && dft->future->transition_refs == dft)
+ {
+ dft->future->transition_refs = dft->next_same_dest;
+ if (dft->future->transition_refs == dft)
+ dft->future->transition_refs = 0;
+ }
+ dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
+ dft->prev_same_dest->next_same_dest = dft->next_same_dest;
+ rx_cache_free (cache, &cache->free_discernable_futures,
+ (char *)dft);
+ }
+ rx_cache_free (cache, &cache->free_transition_classes, (char *)tc);
+ tc = tct;
+ }
+ }
+
+ /* Detach IT from its superset, drop the reference, recycle the storage. */
+ if (it->contents->superstate == it)
+ it->contents->superstate = 0;
+ release_superset_low (cache, it->contents);
+ rx_cache_free (cache, &cache->free_superstates, (char *)it);
+ --cache->superstates;
+ return 1;
+}
+
+/* Pop a chunk off *FREELIST.  While the list is empty, superstates are
+ * freed one at a time with rx_really_free_superstate in the hope that
+ * this puts something on the list.  Returns 0 if the list is still
+ * empty once nothing more can be freed.
+ */
+#ifdef __STDC__
+static char *
+rx_cache_get (struct rx_cache * cache,
+              struct rx_freelist ** freelist)
+#else
+static char *
+rx_cache_get (cache, freelist)
+     struct rx_cache * cache;
+     struct rx_freelist ** freelist;
+#endif
+{
+  struct rx_freelist * head;
+
+  while (!*freelist)
+    if (!rx_really_free_superstate (cache))
+      break;
+
+  head = *freelist;
+  if (head)
+    *freelist = head->next;
+  return (char *)head;
+}
+
+/* Obtain a chunk of BYTES bytes.  If *FREELIST is empty, fresh cache
+ * memory is tried first; if the list is not empty, or fresh memory is
+ * unavailable, fall back to rx_cache_get on *FREELIST.  Returns 0 when
+ * both sources fail.
+ */
+#ifdef __STDC__
+static char *
+rx_cache_malloc_or_get (struct rx_cache * cache,
+                        struct rx_freelist ** freelist, int bytes)
+#else
+static char *
+rx_cache_malloc_or_get (cache, freelist, bytes)
+     struct rx_cache * cache;
+     struct rx_freelist ** freelist;
+     int bytes;
+#endif
+{
+  char * fresh = 0;
+
+  if (*freelist == 0)
+    fresh = rx_cache_malloc (cache, bytes);
+
+  return fresh ? fresh : rx_cache_get (cache, freelist);
+}
+
+/* Obtain storage for one superstate: the struct itself plus one
+ * struct rx_inx per character of the cache's local character set.
+ *
+ * Order of attempts:
+ *   1. fresh cache memory, if the free list is empty and the cache is
+ *      below its superstate allowance;
+ *   2. the free_superstates list (rx_cache_get may free superstates to
+ *      refill it);
+ *   3. fresh cache memory again, raising superstates_allowed by one
+ *      when that works.
+ *
+ * Returns the storage, or 0 if all attempts fail.  cache->superstates
+ * is incremented only when storage is actually returned, so a failed
+ * call no longer inflates the count.
+ */
+#ifdef __STDC__
+static char *
+rx_cache_get_superstate (struct rx_cache * cache)
+#else
+static char *
+rx_cache_get_superstate (cache)
+     struct rx_cache * cache;
+#endif
+{
+  char * answer;
+  int bytes = (   sizeof (struct rx_superstate)
+               +  cache->local_cset_size * sizeof (struct rx_inx));
+  if (!cache->free_superstates
+      && (cache->superstates < cache->superstates_allowed))
+    {
+      answer = rx_cache_malloc (cache, bytes);
+      if (answer)
+        {
+          ++cache->superstates;
+          return answer;
+        }
+    }
+  answer = rx_cache_get (cache, &cache->free_superstates);
+  if (!answer)
+    {
+      answer = rx_cache_malloc (cache, bytes);
+      if (answer)
+        ++cache->superstates_allowed;
+    }
+  if (answer)
+    ++cache->superstates;
+  return answer;
+}
+
+
+
+/* Equality test for supersets (despite the "cmp" name): non-zero when
+ * the two pointers are identical, or when both are non-null and have
+ * the same car and the same cdr; zero otherwise.
+ */
+#ifdef __STDC__
+static int
+supersetcmp (void * va, void * vb)
+#else
+static int
+supersetcmp (va, vb)
+     void * va;
+     void * vb;
+#endif
+{
+  struct rx_superset * left = (struct rx_superset *)va;
+  struct rx_superset * right = (struct rx_superset *)vb;
+
+  if (left == right)
+    return 1;
+  if (!left || !right)
+    return 0;
+  return (left->car == right->car) && (left->cdr == right->cdr);
+}
+
+/* Hash-item allocation callback for the superset table.  Allocates a
+ * new superset from the cache, fills it from the template VAL, and
+ * returns the hash item embedded in it.  Returns 0 if no storage is
+ * available.  The new superset starts with refs == 0.
+ */
+#ifdef __STDC__
+static struct rx_hash_item *
+superset_allocator (struct rx_hash_rules * rules, void * val)
+#else
+static struct rx_hash_item *
+superset_allocator (rules, val)
+ struct rx_hash_rules * rules;
+ void * val;
+#endif
+{
+ /* RULES is taken to be the superset_hash_rules member of a
+ * struct rx_cache; subtracting that member's offset recovers the
+ * enclosing cache.
+ */
+ struct rx_cache * cache
+ = ((struct rx_cache *)
+ ((char *)rules
+ - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
+ struct rx_superset * template = (struct rx_superset *)val;
+ struct rx_superset * newset
+ = ((struct rx_superset *)
+ rx_cache_malloc_or_get (cache,
+ &cache->free_supersets,
+ sizeof (*template)));
+ if (!newset)
+ return 0;
+ newset->refs = 0;
+ newset->car = template->car;
+ newset->id = template->car->id;
+ newset->cdr = template->cdr;
+ newset->superstate = 0;
+ /* NOTE(review): no `rx' is declared in this function; this presumably
+ * compiles only because rx_protect_superset is a macro that ignores
+ * its first argument -- confirm against its definition.
+ */
+ rx_protect_superset (rx, template->cdr);
+ newset->hash_item.data = (void *)newset;
+ newset->hash_item.binding = 0;
+ return &newset->hash_item;
+}
+
+/* Hash-table allocation callback for the superset table: returns
+ * storage for one struct rx_hash taken from the cache's free_hash list
+ * or from fresh cache memory, or 0 if neither is available.
+ */
+#ifdef __STDC__
+static struct rx_hash *
+super_hash_allocator (struct rx_hash_rules * rules)
+#else
+static struct rx_hash *
+super_hash_allocator (rules)
+ struct rx_hash_rules * rules;
+#endif
+{
+ /* RULES is taken to be the superset_hash_rules member of a
+ * struct rx_cache; subtracting that member's offset recovers the
+ * enclosing cache.
+ */
+ struct rx_cache * cache
+ = ((struct rx_cache *)
+ ((char *)rules
+ - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
+ return ((struct rx_hash *)
+ rx_cache_malloc_or_get (cache,
+ &cache->free_hash, sizeof (struct rx_hash)));
+}
+
+
+/* Hash-table release callback for the superset table: puts HASH on the
+ * owning cache's free_hash list.
+ *
+ * RULES is taken to be the superset_hash_rules member of a
+ * struct rx_cache; the enclosing cache is recovered by subtracting that
+ * member's byte offset.  The subtraction must be done on the char
+ * pointer, BEFORE the cast to struct rx_cache *, exactly as in
+ * superset_allocator and super_hash_allocator.  Casting first would
+ * scale the offset by sizeof (struct rx_cache), which is harmless only
+ * while the offset happens to be zero.
+ */
+#ifdef __STDC__
+static void
+super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
+#else
+static void
+super_hash_liberator (hash, rules)
+     struct rx_hash * hash;
+     struct rx_hash_rules * rules;
+#endif
+{
+  struct rx_cache * cache
+    = ((struct rx_cache *)
+       ((char *)rules
+        - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
+  rx_cache_free (cache, &cache->free_hash, (char *)hash);
+}
+
+/* Hash-item release callback for the superset table.  Intentionally
+ * empty: superset_allocator returns a hash item embedded in the
+ * superset itself, and release_superset_low recycles the superset's
+ * storage right after calling rx_hash_free.
+ */
+#ifdef __STDC__
+static void
+superset_hash_item_liberator (struct rx_hash_item * it,
+ struct rx_hash_rules * rules)
+#else
+static void
+superset_hash_item_liberator (it, rules) /* Well, it does ya know. */
+ struct rx_hash_item * it;
+ struct rx_hash_rules * rules;
+#endif
+{
+}
+
+int rx_cache_bound = 128;
+static int rx_default_cache_got = 0;
+
+/* Estimate the number of bytes needed to hold SUPERS superstates over a
+ * character set of CSET_SIZE characters.
+ */
+#ifdef __STDC__
+static int
+bytes_for_cache_size (int supers, int cset_size)
+#else
+static int
+bytes_for_cache_size (supers, cset_size)
+ int supers;
+ int cset_size;
+#endif
+{
+ /* Per superstate: 1.03 x (one bitset + one super edge),
+ * plus 1.80 x one possible future,
+ * plus the superstate with its CSET_SIZE-entry transition table.
+ * The 1.03 and 1.80 weights are not explained anywhere in this part
+ * of the file; they look like empirical tuning constants.
+ */
+ return (int)
+ ((float)supers *
+ ( (1.03 * (float) ( rx_sizeof_bitset (cset_size)
+ + sizeof (struct rx_super_edge)))
+ + (1.80 * (float) sizeof (struct rx_possible_future))
+ + (float) ( sizeof (struct rx_superstate)
+ + cset_size * sizeof (struct rx_inx))));
+}
+
+/* Default morecore routine: append one more block, sized for 16
+ * superstates, to CACHE's block list and make it the current
+ * allocation block.
+ *
+ * Growth is limited by the file-scope budget rx_default_cache_got,
+ * which is compared against rx_cache_bound and raised by 16 per block.
+ * Nothing is allocated once the budget is used up.  The budget is
+ * charged only after malloc has succeeded, so a failed allocation no
+ * longer uses up quota that can never be reclaimed.
+ *
+ * There is no return value; callers detect failure by finding
+ * cache->memory_pos unchanged.
+ */
+#ifdef __STDC__
+static void
+rx_morecore (struct rx_cache * cache)
+#else
+static void
+rx_morecore (cache)
+     struct rx_cache * cache;
+#endif
+{
+  if (rx_default_cache_got >= rx_cache_bound)
+    return;
+
+  cache->superstates_allowed = rx_cache_bound;
+  {
+    struct rx_blocklist ** pos = &cache->memory;
+    int size = bytes_for_cache_size (16, cache->local_cset_size);
+    struct rx_blocklist * block
+      = ((struct rx_blocklist *)
+         malloc (size + sizeof (struct rx_blocklist)));
+    if (!block)
+      return;
+
+    rx_default_cache_got += 16;
+
+    /* Append the new block at the end of the block list. */
+    while (*pos)
+      pos = &(*pos)->next;
+    *pos = block;
+
+    block->next = 0;
+    block->bytes = size;
+    cache->memory_pos = block;
+    /* Usable storage begins right after the block header. */
+    cache->memory_addr = (char *)block + sizeof (*block);
+    cache->bytes_left = size;
+  }
+}
+
+static struct rx_cache default_cache = /* Positional initializer; struct rx_cache is declared elsewhere, so the member notes below are assumptions -- NOTE(review): confirm against the struct. */
+{
+ { /* presumably the `superset_hash_rules' member: callbacks for the superset hash table */
+ supersetcmp,
+ super_hash_allocator,
+ super_hash_liberator,
+ superset_allocator,
+ superset_hash_item_liberator,
+ },
+ 0,
+ 0,
+ 0,
+ 0,
+ rx_morecore, /* growth callback defined in this file */
+
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+
+ 0,
+ 0,
+
+ 0,
+
+ 0,
+ 0,
+ 0,
+ 0,
+ 128, /* same value as the initial rx_cache_bound; presumably `superstates_allowed' -- TODO confirm */
+
+ 256, /* presumably `local_cset_size' -- TODO confirm */
+ rx_id_instruction_table,
+
+ {
+ 0,
+ 0,
+ {0},
+ {0},
+ {0}
+ }
+};
+
+/* Prepend CAR to the superstate set CDR and return the resulting set.
+ * Sets are hash-consed lists: the cell is looked up or stored in the
+ * cache's superset table, so lists with == elements are ==.
+ *
+ * rx_superset_cons (rx, 0, 0) yields the cache's single empty set, which
+ * is created on first use and is NOT equivalent to (struct rx_superset)0.
+ * Returns 0 when the empty set or the hash entry cannot be obtained.
+ */
+
+#ifdef __STDC__
+RX_DECL struct rx_superset *
+rx_superset_cons (struct rx * rx,
+                  struct rx_nfa_state *car, struct rx_superset *cdr)
+#else
+RX_DECL struct rx_superset *
+rx_superset_cons (rx, car, cdr)
+     struct rx * rx;
+     struct rx_nfa_state *car;
+     struct rx_superset *cdr;
+#endif
+{
+  struct rx_cache * cache = rx->cache;
+  struct rx_superset key;
+  struct rx_hash_item * stored;
+  unsigned long hash;
+
+  if (car || cdr)
+    {
+      key.car = car;
+      key.cdr = cdr;
+      key.id = car->id;
+      hash = (unsigned long)car ^ car->id ^ (unsigned long)cdr;
+
+      /* While hash_store will protect cdr itself it might first allocate
+         hash tables and stuff which might cause it to be garbage collected
+         before it's protected -- [gsstark:19961026.2155EST] */
+      rx_protect_superset (rx, cdr);
+      stored = rx_hash_store (&cache->superset_table,
+                              hash,
+                              (void *)&key,
+                              &cache->superset_hash_rules);
+      rx_release_superset (rx, cdr);
+
+      if (!stored)
+        return 0;
+      return (struct rx_superset *)stored->data;
+    }
+
+  /* Both arguments are null: hand out the shared empty set. */
+  if (cache->empty_superset)
+    return cache->empty_superset;
+
+  cache->empty_superset
+    = ((struct rx_superset *)
+       rx_cache_malloc_or_get (cache, &cache->free_supersets,
+                               sizeof (struct rx_superset)));
+  if (!cache->empty_superset)
+    return 0;
+
+  bzero (cache->empty_superset, sizeof (struct rx_superset));
+  cache->empty_superset->refs = 1000;
+  return cache->empty_superset;
+}
+
+/* This computes a union of two NFA state sets.  The sets do not have the
+ * same representation though.  One is a RX_SUPERSET structure (part
+ * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
+ *
+ * Returns SET itself when ECL is empty, otherwise the merged superset,
+ * or 0 when a needed cell could not be obtained.  A 0 coming back from
+ * the recursive call is propagated in every branch; it is never handed
+ * to rx_superset_cons as a cdr.
+ */
+
+#ifdef __STDC__
+RX_DECL struct rx_superset *
+rx_superstate_eclosure_union
+  (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
+#else
+RX_DECL struct rx_superset *
+rx_superstate_eclosure_union (rx, set, ecl)
+     struct rx * rx;
+     struct rx_superset *set;
+     struct rx_nfa_state_set *ecl;
+#endif
+{
+  if (!ecl)
+    return set;
+
+  if (!set->car)
+    {
+      /* SET has no element here: prepend what remains of ECL. */
+      struct rx_superset * rest
+        = rx_superstate_eclosure_union (rx, set, ecl->cdr);
+      if (!rest)
+        return 0;
+      return rx_superset_cons (rx, ecl->car, rest);
+    }
+  if (set->car == ecl->car)
+    return rx_superstate_eclosure_union (rx, set, ecl->cdr);
+
+  {
+    struct rx_superset * tail;
+    struct rx_nfa_state * first;
+
+    /* The element with the larger address goes first. */
+    if (set->car > ecl->car)
+      {
+        tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
+        first = set->car;
+      }
+    else
+      {
+        tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
+        first = ecl->car;
+      }
+    if (!tail)
+      return 0;
+    else
+      {
+        struct rx_superset * answer;
+        answer = rx_superset_cons (rx, first, tail);
+        if (!answer)
+          {
+            /* Protect-then-release pair kept from the original code. */
+            rx_protect_superset (rx, tail);
+            rx_release_superset (rx, tail);
+            return 0;
+          }
+        else
+          return answer;
+      }
+  }
+}
+
+
+
+
+/*
+ * This makes sure that a list of rx_distinct_futures contains
+ * a future for each possible set of side effects in the eclosure
+ * of a given state. This is some of the work of filling in a
+ * superstate transition.
+ *
+ * DF is the head of a ring linked through next_same_super_edge[0]
+ * (forward) and next_same_super_edge[1] (backward), or 0 for an empty
+ * list.  For every entry on STATE->futures the ring is searched for a
+ * node with the same `effects' pointer; if none is found a new node is
+ * taken from the cache and linked in front of the first node for which
+ * rx->se_list_cmp returns > 0 (or at the end of the ring).
+ *
+ * Returns the (possibly new) head of the ring, or 0 if a node could not
+ * be allocated.
+ */
+
+#ifdef __STDC__
+static struct rx_distinct_future *
+include_futures (struct rx *rx,
+ struct rx_distinct_future *df, struct rx_nfa_state
+ *state, struct rx_superstate *superstate)
+#else
+static struct rx_distinct_future *
+include_futures (rx, df, state, superstate)
+ struct rx *rx;
+ struct rx_distinct_future *df;
+ struct rx_nfa_state *state;
+ struct rx_superstate *superstate;
+#endif
+{
+ struct rx_possible_future *future;
+ struct rx_cache * cache = rx->cache;
+ for (future = state->futures; future; future = future->next)
+ {
+ struct rx_distinct_future *dfp;
+ struct rx_distinct_future *insert_before = 0;
+ if (df)
+ df->next_same_super_edge[1]->next_same_super_edge[0] = 0; /* open the ring so the scan below ends at the last node */
+ for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
+ if (dfp->effects == future->effects)
+ break;
+ else
+ {
+ int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
+ if (order > 0)
+ {
+ insert_before = dfp;
+ dfp = 0; /* no match: fall into the allocation branch below */
+ break;
+ }
+ }
+ if (df)
+ df->next_same_super_edge[1]->next_same_super_edge[0] = df; /* close the ring again */
+ if (!dfp)
+ {
+ dfp
+ = ((struct rx_distinct_future *)
+ rx_cache_malloc_or_get (cache, &cache->free_discernable_futures,
+ sizeof (struct rx_distinct_future)));
+ if (!dfp)
+ return 0;
+ if (!df)
+ {
+ df = insert_before = dfp; /* first node: a ring of one */
+ df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
+ }
+ else if (!insert_before)
+ insert_before = df; /* before the head of a ring == at its end */
+ else if (insert_before == df)
+ df = dfp; /* the new node becomes the head */
+
+ dfp->next_same_super_edge[0] = insert_before;
+ dfp->next_same_super_edge[1]
+ = insert_before->next_same_super_edge[1];
+ dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
+ dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
+ dfp->next_same_dest = dfp->prev_same_dest = dfp;
+ dfp->future = 0; /* destination superstate not computed yet */
+ dfp->present = superstate;
+ dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
+ dfp->future_frame.data = 0;
+ dfp->future_frame.data_2 = (void *) dfp;
+ dfp->side_effects_frame.inx
+ = rx->instruction_table[rx_do_side_effects];
+ dfp->side_effects_frame.data = 0;
+ dfp->side_effects_frame.data_2 = (void *) dfp;
+ dfp->effects = future->effects;
+ }
+ }
+ return df;
+}
+
+
+/* This constructs a new superstate from its state set.  The only
+ * complexity here is memory management.
+ *
+ * If SET already has a superstate whose rx_id matches RX, that is a
+ * cache hit: it is refreshed and returned.  If the attached superstate
+ * has a different rx_id it is stale: unless it is already semifree it is
+ * unlinked from its recyclable ring and placed at the head of the
+ * semifree ring, SET is detached from it, and a new superstate is built.
+ *
+ * Returns 0 if rx_cache_get_superstate cannot supply a superstate.
+ *
+ * The RX_DEBUG trace prints SET with `%p' and a `void *' argument, as a
+ * pointer must not be passed for `%d'.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_superstate *
+rx_superstate (struct rx *rx,
+               struct rx_superset *set)
+#else
+RX_DECL struct rx_superstate *
+rx_superstate (rx, set)
+     struct rx *rx;
+     struct rx_superset *set;
+#endif
+{
+  struct rx_cache * cache = rx->cache;
+  struct rx_superstate * superstate = 0;
+
+  /* Does the superstate already exist in the cache? */
+  if (set->superstate)
+    {
+      if (set->superstate->rx_id != rx->rx_id)
+        {
+          /* Aha.  It is in the cache, but belongs to a superstate
+           * that refers to an NFA that no longer exists.
+           * (We know it no longer exists because it was evidently
+           * stored in the same region of memory as the current nfa
+           * yet it has a different id.)
+           */
+          superstate = set->superstate;
+          if (!superstate->is_semifree)
+            {
+              /* Keep the LRU head valid before unlinking. */
+              if (cache->lru_superstate == superstate)
+                {
+                  cache->lru_superstate = superstate->next_recyclable;
+                  if (cache->lru_superstate == superstate)
+                    cache->lru_superstate = 0;
+                }
+              {
+                /* Unlink from the ring it is currently on ... */
+                superstate->next_recyclable->prev_recyclable
+                  = superstate->prev_recyclable;
+                superstate->prev_recyclable->next_recyclable
+                  = superstate->next_recyclable;
+                /* ... and make it the head of the semifree ring. */
+                if (!cache->semifree_superstate)
+                  {
+                    (cache->semifree_superstate
+                     = superstate->next_recyclable
+                     = superstate->prev_recyclable
+                     = superstate);
+                  }
+                else
+                  {
+                    superstate->next_recyclable = cache->semifree_superstate;
+                    superstate->prev_recyclable
+                      = cache->semifree_superstate->prev_recyclable;
+                    superstate->next_recyclable->prev_recyclable
+                      = superstate;
+                    superstate->prev_recyclable->next_recyclable
+                      = superstate;
+                    cache->semifree_superstate = superstate;
+                  }
+                ++cache->semifree_superstates;
+              }
+            }
+          set->superstate = 0;
+          goto handle_cache_miss;
+        }
+      ++cache->hits;
+      superstate = set->superstate;
+
+      rx_refresh_this_superstate (cache, superstate);
+      return superstate;
+    }
+
+ handle_cache_miss:
+
+  /* This point reached only for cache misses. */
+  ++cache->misses;
+#if RX_DEBUG
+  if (rx_debug_trace > 1)
+    {
+      struct rx_superset * setp = set;
+      fprintf (stderr, "Building a superstet %d(%p): ",
+               rx->rx_id, (void *) set);
+      while (setp)
+        {
+          fprintf (stderr, "%d ", setp->id);
+          setp = setp->cdr;
+        }
+      fprintf (stderr, "(%p)\n", (void *) set);
+    }
+#endif
+  superstate = (struct rx_superstate *)rx_cache_get_superstate (cache);
+  if (!superstate)
+    return 0;
+
+  /* Link the new superstate into the LRU ring. */
+  if (!cache->lru_superstate)
+    (cache->lru_superstate
+     = superstate->next_recyclable
+     = superstate->prev_recyclable
+     = superstate);
+  else
+    {
+      superstate->next_recyclable = cache->lru_superstate;
+      superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
+      (  superstate->prev_recyclable->next_recyclable
+       = superstate->next_recyclable->prev_recyclable
+       = superstate);
+    }
+  superstate->rx_id = rx->rx_id;
+  superstate->transition_refs = 0;
+  superstate->locks = 0;
+  superstate->is_semifree = 0;
+  set->superstate = superstate;
+  superstate->contents = set;
+  rx_protect_superset (rx, set);
+  superstate->edges = 0;
+  {
+    int x;
+    /* None of the transitions from this superstate are known yet. */
+    for (x = 0; x < rx->local_cset_size; ++x) /* &&&&& 3.8 % */
+      {
+        struct rx_inx * ifr = &superstate->transitions[x];
+        ifr->inx = rx->instruction_table [rx_cache_miss];
+        ifr->data = ifr->data_2 = 0;
+      }
+  }
+  return superstate;
+}
+
+
+/* This computes the destination set of one edge of the superstate NFA.
+ * Note that a RX_DISTINCT_FUTURE is a superstate edge.
+ * Returns 0 on an allocation failure, 1 otherwise.
+ *
+ * Two failure paths are handled explicitly:
+ *  - the empty set from rx_superset_cons (rx, 0, 0) may itself be 0 and
+ *    is checked before it is protected;
+ *  - when a union step fails, the reference held on the previous partial
+ *    solution is released before returning.
+ */
+
+#ifdef __STDC__
+static int
+solve_destination (struct rx *rx, struct rx_distinct_future *df)
+#else
+static int
+solve_destination (rx, df)
+     struct rx *rx;
+     struct rx_distinct_future *df;
+#endif
+{
+  struct rx_super_edge *tc = df->edge;
+  struct rx_superset *nfa_state;
+  struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
+  struct rx_superset *solution = nil_set;
+  struct rx_superstate *dest;
+
+  if (!nil_set)
+    return 0;
+
+  rx_protect_superset (rx, solution);
+  /* Iterate over all NFA states in the state set of this superstate. */
+  for (nfa_state = df->present->contents;
+       nfa_state->car;
+       nfa_state = nfa_state->cdr)
+    {
+      struct rx_nfa_edge *e;
+      /* Iterate over all edges of each NFA state. */
+      for (e = nfa_state->car->edges; e; e = e->next)
+        /* If we find an edge that is labeled with
+         * the characters we are solving for.....
+         */
+        if (rx_bitset_is_subset (rx->local_cset_size,
+                                 tc->cset, e->params.cset))
+          {
+            struct rx_nfa_state *n = e->dest;
+            struct rx_possible_future *pf;
+            /* ....search the partial epsilon closures of the destination
+             * of that edge for a path that involves the same set of
+             * side effects we are solving for.
+             * If we find such a RX_POSSIBLE_FUTURE, we add members to the
+             * stateset we are computing.
+             */
+            for (pf = n->futures; pf; pf = pf->next)
+              if (pf->effects == df->effects)
+                {
+                  struct rx_superset * old_sol;
+                  old_sol = solution;
+                  solution = rx_superstate_eclosure_union (rx, solution,
+                                                           pf->destset);
+                  if (!solution)
+                    {
+                      /* Drop our reference on the partial result. */
+                      rx_release_superset (rx, old_sol);
+                      return 0;
+                    }
+                  rx_protect_superset (rx, solution);
+                  rx_release_superset (rx, old_sol);
+                }
+          }
+    }
+  /* It is possible that the RX_DISTINCT_FUTURE we are working on has
+   * the empty set of NFA states as its definition.  In that case, this
+   * is a failure point.
+   */
+  if (solution == nil_set)
+    {
+      df->future_frame.inx = (void *) rx_backtrack;
+      df->future_frame.data = 0;
+      df->future_frame.data_2 = 0;
+      return 1;
+    }
+  dest = rx_superstate (rx, solution);
+  rx_release_superset (rx, solution);
+  if (!dest)
+    return 0;
+
+  {
+    /* Point every edge on DF's same-destination ring at DEST; the ring
+     * is opened for the walk and closed again afterwards.
+     */
+    struct rx_distinct_future *dft;
+    dft = df;
+    df->prev_same_dest->next_same_dest = 0;
+    while (dft)
+      {
+        dft->future = dest;
+        dft->future_frame.inx = rx->instruction_table[rx_next_char];
+        dft->future_frame.data = (void *) dest->transitions;
+        dft = dft->next_same_dest;
+      }
+    df->prev_same_dest->next_same_dest = df;
+  }
+  /* Splice DF's ring into DEST's list of incoming edges. */
+  if (!dest->transition_refs)
+    dest->transition_refs = df;
+  else
+    {
+      struct rx_distinct_future *dft = dest->transition_refs->next_same_dest;
+      dest->transition_refs->next_same_dest = df->next_same_dest;
+      df->next_same_dest->prev_same_dest = dest->transition_refs;
+      df->next_same_dest = dft;
+      dft->prev_same_dest = df;
+    }
+  return 1;
+}
+
+
+/* This takes a superstate and a character, and computes some edges
+ * from the superstate NFA. In particular, this computes all edges
+ * that lead from SUPERSTATE given CHR. This function also
+ * computes the set of characters that share this edge set.
+ * This returns 0 on allocation error, 1 otherwise.
+ * The character set and list of edges are returned through
+ * the parameters CSETOUT and DFOUT.  *DFOUT is left 0 when no NFA edge
+ * of SUPERSTATE's states contains CHR.
+ */
+
+#ifdef __STDC__
+static int
+compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
+ rx_Bitset csetout, struct rx_superstate *superstate,
+ unsigned char chr)
+#else
+static int
+compute_super_edge (rx, dfout, csetout, superstate, chr)
+ struct rx *rx;
+ struct rx_distinct_future **dfout;
+ rx_Bitset csetout;
+ struct rx_superstate *superstate;
+ unsigned char chr;
+#endif
+{
+ struct rx_superset *stateset = superstate->contents;
+
+ /* To compute the set of characters that share edges with CHR,
+ * we start with the full character set, and subtract.
+ */
+ rx_bitset_universe (rx->local_cset_size, csetout);
+ *dfout = 0;
+
+ /* Iterate over the NFA states in the superstate state-set. */
+ while (stateset->car)
+ {
+ struct rx_nfa_edge *e;
+ for (e = stateset->car->edges; e; e = e->next)
+ if (RX_bitset_member (e->params.cset, chr))
+ {
+ /* If we find an NFA edge that applies, we make sure there
+ * are corresponding edges in the superstate NFA.
+ */
+ {
+ struct rx_distinct_future * saved;
+ saved = *dfout;
+ *dfout = include_futures (rx, *dfout, e->dest, superstate);
+ if (!*dfout)
+ {
+ /* Allocation failed: walk the ring that was reachable from the
+ * previous head (SAVED) and give every node back to the cache.
+ */
+ struct rx_distinct_future * df;
+ df = saved;
+ if (df)
+ df->next_same_super_edge[1]->next_same_super_edge[0] = 0; /* open the ring so the walk terminates */
+ while (df)
+ {
+ struct rx_distinct_future *dft;
+ dft = df;
+ df = df->next_same_super_edge[0];
+
+ if (dft->future && dft->future->transition_refs == dft)
+ {
+ dft->future->transition_refs = dft->next_same_dest;
+ if (dft->future->transition_refs == dft)
+ dft->future->transition_refs = 0;
+ }
+ dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
+ dft->prev_same_dest->next_same_dest = dft->next_same_dest;
+ rx_cache_free (rx->cache,
+ &rx->cache->free_discernable_futures,
+ (char *)dft);
+ }
+ return 0;
+ }
+ }
+ /* We also trim the character set a bit. */
+ rx_bitset_intersection (rx->local_cset_size,
+ csetout, e->params.cset);
+ }
+ else
+ /* An edge that doesn't apply at least tells us some characters
+ * that don't share the same edge set as CHR.
+ */
+ rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
+ stateset = stateset->cdr;
+ }
+ return 1;
+}
+
+
+/* Constructor for RX_SUPER_EDGE structures: wrappers for lists of
+ * superstate NFA edges that share a character-set label.  A transition
+ * class holding more than one rx_distinct_future (superstate edge)
+ * represents a non-determinism in the superstate NFA.
+ *
+ * The new edge is pushed on SUPER's edge list, receives a copy of CSET
+ * stored directly behind the structure, and every node on DF's ring is
+ * pointed back at it.  Returns 0 if the cache cannot supply the memory.
+ */
+
+
+#ifdef __STDC__
+static struct rx_super_edge *
+rx_super_edge (struct rx *rx,
+               struct rx_superstate *super, rx_Bitset cset,
+               struct rx_distinct_future *df)
+#else
+static struct rx_super_edge *
+rx_super_edge (rx, super, cset, df)
+     struct rx *rx;
+     struct rx_superstate *super;
+     rx_Bitset cset;
+     struct rx_distinct_future *df;
+#endif
+{
+  struct rx_super_edge *edge;
+  struct rx_distinct_future *ring_tail;
+  struct rx_distinct_future *walk;
+
+  edge = ((struct rx_super_edge *)
+          rx_cache_malloc_or_get
+            (rx->cache, &rx->cache->free_transition_classes,
+             (sizeof (struct rx_super_edge)
+              + rx_sizeof_bitset (rx->local_cset_size))));
+  if (!edge)
+    return 0;
+
+  /* Push onto the superstate's edge list. */
+  edge->next = super->edges;
+  super->edges = edge;
+
+  edge->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
+  edge->rx_backtrack_frame.data = 0;
+  edge->rx_backtrack_frame.data_2 = (void *) edge;
+  edge->options = df;
+
+  /* The bitset lives in the bytes right after the structure. */
+  edge->cset = (rx_Bitset) ((char *) edge + sizeof (*edge));
+  rx_bitset_assign (rx->local_cset_size, edge->cset, cset);
+
+  if (!df)
+    return edge;
+
+  /* Open the ring, stamp each node, then close the ring again. */
+  ring_tail = df->next_same_super_edge[1];
+  ring_tail->next_same_super_edge[0] = 0;
+  for (walk = df; walk; walk = walk->next_same_super_edge[0])
+    walk->edge = edge;
+  ring_tail->next_same_super_edge[0] = df;
+
+  return edge;
+}
+
+
+/* There are three kinds of cache miss. The first occurs when a
+ * transition is taken that has never been computed during the
+ * lifetime of the source superstate. That cache miss is handled by
+ * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
+ * occurs when the destination superstate of a transition doesn't
+ * exist. SOLVE_DESTINATION is used to construct the destination superstate.
+ * Finally, the third kind of cache miss occurs when the destination
+ * superstate of a transition is in a `semi-free state'. That case is
+ * handled by UNFREE_SUPERSTATE.
+ *
+ * The function of HANDLE_CACHE_MISS is to figure out which of these
+ * cases applies.
+ */
+
+
+/* Copy *ANSWER into SUPER's transition table for each of the 32 slots
+ * starting at OFFSET whose corresponding bit (bit 0 first) is set in SET.
+ */
+#ifdef __STDC__
+static void
+install_partial_transition (struct rx_superstate *super,
+                            struct rx_inx *answer,
+                            RX_subset set, int offset)
+#else
+static void
+install_partial_transition (super, answer, set, offset)
+     struct rx_superstate *super;
+     struct rx_inx *answer;
+     RX_subset set;
+     int offset;
+#endif
+{
+  struct rx_inx * table = super->transitions;
+  RX_subset bit = 1;
+  int slot;
+
+  for (slot = offset; slot < offset + 32; ++slot)
+    {
+      if (set & bit)
+        table[slot] = *answer;
+      bit <<= 1;
+    }
+}
+
+/* Resolve a cache miss taken from SUPER on character CHR.  DATA is the
+ * rx_distinct_future attached to the frame that missed, or 0 for the
+ * shared cache-miss frame.  Three cases:
+ *
+ *  - DATA is 0: use an already computed edge of SUPER that covers CHR if
+ *    there is one; otherwise compute the edge set with
+ *    compute_super_edge and wrap it with rx_super_edge.
+ *  - DATA has a destination superstate: refresh that superstate if it
+ *    is semifree.
+ *  - DATA has no destination yet: build it with solve_destination.
+ *
+ * Returns the instruction frame to continue with, or 0 on allocation
+ * failure or when the character set does not fit the scratch bitset.
+ *
+ * The scratch bitset is declared as an array of RX_subset rather than
+ * char, so that it is suitably aligned for access through rx_Bitset.
+ * Its size stays 1024 bytes.
+ */
+#ifdef __STDC__
+RX_DECL struct rx_inx *
+rx_handle_cache_miss
+  (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
+#else
+RX_DECL struct rx_inx *
+rx_handle_cache_miss (rx, super, chr, data)
+     struct rx *rx;
+     struct rx_superstate *super;
+     unsigned char chr;
+     void *data;
+#endif
+{
+  int offset = chr / RX_subset_bits;
+  struct rx_distinct_future *df = data;
+
+  if (!df)                      /* must be the shared_cache_miss_frame */
+    {
+      /* Perhaps this is just a transition waiting to be filled. */
+      struct rx_super_edge *tc;
+      RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
+
+      for (tc = super->edges; tc; tc = tc->next)
+        if (tc->cset[offset] & mask)
+          {
+            struct rx_inx * answer;
+            df = tc->options;
+            /* More than one option means a backtrack point; otherwise go
+             * through the side-effects frame only if there are effects.
+             */
+            answer = ((tc->options->next_same_super_edge[0] != tc->options)
+                      ? &tc->rx_backtrack_frame
+                      : (df->effects
+                         ? &df->side_effects_frame
+                         : &df->future_frame));
+            install_partial_transition (super, answer,
+                                        tc->cset [offset], offset * 32);
+            return answer;
+          }
+      /* Otherwise, it's a flushed or newly encountered edge. */
+      {
+        /* this limit is far from unreasonable */
+        RX_subset cset_space[1024 / sizeof (RX_subset)];
+        rx_Bitset trcset;
+        struct rx_inx *answer;
+
+        if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
+          return 0;             /* If the arbitrary limit is hit, always fail */
+                                /* cleanly.                                   */
+        trcset = (rx_Bitset)cset_space;
+        rx_lock_superstate (rx, super);
+        if (!compute_super_edge (rx, &df, trcset, super, chr))
+          {
+            rx_unlock_superstate (rx, super);
+            return 0;
+          }
+        if (!df)                /* We just computed the fail transition. */
+          {
+            static struct rx_inx
+              shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
+            answer = &shared_fail_frame;
+          }
+        else
+          {
+            tc = rx_super_edge (rx, super, trcset, df);
+            if (!tc)
+              {
+                rx_unlock_superstate (rx, super);
+                return 0;
+              }
+            answer = ((tc->options->next_same_super_edge[0] != tc->options)
+                      ? &tc->rx_backtrack_frame
+                      : (df->effects
+                         ? &df->side_effects_frame
+                         : &df->future_frame));
+          }
+        install_partial_transition (super, answer,
+                                    trcset[offset], offset * 32);
+        rx_unlock_superstate (rx, super);
+        return answer;
+      }
+    }
+  else if (df->future) /* A cache miss on an edge with a future? Must be
+                        * a semi-free destination. */
+    {
+      if (df->future->is_semifree)
+        refresh_semifree_superstate (rx->cache, df->future);
+      return &df->future_frame;
+    }
+  else
+    /* no future superstate on an existing edge */
+    {
+      rx_lock_superstate (rx, super);
+      if (!solve_destination (rx, df))
+        {
+          rx_unlock_superstate (rx, super);
+          return 0;
+        }
+      /* Only a lone, effect-free option may be installed directly. */
+      if (!df->effects
+          && (df->edge->options->next_same_super_edge[0] == df->edge->options))
+        install_partial_transition (super, &df->future_frame,
+                                    df->edge->cset[offset], offset * 32);
+      rx_unlock_superstate (rx, super);
+      return &df->future_frame;
+    }
+}
+
+
+
+
+/* The rest of the code provides a regex.c compatible interface. */
+
+
+__const__ char *re_error_msg[] = /* Message text per error code; each entry is tagged with the code it is meant for. */
+{
+ 0, /* REG_NOERROR */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+};
+
+
+
+/*
+ * Macros used while compiling patterns.
+ *
+ * By convention, PEND points just past the end of the uncompiled pattern,
+ * P points to the read position in the pattern. `translate' is the name
+ * of the translation table (`TRANSLATE' is the name of a macro that looks
+ * things up in `translate').
+ */
+
+
+/*
+ * Fetch the next character in the uncompiled pattern---translating it
+ * if necessary. Also cast from a signed character in the constant
+ * string passed to us by the user to an unsigned char that we can use
+ * as an array index (in, e.g., `translate').
+ *
+ * Uses the variables `p', `pend' and `translate' of the enclosing
+ * function, and makes that function return REG_EEND when the pattern
+ * is exhausted.
+ */
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/*
+ * Fetch the next character in the uncompiled pattern, with no
+ * translation. Like PATFETCH, this returns REG_EEND from the
+ * enclosing function when `p' has reached `pend'.
+ */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern (decrements `p'). */
+#define PATUNFETCH p--
+
+
+#define TRANSLATE(d) translate[(unsigned char) (d)] /* Look D up in the `translate' table of the enclosing scope. */
+
+typedef unsigned regnum_t; /* Type of the `regnum' member of compile-stack entries. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
+ */
+typedef int pattern_offset_t;
+
+typedef struct
+{
+ struct rexp_node ** top_expression; /* was begalt */
+ struct rexp_node ** last_expression; /* was laststart */
+ pattern_offset_t inner_group_offset;
+ regnum_t regnum; /* compared against by group_in_compile_stack */
+} compile_stack_elt_t;
+typedef struct
+{
+ compile_stack_elt_t *stack; /* array of entries, indexed from 0 */
+ unsigned size; /* COMPILE_STACK_FULL compares `avail' against this */
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+static boolean
+ group_in_compile_stack (compile_stack_type, regnum_t);
+static reg_errcode_t
+ compile_range (struct re_pattern_buffer *, rx_Bitset,
+ __const__ char **, __const__ char *,
+ unsigned char *, reg_syntax_t,
+ rx_Bitset, char *);
+static void find_backrefs (char *, struct rexp_node *,
+ struct re_se_params *);
+static int compute_fastset (struct re_pattern_buffer *,
+ struct rexp_node *);
+static int is_anchored (struct rexp_node *, rx_side_effect);
+static struct rexp_node
+ *remove_unecessary_side_effects
+ (struct rx *, char *,
+ struct rexp_node *,
+ struct re_se_params *);
+static int pointless_if_repeated (struct rexp_node *,
+ struct re_se_params *);
+static int registers_on_stack (struct re_pattern_buffer *,
+ struct rexp_node *,
+ int, struct re_se_params *);
+static int has_any_se (struct rx *, struct rexp_node *);
+static int has_non_idempotent_epsilon_path
+ (struct rx *, struct rexp_node *,
+ struct re_se_params *);
+static int begins_with_complex_se (struct rx *, struct rexp_node *);
+static void speed_up_alt (struct rx *, struct rexp_node *, int);
+RX_DECL reg_errcode_t
+ rx_compile (__const__ char *, int, reg_syntax_t,
+ struct re_pattern_buffer *);
+RX_DECL void rx_blow_up_fastmap (struct re_pattern_buffer *);
+static __inline__ enum rx_get_burst_return
+ re_search_2_get_burst (struct rx_string_position *,
+ void *, int);
+static __inline__ enum rx_back_check_return
+ re_search_2_back_check (struct rx_string_position *, int,
+ int, unsigned char *, void *, int);
+static __inline__ int
+ re_search_2_fetch_char (struct rx_string_position *,
+ int, void *, int);
+
+
+#define INIT_COMPILE_STACK_SIZE 32 /* presumably the initial entry count of a compile stack -- TODO confirm at the allocation site */
+
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) /* expects a variable named `compile_stack' in scope */
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element (the slot at index `avail'). */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list.  Expects an array named `b'
+ * in scope.  C is parenthesized in both uses, so that an expression
+ * argument is converted to unsigned char as a whole. */
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / CHARBITS] \
+ |= 1 << (((unsigned char) (c)) % CHARBITS))
+
+/* Get the next unsigned number in the uncompiled pattern.
+ * Digits are accumulated into NUM in base 10; a negative NUM is reset to
+ * 0 before the first digit is added, and NUM is left untouched if no
+ * digit is found. Reading stops at the first non-digit (which is left
+ * in `c') or when `p' reaches `pend'. Relies on `p', `pend' and `c' of
+ * the enclosing function, and on PATFETCH. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (isdigit (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. IS_CHAR_CLASS below is true for exactly twelve names: alpha, upper, lower, digit, alnum, xdigit, space, print, punct, graph, cntrl, blank. */
+
+#define IS_CHAR_CLASS(string) \
+ (!strcmp (string, "alpha") || !strcmp (string, "upper") \
+ || !strcmp (string, "lower") || !strcmp (string, "digit") \
+ || !strcmp (string, "alnum") || !strcmp (string, "xdigit") \
+ || !strcmp (string, "space") || !strcmp (string, "print") \
+ || !strcmp (string, "punct") || !strcmp (string, "graph") \
+ || !strcmp (string, "cntrl") || !strcmp (string, "blank"))
+
+
+/* These predicates are used in regex_compile. */
+
+/* P points just past a `^' in PATTERN.  Answer whether that `^' directly
+ * follows an open-group or an alternation operator, taking into account
+ * whether SYNTAX wants those operators backslash-escaped.  The caller
+ * guarantees at least one character in front of the `^'.
+ */
+
+#ifdef __STDC__
+static boolean
+at_begline_loc_p (__const__ char *pattern, __const__ char * p, reg_syntax_t syntax)
+#else
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+     __const__ char *pattern;
+     __const__ char * p;
+     reg_syntax_t syntax;
+#endif
+{
+  __const__ char *before_caret = p - 2;
+  boolean escaped = 0;
+
+  /* Is the character in front of the `^' itself preceded by `\'? */
+  if (before_caret > pattern && before_caret[-1] == '\\')
+    escaped = 1;
+
+  switch (*before_caret)
+    {
+    case '(':
+      /* After a subexpression? */
+      return ((syntax & RE_NO_BK_PARENS) || escaped);
+    case '|':
+      /* After an alternative? */
+      return ((syntax & RE_NO_BK_VBAR) || escaped);
+    default:
+      return 0;
+    }
+}
+
+/* Counterpart of at_begline_loc_p for `$'.  P points at the character
+ * following the `$' and the caller guarantees P < PEND.  Answer whether
+ * a close-group or an alternation operator comes next, in the escaped or
+ * unescaped spelling that SYNTAX selects.
+ */
+
+#ifdef __STDC__
+static boolean
+at_endline_loc_p (__const__ char *p, __const__ char *pend, int syntax)
+#else
+static boolean
+at_endline_loc_p (p, pend, syntax)
+     __const__ char *p;
+     __const__ char *pend;
+     int syntax;
+#endif
+{
+  char here = *p;
+  /* True when P holds a backslash that has another character after it. */
+  int escape_pair = (here == '\\') && (p + 1 < pend);
+
+  /* Before a subexpression? */
+  if (syntax & RE_NO_BK_PARENS)
+    {
+      if (here == ')')
+        return 1;
+    }
+  else if (escape_pair && p[1] == ')')
+    return 1;
+
+  /* Before an alternative? */
+  if (syntax & RE_NO_BK_VBAR)
+    return (here == '|');
+  return (escape_pair && p[1] == '|');
+}
+
+
+unsigned char rx_id_translation[256] = /* Identity translation table: entry i holds the value i, for i = 0..255. */
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
+ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+ 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
+
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+ 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
+ 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
+ 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
+ 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
+ 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
+
+ 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
+ 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
+ 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
+ 250, 251, 252, 253, 254, 255
+};
+
+/* The compiler keeps an inverted translation table; this looks up, and
+ * on first use initializes, the entry for character C.
+ * CACHE holds one bitset per character and VALID is an array of booleans
+ * telling which of those bitsets have been filled in.  The bitset for C
+ * contains every character (0..255) that TRANSLATE maps to the same
+ * value as C.
+ */
+
+#ifdef __STDC__
+static rx_Bitset
+inverse_translation (struct re_pattern_buffer * rxb,
+                     char * valid, rx_Bitset cache,
+                     unsigned char * translate, int c)
+#else
+static rx_Bitset
+inverse_translation (rxb, valid, cache, translate, c)
+     struct re_pattern_buffer * rxb;
+     char * valid;
+     rx_Bitset cache;
+     unsigned char * translate;
+     int c;
+#endif
+{
+  rx_Bitset entry
+    = cache + c * rx_bitset_numb_subsets (rxb->rx.local_cset_size);
+  int wanted;
+  int ch;
+
+  if (valid[c])
+    return entry;
+
+  /* First request for C: build its entry. */
+  wanted = TRANSLATE(c);
+  rx_bitset_null (rxb->rx.local_cset_size, entry);
+  ch = 0;
+  while (ch < 256)              /* &&&& 13.37 */
+    {
+      if (TRANSLATE(ch) == wanted)
+        RX_bitset_enjoin (entry, ch);
+      ++ch;
+    }
+  valid[c] = 1;
+  return entry;
+}
+
+
+
+
+/* More subroutine declarations and macros for regex_compile. */
+
+/* Report whether any entry currently on COMPILE_STACK carries REGNUM:
+   true if one does, false otherwise.  */
+
+#ifdef __STDC__
+static boolean
+group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
+#else
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+     compile_stack_type compile_stack;
+     regnum_t regnum;
+#endif
+{
+  int idx = compile_stack.avail - 1;
+
+  /* Scan from the most recently pushed entry down to the first one. */
+  while (idx >= 0)
+    {
+      if (compile_stack.stack[idx].regnum == regnum)
+        return true;
+      --idx;
+    }
+
+  return false;
+}
+
+
+/*
+ * Read the ending character of a bracket-expression range from the
+ * uncompiled pattern *P_PTR (which ends at PEND).  The starting
+ * character is taken from `P[-2]' (`P[-1]' being the `-').  For every
+ * character from the translated start to the translated end, inclusive,
+ * the inverse-translation set of that character is merged into CS.
+ *
+ * Returns an error code: REG_ERANGE when the pattern stops right after
+ * the `-', or when the range is empty and SYNTAX has RE_NO_EMPTY_RANGES;
+ * REG_NOERROR otherwise.
+ *
+ * The local names `p', `pend' and `translate' are required by the
+ * PATFETCH and TRANSLATE macros.
+ */
+
+#ifdef __STDC__
+static reg_errcode_t
+compile_range (struct re_pattern_buffer * rxb, rx_Bitset cs,
+               __const__ char ** p_ptr, __const__ char * pend,
+               unsigned char * translate, reg_syntax_t syntax,
+               rx_Bitset inv_tr, char * valid_inv_tr)
+#else
+static reg_errcode_t
+compile_range (rxb, cs, p_ptr, pend, translate, syntax, inv_tr, valid_inv_tr)
+     struct re_pattern_buffer * rxb;
+     rx_Bitset cs;
+     __const__ char ** p_ptr;
+     __const__ char * pend;
+     unsigned char * translate;
+     reg_syntax_t syntax;
+     rx_Bitset inv_tr;
+     char * valid_inv_tr;
+#endif
+{
+  __const__ char *p = *p_ptr;
+  unsigned char range_start = TRANSLATE(p[-2]);
+  unsigned char range_end;
+  unsigned ch;
+
+  if (p == pend)
+    return REG_ERANGE;
+
+  PATFETCH (range_end);
+
+  /* The caller's read position moves past the end character too. */
+  ++*p_ptr;
+
+  if (range_end < range_start)
+    {
+      if (syntax & RE_NO_EMPTY_RANGES)
+        return REG_ERANGE;
+      return REG_NOERROR;
+    }
+
+  ch = range_start;
+  while (ch <= range_end)
+    {
+      rx_Bitset members =
+        inverse_translation (rxb, valid_inv_tr, inv_tr, translate, ch);
+      rx_bitset_union (rxb->rx.local_cset_size, cs, members);
+      ++ch;
+    }
+
+  return REG_NOERROR;
+}
+
+
+/* Walk the expression tree REXP looking for backreference side effects
+ * and set OUT[n] to 1 for every register pair n (the `op1' of the side
+ * effect's entry in PARAMS) that a backreference refers to.
+ *
+ * This helps optimize patterns for searching: if the caller doesn't want
+ * register values, backreferenced registers are the only registers for
+ * which we need rx_backtrack.
+ */
+
+#ifdef __STDC__
+static void
+find_backrefs (char * out, struct rexp_node * rexp,
+               struct re_se_params * params)
+#else
+static void
+find_backrefs (out, rexp, params)
+     char * out;
+     struct rexp_node * rexp;
+     struct re_se_params * params;
+#endif
+{
+  long se;
+
+  if (!rexp)
+    return;
+
+  switch (rexp->type)
+    {
+    case r_side_effect:
+      /* Negative side-effect values never index PARAMS. */
+      se = (long)rexp->params.side_effect;
+      if (se >= 0 && params [se].se == re_se_backref)
+        out[ params [se].op1] = 1;
+      break;
+
+    case r_alternate:
+    case r_concat:
+    case r_opt:
+    case r_star:
+    case r_2phase_star:
+      find_backrefs (out, rexp->params.pair.left, params);
+      find_backrefs (out, rexp->params.pair.right, params);
+      break;
+
+    case r_cset:
+    case r_data:
+      break;
+    }
+}
+
+
+
+/* Accumulate into RXB->fastset the character sets met while walking REXP.
+ * Returns 1 if the pattern can match the empty string, 0 otherwise.
+ * A null REXP counts as matching the empty string.
+ */
+
+#ifdef __STDC__
+static int
+compute_fastset (struct re_pattern_buffer * rxb, struct rexp_node * rexp)
+#else
+static int
+compute_fastset (rxb, rexp)
+     struct re_pattern_buffer * rxb;
+     struct rexp_node * rexp;
+#endif
+{
+  int left_empty;
+  int right_empty;
+
+  if (rexp == 0)
+    return 1;
+
+  switch (rexp->type)
+    {
+    case r_cset:
+      rx_bitset_union (rxb->rx.local_cset_size,
+                       rxb->fastset, rexp->params.cset);
+      return 0;
+
+    case r_data:
+    case r_side_effect:
+      return 1;
+
+    case r_opt:
+    case r_star:
+    case r_2phase_star:
+      /* Only the left operand is scanned; for r_2phase_star the right
+       * operand is deliberately skipped.
+       */
+      compute_fastset (rxb, rexp->params.pair.left);
+      return 1;
+
+    case r_concat:
+      /* The right operand is scanned only if the left can be empty. */
+      if (!compute_fastset (rxb, rexp->params.pair.left))
+        return 0;
+      return compute_fastset (rxb, rexp->params.pair.right) ? 1 : 0;
+
+    case r_alternate:
+      /* Both branches are always scanned, with no short-circuit. */
+      left_empty = compute_fastset (rxb, rexp->params.pair.left);
+      right_empty = compute_fastset (rxb, rexp->params.pair.right);
+      return (left_empty || right_empty) ? 1 : 0;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+/* Report whether REXP is anchored by the side effect SE.  Returns
+ *   1 -- yes, definitely anchored by the given side effect.
+ *   2 -- maybe anchored, maybe the empty string.
+ *   0 -- definitely not anchored.
+ * There is simply no other possibility.
+ */
+
+#ifdef __STDC__
+static int
+is_anchored (struct rexp_node * rexp, rx_side_effect se)
+#else
+static int
+is_anchored (rexp, se)
+     struct rexp_node * rexp;
+     rx_side_effect se;
+#endif
+{
+  int left;
+  int right;
+
+  if (rexp == 0)
+    return 2;
+
+  switch (rexp->type)
+    {
+    case r_side_effect:
+      if (rexp->params.side_effect == se)
+        return 1;
+      return 2;
+
+    case r_opt:
+    case r_star:
+      /* An optional operand can at best be "maybe" anchored. */
+      if (is_anchored (rexp->params.pair.left, se))
+        return 2;
+      return 0;
+
+    case r_concat:
+    case r_2phase_star:
+      /* The right operand decides only when the left is a "maybe". */
+      left = is_anchored (rexp->params.pair.left, se);
+      if (left != 2)
+        return left;
+      return is_anchored (rexp->params.pair.right, se);
+
+    case r_alternate:
+      /* One unanchored branch makes the whole alternation unanchored;
+       * the right branch is not even examined if the left is unanchored.
+       */
+      left = is_anchored (rexp->params.pair.left, se);
+      if (left == 0)
+        return 0;
+      right = is_anchored (rexp->params.pair.right, se);
+      if (left == right)
+        return left;
+      if (right == 0)
+        return 0;
+      return 2;
+
+    case r_cset:
+    case r_data:
+      return 0;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+/* Destructively prune from REXP the register assignments (left/right
+ * paren side effects) that no backreference requires, and return the
+ * pruned tree (0 if nothing remains).  This can speed up explore_future,
+ * especially if it eliminates non-determinism in the superstate NFA.
+ *
+ * NEEDED is an array of characters, presumably filled in by FIND_BACKREFS.
+ * The non-zero elements of the array indicate which register assignments
+ * can NOT be removed from the expression.
+ */
+
+#ifdef __STDC__
+static struct rexp_node *
+remove_unecessary_side_effects (struct rx * rx, char * needed,
+                                struct rexp_node * rexp,
+                                struct re_se_params * params)
+#else
+static struct rexp_node *
+remove_unecessary_side_effects (rx, needed, rexp, params)
+     struct rx * rx;
+     char * needed;
+     struct rexp_node * rexp;
+     struct re_se_params * params;
+#endif
+{
+  struct rexp_node * left;
+  struct rexp_node * right;
+  int se;
+
+  if (rexp == 0)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_cset:
+    case r_data:
+      return rexp;
+
+    case r_side_effect:
+      se = (long)rexp->params.side_effect;
+
+      /* Keep anything that is not a paren assignment for a register
+       * numbered above zero that nobody backreferences.
+       */
+      if (se < 0)
+        return rexp;
+      if (   ((enum re_side_effects)params[se].se != re_se_lparen)
+          && ((enum re_side_effects)params[se].se != re_se_rparen))
+        return rexp;
+      if (!(params [se].op1 > 0))
+        return rexp;
+      if (needed [params [se].op1])
+        return rexp;
+
+      rx_free_rexp (rx, rexp);
+      return 0;
+
+    case r_opt:
+    case r_star:
+      left = remove_unecessary_side_effects (rx, needed,
+                                             rexp->params.pair.left, params);
+      rexp->params.pair.left = left;
+      if (left)
+        return rexp;
+
+      /* The operand vanished, so the repetition node goes too. */
+      rx_free_rexp (rx, rexp);
+      return 0;
+
+    case r_alternate:
+    case r_concat:
+    case r_2phase_star:
+      left = remove_unecessary_side_effects (rx, needed,
+                                             rexp->params.pair.left, params);
+      right = remove_unecessary_side_effects (rx, needed,
+                                              rexp->params.pair.right, params);
+
+      if ((rexp->type == r_concat) && !(left && right))
+        {
+          /* A concatenation with a missing side collapses to whichever
+           * child survived.  Detach both children first so that freeing
+           * the node does not take the survivor with it.
+           */
+          rexp->params.pair.left = rexp->params.pair.right = 0;
+          rx_free_rexp (rx, rexp);
+          return left ? left : right;
+        }
+
+      rexp->params.pair.left = left;
+      rexp->params.pair.right = right;
+      return rexp;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+
+/* Returns 1 if NODE is null or is built only from the side effects listed
+ * below as returning 1 (anchors, word-boundary tests and the like), and 0
+ * otherwise.  The caller uses a 1 result to treat a repetition operator
+ * as having no real operand.
+ */
+#ifdef __STDC__
+static int
+pointless_if_repeated (struct rexp_node * node, struct re_se_params * params)
+#else
+static int
+pointless_if_repeated (node, params)
+     struct rexp_node * node;
+     struct re_se_params * params;
+#endif
+{
+  enum re_side_effects effect;
+
+  if (node == 0)
+    return 1;
+
+  switch (node->type)
+    {
+    case r_opt:
+    case r_star:
+      return pointless_if_repeated (node->params.pair.left, params);
+
+    case r_alternate:
+    case r_concat:
+    case r_2phase_star:
+      /* Pointless only if both halves are. */
+      if (!pointless_if_repeated (node->params.pair.left, params))
+        return 0;
+      return pointless_if_repeated (node->params.pair.right, params) ? 1 : 0;
+
+    case r_side_effect:
+      /* A negative value is the side effect itself; a non-negative one
+       * is an index into PARAMS.
+       */
+      if ((long)node->params.side_effect < 0)
+        effect = (enum re_side_effects)node->params.side_effect;
+      else
+        effect = (enum re_side_effects)params[(long)node->params.side_effect].se;
+
+      switch (effect)
+        {
+        case re_se_try:
+        case re_se_at_dot:
+        case re_se_begbuf:
+        case re_se_hat:
+        case re_se_wordbeg:
+        case re_se_wordbound:
+        case re_se_notwordbound:
+        case re_se_wordend:
+        case re_se_endbuf:
+        case re_se_dollar:
+        case re_se_fail:
+        case re_se_win:
+          return 1;
+
+        case re_se_lparen:
+        case re_se_rparen:
+        case re_se_iter:
+        case re_se_end_iter:
+        case re_se_syntax:
+        case re_se_not_syntax:
+        case re_se_backref:
+        default:
+          /* Any side effect not named above is treated the same way. */
+          return 0;
+        }
+
+    case r_cset:
+    case r_data:
+    default:
+      return 0;
+    }
+}
+
+
+
+/* Returns 1 if REXP contains a left/right paren side effect for a register
+ * numbered above zero at a place where IN_DANGER is set, and 0 otherwise.
+ * IN_DANGER is forced to 1 below r_star and r_2phase_star, forced to 0
+ * below r_opt, and passed through unchanged by r_alternate and r_concat.
+ * RXB is only handed on to the recursive calls.
+ */
+#ifdef __STDC__
+static int
+registers_on_stack (struct re_pattern_buffer * rxb,
+                    struct rexp_node * rexp, int in_danger,
+                    struct re_se_params * params)
+#else
+static int
+registers_on_stack (rxb, rexp, in_danger, params)
+     struct re_pattern_buffer * rxb;
+     struct rexp_node * rexp;
+     int in_danger;
+     struct re_se_params * params;
+#endif
+{
+  int se;
+
+  if (rexp == 0)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_side_effect:
+      se = (long)rexp->params.side_effect;
+      if (!in_danger)
+        return 0;
+      if (se < 0)
+        return 0;
+      if (!(params [se].op1 > 0))
+        return 0;
+      if (   ((enum re_side_effects)params[se].se == re_se_lparen)
+          || ((enum re_side_effects)params[se].se == re_se_rparen))
+        return 1;
+      return 0;
+
+    case r_opt:
+      return registers_on_stack (rxb, rexp->params.pair.left, 0, params);
+
+    case r_star:
+      return registers_on_stack (rxb, rexp->params.pair.left, 1, params);
+
+    case r_2phase_star:
+      if (registers_on_stack (rxb, rexp->params.pair.left, 1, params))
+        return 1;
+      return registers_on_stack (rxb, rexp->params.pair.right, 1, params)
+             ? 1 : 0;
+
+    case r_alternate:
+    case r_concat:
+      if (registers_on_stack (rxb, rexp->params.pair.left, in_danger, params))
+        return 1;
+      return registers_on_stack (rxb, rexp->params.pair.right,
+                                 in_danger, params)
+             ? 1 : 0;
+
+    case r_cset:
+    case r_data:
+      return 0;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+
+/* Table of IDEM flags, one per RX_DEF_CPLX_SE entry, generated by
+ * re-including <regex.h> with RX_WANT_SE_DEFS defined.  RX_DEF_SE is
+ * defined to expand to nothing here, so only the complex side effects
+ * contribute elements.  has_non_idempotent_epsilon_path indexes this
+ * table by the `se' field of a params entry.
+ */
+static char idempotent_complex_se[] =
+{
+#define RX_WANT_SE_DEFS 1
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#define RX_DEF_SE(IDEM, NAME, VALUE)
+#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) IDEM,
+#include <regex.h>
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#undef RX_WANT_SE_DEFS
+ 23 /* final element after the generated entries; presumably an arbitrary filler -- TODO confirm */
+};
+
+/* Table of IDEM flags, one per RX_DEF_SE entry, generated by re-including
+ * <regex.h> with RX_WANT_SE_DEFS defined.  RX_DEF_CPLX_SE is defined to
+ * expand to nothing here, so only the simple side effects contribute
+ * elements.  has_non_idempotent_epsilon_path reads this table as
+ * idempotent_se[-se], i.e. by the negated side-effect value.
+ */
+static char idempotent_se[] =
+{
+ 13, /* occupies index 0; presumably a filler because simple side effects are numbered from -1 down -- TODO confirm against <regex.h> */
+#define RX_WANT_SE_DEFS 1
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#define RX_DEF_SE(IDEM, NAME, VALUE) IDEM,
+#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)
+#include <regex.h>
+#undef RX_DEF_SE
+#undef RX_DEF_CPLX_SE
+#undef RX_WANT_SE_DEFS
+ 42 /* final element after the generated entries; presumably an arbitrary filler -- TODO confirm */
+};
+
+
+
+/* Returns 1 if the tree REXP contains at least one side-effect node,
+ * 0 otherwise (including when REXP is null).  RX is only handed on to
+ * the recursive calls.
+ */
+#ifdef __STDC__
+static int
+has_any_se (struct rx * rx,
+            struct rexp_node * rexp)
+#else
+static int
+has_any_se (rx, rexp)
+     struct rx * rx;
+     struct rexp_node * rexp;
+#endif
+{
+  if (rexp == 0)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_side_effect:
+      return 1;
+
+    case r_opt:
+    case r_star:
+      /* Single-operand nodes: only the left child is inspected. */
+      return has_any_se (rx, rexp->params.pair.left);
+
+    case r_2phase_star:
+    case r_concat:
+    case r_alternate:
+      if (has_any_se (rx, rexp->params.pair.left))
+        return 1;
+      return has_any_se (rx, rexp->params.pair.right) ? 1 : 0;
+
+    case r_cset:
+    case r_data:
+      return 0;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+
+/* Returns nonzero if, as the name suggests, REXP has a path on which a
+ * non-idempotent side effect is performed; rx_compile uses the answer to
+ * decide whether a repeated expression needs the pushpos/chkpos guard.
+ *
+ *   - r_cset, r_data and r_star nodes never count (returns 0);
+ *   - a side-effect node counts when its idempotence-table entry is zero;
+ *   - an alternation counts if either branch does;
+ *   - a concatenation or 2-phase star counts only if both halves do;
+ *   - an optional node counts if its operand does.
+ *
+ * PARAMS is the table of complex side effects.  RX is only handed on to
+ * the recursive calls.
+ *
+ * This must be called AFTER `convert_hard_loops' for a given REXP.
+ */
+#ifdef __STDC__
+static int
+has_non_idempotent_epsilon_path (struct rx * rx,
+                                 struct rexp_node * rexp,
+                                 struct re_se_params * params)
+#else
+static int
+has_non_idempotent_epsilon_path (rx, rexp, params)
+     struct rx * rx;
+     struct rexp_node * rexp;
+     struct re_se_params * params;
+#endif
+{
+  if (!rexp)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_cset:
+    case r_data:
+    case r_star:
+      return 0;
+
+    case r_side_effect:
+      /* A value >= 0 is an index into PARAMS; a negative value is a
+       * simple side effect, looked up by its negation.  Index 0 is a
+       * valid PARAMS slot (rx_compile hands out indices starting from
+       * paramc == 0), so the test must be `>= 0', as it is in
+       * find_backrefs and registers_on_stack.  Testing `> 0' would read
+       * the filler entry idempotent_se[0] instead of params[0] and always
+       * report the first complex side effect as idempotent.
+       */
+      return
+        !((long)rexp->params.side_effect >= 0
+          ? idempotent_complex_se [ params [(long)rexp->params.side_effect].se ]
+          : idempotent_se [-(long)rexp->params.side_effect]);
+
+    case r_alternate:
+      return
+        (   has_non_idempotent_epsilon_path (rx,
+                                             rexp->params.pair.left, params)
+         || has_non_idempotent_epsilon_path (rx,
+                                             rexp->params.pair.right, params));
+
+    case r_2phase_star:
+    case r_concat:
+      return
+        (   has_non_idempotent_epsilon_path (rx,
+                                             rexp->params.pair.left, params)
+         && has_non_idempotent_epsilon_path (rx,
+                                             rexp->params.pair.right, params));
+
+    case r_opt:
+      return has_non_idempotent_epsilon_path (rx,
+                                              rexp->params.pair.left, params);
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+
+/* This computes roughly what its name suggests.  It can (and does) go
+ * wrong in the direction of returning a spurious 0 without causing
+ * disasters.  RX is only handed on to the calls made below.
+ */
+#ifdef __STDC__
+static int
+begins_with_complex_se (struct rx * rx, struct rexp_node * rexp)
+#else
+static int
+begins_with_complex_se (rx, rexp)
+     struct rx * rx;
+     struct rexp_node * rexp;
+#endif
+{
+  if (rexp == 0)
+    return 0;
+
+  switch (rexp->type)
+    {
+    case r_side_effect:
+      /* Non-negative side effects are the complex (parameterized) ones. */
+      if ((long)rexp->params.side_effect >= 0)
+        return 1;
+      return 0;
+
+    case r_concat:
+      /* Approximation: any side effect in the left half is enough. */
+      return has_any_se (rx, rexp->params.pair.left);
+
+    case r_alternate:
+      /* Every branch has to begin with one. */
+      if (!begins_with_complex_se (rx, rexp->params.pair.left))
+        return 0;
+      return begins_with_complex_se (rx, rexp->params.pair.right) ? 1 : 0;
+
+    case r_cset:
+    case r_data:
+    case r_opt:
+    case r_star:
+    case r_2phase_star:
+      return 0;
+    }
+
+  /* this should never happen */
+  return 0;
+}
+
+
+/* Destructively strip some of the re_se_tv side effects from the tree
+ * REXP.  During parsing a re_se_tv was inserted on the right half of
+ * every | so that posix path preference could be honored; this function
+ * removes those that can be shown to be unnecessary.  A non-zero UNPOSIX
+ * removes all of them.
+ */
+
+#ifdef __STDC__
+static void
+speed_up_alt (struct rx * rx,
+              struct rexp_node * rexp,
+              int unposix)
+#else
+static void
+speed_up_alt (rx, rexp, unposix)
+     struct rx * rx;
+     struct rexp_node * rexp;
+     int unposix;
+#endif
+{
+  struct rexp_node * wrapper;
+  struct rexp_node * branch;
+  int drop_sync;
+
+  if (rexp == 0)
+    return;
+
+  switch (rexp->type)
+    {
+    case r_cset:
+    case r_data:
+    case r_side_effect:
+      break;
+
+    case r_opt:
+    case r_star:
+      speed_up_alt (rx, rexp->params.pair.left, unposix);
+      break;
+
+    case r_2phase_star:
+    case r_concat:
+      speed_up_alt (rx, rexp->params.pair.left, unposix);
+      speed_up_alt (rx, rexp->params.pair.right, unposix);
+      break;
+
+    case r_alternate:
+      speed_up_alt (rx, rexp->params.pair.left, unposix);
+
+      /* the right child is guaranteed to be (concat re_se_tv <subexp>) */
+      wrapper = rexp->params.pair.right;
+      branch = wrapper->params.pair.right;
+      speed_up_alt (rx, branch, unposix);
+
+      /* The sync can go when posix semantics aren't wanted, when the
+       * branch already begins with a complex side effect, or when
+       * neither side of the alternation has any side effect at all.
+       */
+      drop_sync = unposix;
+      if (!drop_sync)
+        drop_sync = begins_with_complex_se (rx, branch);
+      if (!drop_sync)
+        drop_sync = (   !has_any_se (rx, branch)
+                     && !has_any_se (rx, rexp->params.pair.left));
+
+      if (drop_sync)
+        {
+          /* Splice the branch in directly, then detach it from the
+           * wrapper so freeing the wrapper leaves the branch intact.
+           */
+          rexp->params.pair.right = branch;
+          wrapper->params.pair.right = 0;
+          rx_free_rexp (rx, wrapper);
+        }
+      break;
+    }
+}
+
+
+
+
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is set to zero;
+ `re_nsub' is set to the number of groups in PATTERN;
+ `not_bol' and `not_eol' are set to zero.
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+
+#ifdef __STDC__
+RX_DECL reg_errcode_t
+rx_compile (__const__ char *pattern, int size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer * rxb)
+#else
+RX_DECL reg_errcode_t
+rx_compile (pattern, size, syntax, rxb)
+ __const__ char *pattern;
+ int size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer * rxb;
+#endif
+{
+ RX_subset
+ inverse_translate [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
+ char
+ validate_inv_tr [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
+
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random tempory spot in PATTERN. */
+ __const__ char *p1;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ __const__ char *p = pattern;
+ __const__ char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ unsigned char *translate = (rxb->translate
+ ? rxb->translate
+ : rx_id_translation);
+
+ /* When parsing is done, this will hold the expression tree. */
+ struct rexp_node * rexp = 0;
+
+ /* In the midst of compilation, this holds onto the regexp
+ * first parst while rexp goes on to aquire additional constructs.
+ */
+ struct rexp_node * orig_rexp = 0;
+ struct rexp_node * fewer_side_effects = 0;
+
+ /* This and top_expression are saved on the compile stack. */
+ struct rexp_node ** top_expression = &rexp;
+ struct rexp_node ** last_expression = top_expression;
+
+ /* Parameter to `goto append_node' */
+ struct rexp_node * append;
+
+ /* Counts open-groups as they are encountered. This is the index of the
+ * innermost group being compiled.
+ */
+ regnum_t regnum = 0;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ * which to go back if the interval is invalid.
+ */
+ __const__ char *beg_interval;
+
+ struct re_se_params * params = 0;
+ int paramc = 0; /* How many complex side effects so far? */
+
+ rx_side_effect side; /* param to `goto add_side_effect' */
+
+ bzero (validate_inv_tr, sizeof (validate_inv_tr));
+
+ rxb->rx.instruction_table = rx_id_instruction_table;
+
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = (( compile_stack_elt_t *) malloc ((INIT_COMPILE_STACK_SIZE) * sizeof ( compile_stack_elt_t)));
+ if (compile_stack.stack == 0)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ rxb->rx.cache = &default_cache;
+ rxb->syntax = syntax;
+ rxb->fastmap_accurate = 0;
+ rxb->not_bol = rxb->not_eol = 0;
+ rxb->least_subs = 0;
+
+ /* Always count groups, whether or not rxb->no_sub is set.
+ * The whole pattern is implicitly group 0, so counting begins
+ * with 1.
+ */
+ rxb->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ {
+ struct rexp_node * n
+ = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_hat);
+ if (!n)
+ return REG_ESPACE;
+ append = n;
+ goto append_node;
+ }
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ {
+ struct rexp_node * n
+ = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_dollar);
+ if (!n)
+ return REG_ESPACE;
+ append = n;
+ goto append_node;
+ }
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (pointless_if_repeated (*last_expression, params))
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!last_expression)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ * and also whether or not two or more matches is allowed.
+ */
+
+ {
+ struct rexp_node * inner_exp = *last_expression;
+ int need_sync = 0;
+
+ if (many_times_ok
+ && has_non_idempotent_epsilon_path (&rxb->rx,
+ inner_exp, params))
+ {
+ struct rexp_node * pusher
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_pushpos);
+ struct rexp_node * checker
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_chkpos);
+ struct rexp_node * pushback
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_pushback);
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * lit_t;
+ struct rexp_node * fake_state;
+ struct rexp_node * phase2;
+ struct rexp_node * popper;
+ struct rexp_node * star;
+ struct rexp_node * a;
+ struct rexp_node * whole_thing;
+
+ if (! cs)
+ return REG_ESPACE;
+ lit_t = rx_mk_r_cset (&rxb->rx, cs);
+ fake_state = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
+ phase2 = rx_mk_r_concat (&rxb->rx, checker, fake_state);
+ popper = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_poppos);
+ star = rx_mk_r_2phase_star (&rxb->rx, inner_exp, phase2);
+ a = rx_mk_r_concat (&rxb->rx, pusher, star);
+ whole_thing = rx_mk_r_concat (&rxb->rx, a, popper);
+
+ if (!(pusher && star && pushback && lit_t && fake_state
+ && lit_t && phase2 && checker && popper
+ && a && whole_thing))
+ return REG_ESPACE;
+ RX_bitset_enjoin (cs, 't');
+ *last_expression = whole_thing;
+ }
+ else
+ {
+ struct rexp_node * star =
+ (many_times_ok ? rx_mk_r_star : rx_mk_r_opt)
+ (&rxb->rx, *last_expression);
+ if (!star)
+ return REG_ESPACE;
+ *last_expression = star;
+ need_sync = has_any_se (&rxb->rx, *last_expression);
+ }
+ if (!zero_times_ok)
+ {
+ struct rexp_node * concat
+ = rx_mk_r_concat (&rxb->rx, inner_exp,
+ rx_copy_rexp (&rxb->rx,
+ *last_expression));
+ if (!concat)
+ return REG_ESPACE;
+ *last_expression = concat;
+ }
+ if (need_sync)
+ {
+ int sync_se = paramc;
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ sizeof (*params) * (1 + paramc)))
+ : ((struct re_se_params *)
+ malloc (sizeof (*params))));
+ if (!params)
+ return REG_ESPACE;
+ ++paramc;
+ params [sync_se].se = re_se_tv;
+ side = (rx_side_effect)sync_se;
+ goto add_side_effect;
+ }
+ }
+ /* The old regex.c used to optimize `.*\n'.
+ * Maybe rx should too?
+ */
+ }
+ break;
+
+
+ case '.':
+ {
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * n = rx_mk_r_cset (&rxb->rx, cs);
+ if (!(cs && n))
+ return REG_ESPACE;
+
+ rx_bitset_universe (rxb->rx.local_cset_size, cs);
+ if (!(rxb->syntax & RE_DOT_NEWLINE))
+ RX_bitset_remove (cs, '\n');
+ if (!(rxb->syntax & RE_DOT_NOT_NULL))
+ RX_bitset_remove (cs, 0);
+
+ append = n;
+ goto append_node;
+ break;
+ }
+
+
+ case '[':
+ if (p == pend) return REG_EBRACK;
+ {
+ boolean had_char_class = false;
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * node = rx_mk_r_cset (&rxb->rx, cs);
+ int is_inverted = *p == '^';
+
+ if (!(node && cs))
+ return REG_ESPACE;
+
+ /* This branch of the switch is normally exited with
+ *`goto append_node'
+ */
+ append = node;
+
+ if (is_inverted)
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) return REG_EBRACK;
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ {
+ rx_Bitset it = inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ c1);
+ rx_bitset_union (rxb->rx.local_cset_size, cs, it);
+ }
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ goto finalize_class_and_append;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ return REG_ERANGE;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (rxb, cs, &p, pend, translate, syntax,
+ inverse_translate, validate_inv_tr);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (rxb, cs, &p, pend, translate, syntax,
+ inverse_translate, validate_inv_tr);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if ((syntax & RE_CHAR_CLASSES)
+ && (c == '[') && (*p == ':'))
+ {
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) return REG_EBRACK;
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and:`]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ int ch;
+ boolean is_alnum = !strcmp (str, "alnum");
+ boolean is_alpha = !strcmp (str, "alpha");
+ boolean is_blank = !strcmp (str, "blank");
+ boolean is_cntrl = !strcmp (str, "cntrl");
+ boolean is_digit = !strcmp (str, "digit");
+ boolean is_graph = !strcmp (str, "graph");
+ boolean is_lower = !strcmp (str, "lower");
+ boolean is_print = !strcmp (str, "print");
+ boolean is_punct = !strcmp (str, "punct");
+ boolean is_space = !strcmp (str, "space");
+ boolean is_upper = !strcmp (str, "upper");
+ boolean is_xdigit = !strcmp (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) return REG_EBRACK;
+
+ for (ch = 0; ch < 1 << CHARBITS; ch++)
+ {
+ if ( (is_alnum && isalnum (ch))
+ || (is_alpha && isalpha (ch))
+ || (is_blank && isblank (ch))
+ || (is_cntrl && iscntrl (ch))
+ || (is_digit && isdigit (ch))
+ || (is_graph && isgraph (ch))
+ || (is_lower && islower (ch))
+ || (is_print && isprint (ch))
+ || (is_punct && ispunct (ch))
+ || (is_space && isspace (ch))
+ || (is_upper && isupper (ch))
+ || (is_xdigit && isxdigit (ch)))
+ {
+ rx_Bitset it =
+ inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ ch);
+ rx_bitset_union (rxb->rx.local_cset_size,
+ cs, it);
+ }
+ }
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ {
+ rx_Bitset it =
+ inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ '[');
+ rx_bitset_union (rxb->rx.local_cset_size,
+ cs, it);
+ }
+ {
+ rx_Bitset it =
+ inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ ':');
+ rx_bitset_union (rxb->rx.local_cset_size,
+ cs, it);
+ }
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ {
+ rx_Bitset it = inverse_translation (rxb,
+ validate_inv_tr,
+ inverse_translate,
+ translate,
+ c);
+ rx_bitset_union (rxb->rx.local_cset_size, cs, it);
+ }
+ }
+ }
+
+ finalize_class_and_append:
+ if (is_inverted)
+ {
+ rx_bitset_complement (rxb->rx.local_cset_size, cs);
+ if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+ RX_bitset_remove (cs, '\n');
+ }
+ goto append_node;
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) return REG_EESCAPE;
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ rxb->re_nsub++;
+ regnum++;
+ if (COMPILE_STACK_FULL)
+ {
+ ((compile_stack.stack) =
+ (compile_stack_elt_t *) realloc (compile_stack.stack, ( compile_stack.size << 1) * sizeof (
+ compile_stack_elt_t)));
+ if (compile_stack.stack == 0) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ if (*last_expression)
+ {
+ struct rexp_node * concat
+ = rx_mk_r_concat (&rxb->rx, *last_expression, 0);
+ if (!concat)
+ return REG_ESPACE;
+ *last_expression = concat;
+ last_expression = &concat->params.pair.right;
+ }
+
+ /*
+ * These are the values to restore when we hit end of this
+ * group.
+ */
+ COMPILE_STACK_TOP.top_expression = top_expression;
+ COMPILE_STACK_TOP.last_expression = last_expression;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ compile_stack.avail++;
+
+ top_expression = last_expression;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ handle_close:
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ return REG_ERPAREN;
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+ struct rexp_node ** inner = top_expression;
+
+ compile_stack.avail--;
+ top_expression = COMPILE_STACK_TOP.top_expression;
+ last_expression = COMPILE_STACK_TOP.last_expression;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ {
+ int left_se = paramc;
+ int right_se = paramc + 1;
+
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ (paramc + 2) * sizeof (params[0])))
+ : ((struct re_se_params *)
+ malloc (2 * sizeof (params[0]))));
+ if (!params)
+ return REG_ESPACE;
+ paramc += 2;
+
+ params[left_se].se = re_se_lparen;
+ params[left_se].op1 = this_group_regnum;
+ params[right_se].se = re_se_rparen;
+ params[right_se].op1 = this_group_regnum;
+ {
+ struct rexp_node * left
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)left_se);
+ struct rexp_node * right
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)right_se);
+ struct rexp_node * c1
+ = (*inner
+ ? rx_mk_r_concat (&rxb->rx, left, *inner) : left);
+ struct rexp_node * c2
+ = rx_mk_r_concat (&rxb->rx, c1, right);
+ if (!(left && right && c1 && c2))
+ return REG_ESPACE;
+ *inner = c2;
+ }
+ }
+ break;
+ }
+
+ case '|': /* `\|'. */
+ if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR))
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ {
+ struct rexp_node * alt
+ = rx_mk_r_alternate (&rxb->rx, *top_expression, 0);
+ if (!alt)
+ return REG_ESPACE;
+ *top_expression = alt;
+ last_expression = &alt->params.pair.right;
+ {
+ int sync_se = paramc;
+
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ (paramc + 1) * sizeof (params[0])))
+ : ((struct re_se_params *)
+ malloc (sizeof (params[0]))));
+ if (!params)
+ return REG_ESPACE;
+ ++paramc;
+
+ params[sync_se].se = re_se_tv;
+ {
+ struct rexp_node * sync
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)sync_se);
+ struct rexp_node * conc
+ = rx_mk_r_concat (&rxb->rx, sync, 0);
+
+ if (!sync || !conc)
+ return REG_ESPACE;
+
+ *last_expression = conc;
+ last_expression = &conc->params.pair.right;
+ }
+ }
+ }
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_EBRACE;
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') return REG_EBRACE;
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (pointless_if_repeated (*last_expression, params))
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto unfetch_interval;
+ /* was: else laststart = b; */
+ }
+
+ /* If the upper bound is zero, don't want to iterate
+ * at all.
+ */
+ if (upper_bound == 0)
+ {
+ if (*last_expression)
+ {
+ rx_free_rexp (&rxb->rx, *last_expression);
+ *last_expression = 0;
+ }
+ }
+ else
+ /* Otherwise, we have a nontrivial interval. */
+ {
+ int iter_se = paramc;
+ int end_se = paramc + 1;
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ sizeof (*params) * (2 + paramc)))
+ : ((struct re_se_params *)
+ malloc (2 * sizeof (*params))));
+ if (!params)
+ return REG_ESPACE;
+ paramc += 2;
+ params [iter_se].se = re_se_iter;
+ params [iter_se].op1 = lower_bound;
+ params[iter_se].op2 = upper_bound;
+
+ params[end_se].se = re_se_end_iter;
+ params[end_se].op1 = lower_bound;
+ params[end_se].op2 = upper_bound;
+ {
+ struct rexp_node * push0
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_push0);
+ struct rexp_node * start_one_iter
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)iter_se);
+ struct rexp_node * phase1
+ = rx_mk_r_concat (&rxb->rx, start_one_iter,
+ *last_expression);
+ struct rexp_node * pushback
+ = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)re_se_pushback);
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * lit_t;
+ struct rexp_node * phase2;
+ struct rexp_node * loop;
+ struct rexp_node * push_n_loop;
+ struct rexp_node * final_test;
+ struct rexp_node * full_exp;
+
+ if (! cs)
+ return REG_ESPACE;
+ lit_t = rx_mk_r_cset (&rxb->rx, cs);
+ phase2 = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
+ loop = rx_mk_r_2phase_star (&rxb->rx, phase1, phase2);
+ push_n_loop = rx_mk_r_concat (&rxb->rx, push0, loop);
+ final_test = rx_mk_r_side_effect (&rxb->rx,
+ (rx_side_effect)end_se);
+ full_exp = rx_mk_r_concat (&rxb->rx, push_n_loop, final_test);
+
+ if (!(push0 && start_one_iter && phase1
+ && pushback && lit_t && phase2
+ && loop && push_n_loop && final_test && full_exp))
+ return REG_ESPACE;
+
+ RX_bitset_enjoin(cs, 't');
+
+ *last_expression = full_exp;
+ }
+ }
+ beg_interval = 0;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ p = beg_interval;
+ beg_interval = 0;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ side = (rx_side_effect)rx_se_at_dot;
+ goto add_side_effect;
+ break;
+
+ case 's':
+ case 'S':
+ {
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * set = rx_mk_r_cset (&rxb->rx, cs);
+ if (!(cs && set))
+ return REG_ESPACE;
+ if (c == 'S')
+ rx_bitset_universe (rxb->rx.local_cset_size, cs);
+
+ PATFETCH (c);
+ {
+ int x;
+ enum syntaxcode code = syntax_spec_code [c];
+ for (x = 0; x < 256; ++x)
+ {
+
+ if (SYNTAX (x) == code)
+ {
+ rx_Bitset it =
+ inverse_translation (rxb, validate_inv_tr,
+ inverse_translate,
+ translate, x);
+ rx_bitset_xor (rxb->rx.local_cset_size, cs, it);
+ }
+ }
+ }
+ append = set;
+ goto append_node;
+ }
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ case 'W':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ {
+ rx_Bitset cs = rx_cset (&rxb->rx);
+ struct rexp_node * n = (cs ? rx_mk_r_cset (&rxb->rx, cs) : 0);
+ if (!(cs && n))
+ return REG_ESPACE;
+ if (c == 'W')
+ rx_bitset_universe (rxb->rx.local_cset_size ,cs);
+ {
+ int x;
+ for (x = rxb->rx.local_cset_size - 1; x > 0; --x)
+ if (SYNTAX(x) & Sword)
+ RX_bitset_toggle (cs, x);
+ }
+ append = n;
+ goto append_node;
+ }
+ break;
+
+/* With a little extra work, some of these side effects could be optimized
+ * away (basically by looking at what we already know about the surrounding
+ * chars).
+ */
+ case '<':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ side = (rx_side_effect)re_se_wordbeg;
+ goto add_side_effect;
+ break;
+
+ case '>':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ side = (rx_side_effect)re_se_wordend;
+ goto add_side_effect;
+ break;
+
+ case 'b':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ side = (rx_side_effect)re_se_wordbound;
+ goto add_side_effect;
+ break;
+
+ case 'B':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ side = (rx_side_effect)re_se_notwordbound;
+ goto add_side_effect;
+ break;
+
+ case '`':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ side = (rx_side_effect)re_se_begbuf;
+ goto add_side_effect;
+ break;
+
+ case '\'':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ side = (rx_side_effect)re_se_endbuf;
+ goto add_side_effect;
+ break;
+
+ add_side_effect:
+ {
+ struct rexp_node * se
+ = rx_mk_r_side_effect (&rxb->rx, side);
+ if (!se)
+ return REG_ESPACE;
+ append = se;
+ goto append_node;
+ }
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ return REG_ESUBREG;
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, c1))
+ return REG_ESUBREG;
+
+ {
+ int backref_se = paramc;
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ sizeof (*params) * (1 + paramc)))
+ : ((struct re_se_params *)
+ malloc (sizeof (*params))));
+ if (!params)
+ return REG_ESPACE;
+ ++paramc;
+ params[backref_se].se = re_se_backref;
+ params[backref_se].op1 = c1;
+ side = (rx_side_effect)backref_se;
+ goto add_side_effect;
+ }
+ break;
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ {
+ rx_Bitset cs = rx_cset(&rxb->rx);
+ struct rexp_node * match = rx_mk_r_cset (&rxb->rx, cs);
+ rx_Bitset it;
+ if (!(cs && match))
+ return REG_ESPACE;
+ it = inverse_translation (rxb, validate_inv_tr,
+ inverse_translate, translate, c);
+ rx_bitset_union (CHAR_SET_SIZE, cs, it);
+ append = match;
+
+ append_node:
+ /* This generically appends the rexp APPEND to *LAST_EXPRESSION
+ * and then parses the next character normally.
+ */
+ if (*last_expression)
+ {
+ struct rexp_node * concat
+ = rx_mk_r_concat (&rxb->rx, *last_expression, append);
+ if (!concat)
+ return REG_ESPACE;
+ *last_expression = concat;
+ last_expression = &concat->params.pair.right;
+ }
+ else
+ *last_expression = append;
+ }
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ {
+ int win_se = paramc;
+ params = (params
+ ? ((struct re_se_params *)
+ realloc (params,
+ sizeof (*params) * (1 + paramc)))
+ : ((struct re_se_params *)
+ malloc (sizeof (*params))));
+ if (!params)
+ return REG_ESPACE;
+ ++paramc;
+ params[win_se].se = re_se_win;
+ {
+ struct rexp_node * se
+ = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)win_se);
+ struct rexp_node * concat
+ = rx_mk_r_concat (&rxb->rx, rexp, se);
+ if (!(se && concat))
+ return REG_ESPACE;
+ rexp = concat;
+ }
+ }
+
+
+ /* Through the pattern now. */
+
+ if (!COMPILE_STACK_EMPTY)
+ return REG_EPAREN;
+
+ free (compile_stack.stack);
+
+ orig_rexp = rexp;
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ dbug_rxb = rxb;
+ fputs ("\n\nCompiling ", stdout);
+ fwrite (pattern, 1, size, stdout);
+ fputs (":\n", stdout);
+ rxb->se_params = params;
+ print_rexp (&rxb->rx, orig_rexp, 2, re_seprint, stdout);
+ }
+#endif
+ {
+ rx_Bitset cs = rx_cset(&rxb->rx);
+ rx_Bitset cs2 = rx_cset(&rxb->rx);
+ char * se_map = (char *) alloca (paramc);
+ struct rexp_node * new_rexp = 0;
+
+
+ bzero (se_map, paramc);
+ find_backrefs (se_map, rexp, params);
+ fewer_side_effects =
+ remove_unecessary_side_effects (&rxb->rx, se_map,
+ rx_copy_rexp (&rxb->rx, rexp), params);
+
+ speed_up_alt (&rxb->rx, rexp, 0);
+ speed_up_alt (&rxb->rx, fewer_side_effects, 1);
+
+ {
+ char * syntax_parens = rxb->syntax_parens;
+ if (syntax_parens == (char *)0x1)
+ rexp = remove_unecessary_side_effects
+ (&rxb->rx, se_map, rexp, params);
+ else if (syntax_parens)
+ {
+ int x;
+ for (x = 0; x < paramc; ++x)
+ if (( (params[x].se == re_se_lparen)
+ || (params[x].se == re_se_rparen))
+ && (!syntax_parens [params[x].op1]))
+ se_map [x] = 1;
+ rexp = remove_unecessary_side_effects
+ (&rxb->rx, se_map, rexp, params);
+ }
+ }
+
+ /* At least one more optimization would be nice to have here but i ran out
+ * of time. The idea would be to delay side effects.
+ * For example, `(abc)' is the same thing as `abc()' except that the
+ * left paren is offset by 3 (which we know at compile time).
+ * (In this comment, write that second pattern `abc(:3:)'
+ * where `(:3:' is a syntactic unit.)
+ *
+ * Trickier: `(abc|defg)' is the same as `(abc(:3:|defg(:4:))'
+ * (The paren nesting may be hard to follow -- that's an alternation
+ * of `abc(:3:' and `defg(:4:' inside (purely syntactic) parens
+ * followed by the closing paren from the original expression.)
+ *
+ * Neither the expression tree representation nor the nfa make
+ * this very easy to write. :(
+ */
+
+ /* What we compile is different than what the parser returns.
+ * Suppose the parser returns expression R.
+ * Let R' be R with unnecessary register assignments removed
+ * (see REMOVE_UNECESSARY_SIDE_EFFECTS, above).
+ *
+ * What we will compile is the expression:
+ *
+ * m{try}R{win}\|s{try}R'{win}
+ *
+ * {try} and {win} denote side effect epsilons (see EXPLORE_FUTURE).
+ *
+ * When trying a match, we insert an `m' at the beginning of the
+ * string if the user wants registers to be filled, `s' if not.
+ */
+ new_rexp =
+ rx_mk_r_alternate
+ (&rxb->rx,
+ rx_mk_r_concat (&rxb->rx, rx_mk_r_cset (&rxb->rx, cs2), rexp),
+ rx_mk_r_concat (&rxb->rx,
+ rx_mk_r_cset (&rxb->rx, cs), fewer_side_effects));
+
+ if (!(new_rexp && cs && cs2))
+ return REG_ESPACE;
+ RX_bitset_enjoin (cs2, '\0'); /* prefixed to the rexp used for matching. */
+ RX_bitset_enjoin (cs, '\1'); /* prefixed to the rexp used for searching. */
+ rexp = new_rexp;
+ }
+
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ fputs ("\n...which is compiled as:\n", stdout);
+ print_rexp (&rxb->rx, rexp, 2, re_seprint, stdout);
+ }
+#endif
+ {
+ struct rx_nfa_state *start = 0;
+ struct rx_nfa_state *end = 0;
+
+ if (!rx_build_nfa (&rxb->rx, rexp, &start, &end))
+ return REG_ESPACE; /* */
+ else
+ {
+ void * mem = (void *)rxb->buffer;
+ unsigned long size = rxb->allocated;
+ int start_id;
+ char * perm_mem;
+ int iterator_size = paramc * sizeof (params[0]);
+
+ end->is_final = 1;
+ start->is_start = 1;
+ rx_name_nfa_states (&rxb->rx);
+ start_id = start->id;
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ fputs ("...giving the NFA: \n", stdout);
+ dbug_rxb = rxb;
+ print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
+ }
+#endif
+ if (!rx_eclose_nfa (&rxb->rx))
+ return REG_ESPACE;
+ else
+ {
+ rx_delete_epsilon_transitions (&rxb->rx);
+
+ /* For compatibility reasons, we need to shove the
+ * compiled nfa into one chunk of malloced memory.
+ */
+ rxb->rx.reserved = ( sizeof (params[0]) * paramc
+ + rx_sizeof_bitset (rxb->rx.local_cset_size));
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ dbug_rxb = rxb;
+ fputs ("...which cooks down (uncompactified) to: \n", stdout);
+ print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
+ }
+#endif
+ if (!rx_compactify_nfa (&rxb->rx, &mem, &size))
+ return REG_ESPACE;
+ rxb->buffer = mem;
+ rxb->allocated = size;
+ rxb->rx.buffer = mem;
+ rxb->rx.allocated = size;
+ perm_mem = ((char *)rxb->rx.buffer
+ + rxb->rx.allocated - rxb->rx.reserved);
+ rxb->se_params = ((struct re_se_params *)perm_mem);
+ bcopy (params, rxb->se_params, iterator_size);
+ perm_mem += iterator_size;
+ rxb->fastset = (rx_Bitset) perm_mem;
+ rxb->start = rx_id_to_nfa_state (&rxb->rx, start_id);
+ }
+ rx_bitset_null (rxb->rx.local_cset_size, rxb->fastset);
+ rxb->can_match_empty = compute_fastset (rxb, orig_rexp);
+ rxb->match_regs_on_stack =
+ registers_on_stack (rxb, orig_rexp, 0, params);
+ rxb->search_regs_on_stack =
+ registers_on_stack (rxb, fewer_side_effects, 0, params);
+ if (rxb->can_match_empty)
+ rx_bitset_universe (rxb->rx.local_cset_size, rxb->fastset);
+ rxb->is_anchored = is_anchored (orig_rexp, (rx_side_effect) re_se_hat);
+ rxb->begbuf_only = is_anchored (orig_rexp,
+ (rx_side_effect) re_se_begbuf);
+ }
+ rx_free_rexp (&rxb->rx, rexp);
+ if (params)
+ free (params);
+#ifdef RX_DEBUG
+ if (rx_debug_compile)
+ {
+ dbug_rxb = rxb;
+ fputs ("...which cooks down to: \n", stdout);
+ print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
+ }
+#endif
+ }
+ return REG_NOERROR;
+}
+
+
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be the same as there.
+
+ The table is indexed with the numeric value of the error code:
+ re_compile_pattern and re_comp both return rx_error_msg[(int) ret].
+ The REG_NOERROR slot is a null pointer, so a successful compilation
+ makes those functions return 0. */
+
+__const__ char * rx_error_msg[] =
+{ 0, /* REG_NOERROR */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+};
+
+
+
+/* A 256-entry map in which every entry is 1.
+ * NOTE(review): presumably a stand-in fastmap that rules out no starting
+ * byte; its users are outside this excerpt -- confirm before relying on it. */
+char rx_slowmap [256] =
+{
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+/* Expand the bitset RXB->fastset into the byte-per-character array
+ * RXB->fastmap (entries 0..255, each stored as 0 or 1) and mark the
+ * fastmap as accurate.
+ */
+#ifdef __STDC__
+RX_DECL void
+rx_blow_up_fastmap (struct re_pattern_buffer * rxb)
+#else
+RX_DECL void
+rx_blow_up_fastmap (rxb)
+     struct re_pattern_buffer * rxb;
+#endif
+{
+  int ch = 0;
+
+  while (ch < 256)
+    {
+      /* Normalize the membership test to exactly 0 or 1. */
+      rxb->fastmap [ch] = (RX_bitset_member (rxb->fastset, ch) ? 1 : 0);
+      ++ch;
+    }
+  rxb->fastmap_accurate = 1;
+}
+
+
+
+/* Choose the name and linkage of the two-string search routine defined
+ * below.  When neither REGEX_MALLOC nor __GNUC__ is defined, the routine is
+ * compiled as the static function inner_re_search_2 and a separate public
+ * re_search_2 wrapper (further down) calls it; otherwise the routine itself
+ * is the public re_search_2.
+ */
+#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
+#define RE_SEARCH_2_FN inner_re_search_2
+#define RE_S2_QUAL static
+#else
+#define RE_SEARCH_2_FN re_search_2
+#define RE_S2_QUAL
+#endif
+/* Closure handed (as a void *) to the re_search_2_* callbacks: the two
+ * pieces of the text being searched and their lengths. */
+struct re_search_2_closure
+{
+ __const__ char * string1; /* first piece; re_search passes 0 here */
+ int size1; /* length of string1 */
+ __const__ char * string2; /* second piece; re_match passes 0 here */
+ int size2; /* length of string2 */
+};
+/* Prototypes for the search, match and compile entry points and for the
+ * two file-local side-effect-list helpers. */
+RE_S2_QUAL int
+ RE_SEARCH_2_FN (struct re_pattern_buffer *,
+ __const__ char *,
+ int, __const__ char *, int, int,
+ int, struct re_registers *, int); /* name/linkage chosen by the macros above */
+int re_rx_search (struct re_pattern_buffer *, int,
+ int, int, int, rx_get_burst_fn,
+ rx_back_check_fn, rx_fetch_char_fn,
+ void *, struct re_registers *,
+ struct rx_search_state *,
+ struct rx_search_state *);
+#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
+int re_search_2 (struct re_pattern_buffer *,
+ __const__ char *, int,
+ __const__ char *, int,
+ int, int, struct re_registers *,
+ int); /* public wrapper, needed only when RE_SEARCH_2_FN is inner_re_search_2 */
+#endif
+int re_search (struct re_pattern_buffer *,
+ __const__ char *, int, int, int,
+ struct re_registers *);
+int re_match_2 (struct re_pattern_buffer *,
+ __const__ char *, int,
+ __const__ char *, int,
+ int, struct re_registers *, int);
+int re_match (struct re_pattern_buffer *,
+ __const__ char *, int, int,
+ struct re_registers *);
+reg_syntax_t re_set_syntax (reg_syntax_t);
+void re_set_registers (struct re_pattern_buffer *,
+ struct re_registers *, unsigned,
+ regoff_t *, regoff_t *);
+static int cplx_se_sublist_len (struct rx_se_list *);
+static int posix_se_list_order (struct rx *, struct rx_se_list *,
+ struct rx_se_list *);
+__const__ char
+ *re_compile_pattern (__const__ char *, int,
+ struct re_pattern_buffer *);
+int re_compile_fastmap (struct re_pattern_buffer *);
+char *re_comp (__const__ char *); /* declared unconditionally; defined below only under the BSD-entry-point #if */
+int re_exec (__const__ char *);
+int regcomp (regex_t *, __const__ char *, int);
+int regexec (__const__ regex_t *,
+ __const__ char *, size_t,
+ regmatch_t pmatch[], int);
+size_t regerror (int, __const__ regex_t *,
+ char *, size_t);
+/* Burst callback passed to rx_search by the two-string search routine.
+ *
+ * Re-points POS at whichever piece of the closure's text contains POS's
+ * current logical position, filling in pos->string, pos->size, pos->end
+ * and pos->offset.  pos->end is clipped so that the burst never extends
+ * past logical offset STOP.
+ *
+ * VCLOSURE must point at a struct re_search_2_closure.
+ *
+ * Returns rx_get_burst_ok or rx_get_burst_no_more.  Note the branches are
+ * not symmetric: only the string1-only branch range-checks the position,
+ * and the "both strings, position inside string1" branch returns
+ * rx_get_burst_ok without comparing pos->pos against pos->end.
+ */
+#ifdef __STDC__
+static __inline__ enum rx_get_burst_return
+re_search_2_get_burst ( struct rx_string_position * pos,
+ void * vclosure, int stop )
+#else
+static __inline__ enum rx_get_burst_return
+re_search_2_get_burst (pos, vclosure, stop)
+ struct rx_string_position * pos;
+ void * vclosure;
+ int stop;
+#endif
+{
+ struct re_search_2_closure * closure;
+ closure = (struct re_search_2_closure *)vclosure;
+ if (!closure->string2) /* only string1 was supplied */
+ {
+ int inset;
+
+ inset = pos->pos - pos->string; /* pos->offset is not consulted in this branch */
+ if ((inset < -1) || (inset > closure->size1)) /* -1 (one before the start) is accepted */
+ return rx_get_burst_no_more;
+ else
+ {
+ pos->pos = (__const__ unsigned char *) closure->string1 + inset;
+ pos->string = (__const__ unsigned char *) closure->string1;
+ pos->size = closure->size1;
+ pos->end = ((__const__ unsigned char *)
+ MIN(closure->string1 + closure->size1,
+ closure->string1 + stop));
+ pos->offset = 0;
+ return ((pos->pos < pos->end)
+ ? rx_get_burst_ok
+ : rx_get_burst_no_more);
+ }
+ }
+ else if (!closure->string1) /* only string2 was supplied */
+ {
+ int inset;
+
+ inset = pos->pos - pos->string; /* no range check here, unlike the branch above */
+ pos->pos = (__const__ unsigned char *) closure->string2 + inset;
+ pos->string = (__const__ unsigned char *) closure->string2;
+ pos->size = closure->size2;
+ pos->end = ((__const__ unsigned char *)
+ MIN(closure->string2 + closure->size2,
+ closure->string2 + stop));
+ pos->offset = 0;
+ return ((pos->pos < pos->end)
+ ? rx_get_burst_ok
+ : rx_get_burst_no_more);
+ }
+ else /* both pieces present */
+ {
+ int inset;
+
+ inset = pos->pos - pos->string + pos->offset; /* logical offset across both pieces */
+ if (inset < closure->size1) /* position falls inside string1 */
+ {
+ pos->pos = (__const__ unsigned char *) closure->string1 + inset;
+ pos->string = (__const__ unsigned char *) closure->string1;
+ pos->size = closure->size1;
+ pos->end = ((__const__ unsigned char *)
+ MIN(closure->string1 + closure->size1,
+ closure->string1 + stop));
+ pos->offset = 0;
+ return rx_get_burst_ok; /* returned without a pos < end test */
+ }
+ else /* position falls inside (or past) string2 */
+ {
+ pos->pos = ((__const__ unsigned char *)
+ closure->string2 + inset - closure->size1);
+ pos->string = (__const__ unsigned char *) closure->string2;
+ pos->size = closure->size2;
+ pos->end = ((__const__ unsigned char *)
+ MIN(closure->string2 + closure->size2,
+ closure->string2 + stop - closure->size1));
+ pos->offset = closure->size1; /* string2 begins at logical offset size1 */
+ return ((pos->pos < pos->end)
+ ? rx_get_burst_ok
+ : rx_get_burst_no_more);
+ }
+ }
+}
+
+
+/* Back-reference callback passed to rx_search by the two-string search
+ * routine.
+ *
+ * Compares the text between logical offsets LPAREN and RPAREN (the
+ * previously matched group) against the input that follows POS, byte by
+ * byte, after mapping both sides through TRANSLATE.
+ *
+ * POS is advanced by one before the comparison.  On success it is moved
+ * back by one from where the comparison stopped, and rx_back_check_pass
+ * is returned.  rx_back_check_fail is returned on the first mismatching
+ * byte, or when the input runs out before the whole group was compared.
+ *
+ * VCLOSURE and STOP are forwarded unchanged to re_search_2_get_burst.
+ */
+#ifdef __STDC__
+static __inline__ enum rx_back_check_return
+re_search_2_back_check ( struct rx_string_position * pos,
+                         int lparen, int rparen, unsigned char * translate,
+                         void * vclosure, int stop )
+#else
+static __inline__ enum rx_back_check_return
+re_search_2_back_check (pos, lparen, rparen, translate, vclosure, stop)
+     struct rx_string_position * pos;
+     int lparen;
+     int rparen;
+     unsigned char * translate;
+     void * vclosure;
+     int stop;
+#endif
+{
+  struct rx_string_position there;
+  struct rx_string_position past;
+
+  /* Cursor at the start of the group. */
+  there = *pos;
+  there.pos = there.string + lparen - there.offset;
+  re_search_2_get_burst (&there, vclosure, stop);
+
+  /* Cursor at the end of the group.  The burst call above has already
+   * rewritten there.offset for whichever piece holds LPAREN, so it must
+   * not be used here: PAST still carries POS's string and offset, and
+   * only its own offset yields logical position RPAREN.
+   */
+  past = *pos;
+  past.pos = past.string + rparen - past.offset;
+  re_search_2_get_burst (&past, vclosure, stop);
+
+  ++pos->pos;
+  re_search_2_get_burst (pos, vclosure, stop);
+
+  while (   (there.pos != past.pos)
+         && (pos->pos != pos->end))
+    if (TRANSLATE(*there.pos) != TRANSLATE(*pos->pos))
+      return rx_back_check_fail;
+    else
+      {
+        ++there.pos;
+        ++pos->pos;
+        /* Cross into the next piece when a cursor reaches the end of
+         * its current burst. */
+        if (there.pos == there.end)
+          re_search_2_get_burst (&there, vclosure, stop);
+        if (pos->pos == pos->end)
+          re_search_2_get_burst (pos, vclosure, stop);
+      }
+
+  /* Input ran out before the whole group was compared. */
+  if (there.pos != past.pos)
+    return rx_back_check_fail;
+  --pos->pos;
+  re_search_2_get_burst (pos, vclosure, stop);
+  return rx_back_check_pass;
+}
+
+/* Character-fetch callback passed to rx_search by the two-string search
+ * routine.
+ *
+ * With OFFSET == 0, returns the byte at POS.  If POS lies before the start
+ * of its string, returns the last byte of string1 when POS is in string2
+ * and string1 is non-empty, and 0 otherwise.
+ *
+ * With OFFSET != 0, returns pos->pos[OFFSET], except that when POS sits at
+ * the end of its burst the first byte of string2 is returned (0 when there
+ * is no string2 or it is empty).
+ *
+ * Every byte is returned as an unsigned char value, whichever string it
+ * was read from.
+ *
+ * APP_CLOSURE must point at a struct re_search_2_closure.  STOP is unused.
+ */
+#ifdef __STDC__
+static __inline__ int
+re_search_2_fetch_char ( struct rx_string_position * pos, int offset,
+                         void * app_closure, int stop )
+#else
+static __inline__ int
+re_search_2_fetch_char (pos, offset, app_closure, stop)
+     struct rx_string_position * pos;
+     int offset;
+     void * app_closure;
+     int stop;
+#endif
+{
+  struct re_search_2_closure * closure;
+  closure = (struct re_search_2_closure *)app_closure;
+  if (offset == 0)
+    {
+      if (pos->pos >= pos->string)
+        return *pos->pos;
+      else
+        {
+          if (   (pos->string == (__const__ unsigned char *) closure->string2)
+              && (closure->string1)
+              && (closure->size1))
+            /* Read through unsigned char so that bytes >= 0x80 are not
+             * sign-extended, matching what *pos->pos yields. */
+            return ((__const__ unsigned char *) closure->string1)
+                     [closure->size1 - 1];
+          else
+            return 0;           /* sure, why not. */
+        }
+    }
+  if (pos->pos == pos->end)
+    {
+      /* string2 is null when the caller supplied a single string as
+       * string1 (re_match does this), and may be empty; never
+       * dereference it in those cases. */
+      if (closure->string2 && closure->size2)
+        return *(__const__ unsigned char *) closure->string2;
+      return 0;
+    }
+  return pos->pos[offset];      /* FIXME */
+}
+
+/* Search the concatenation of STRING1 (SIZE1 bytes) and STRING2 (SIZE2
+ * bytes) with the compiled pattern RXB by calling rx_search with the
+ * re_search_2_* callbacks.  STARTPOS, RANGE, REGS and STOP are passed
+ * through to rx_search.
+ *
+ * Returns -2 when rx_search reports an error, -1 when it reports a (soft)
+ * failure, and rx_search's own result otherwise.  A continuation result
+ * aborts the program, since no resume/save state is supplied.
+ */
+#ifdef __STDC__
+RE_S2_QUAL int
+RE_SEARCH_2_FN (struct re_pattern_buffer *rxb,
+                __const__ char * string1, int size1,
+                __const__ char * string2, int size2,
+                int startpos, int range,
+                struct re_registers *regs,
+                int stop)
+#else
+RE_S2_QUAL int
+RE_SEARCH_2_FN (rxb,
+                string1, size1, string2, size2, startpos, range, regs, stop)
+     struct re_pattern_buffer *rxb;
+     __const__ char * string1;
+     int size1;
+     __const__ char * string2;
+     int size2;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+     int stop;
+#endif
+{
+  struct re_search_2_closure pieces;
+  int result;
+
+  pieces.string1 = string1;
+  pieces.size1 = size1;
+  pieces.string2 = string2;
+  pieces.size2 = size2;
+
+  result = rx_search (rxb, startpos, range, stop, size1 + size2,
+                      re_search_2_get_burst,
+                      re_search_2_back_check,
+                      re_search_2_fetch_char,
+                      (void *)&pieces,
+                      regs,
+                      0,
+                      0);
+
+  if (result == rx_search_continuation)
+    abort ();
+  if (result == rx_search_error)
+    return -2;
+  if (result == rx_search_soft_fail || result == rx_search_fail)
+    return -1;
+  return result;
+}
+
+/* Export rx_search to callers outside this file.
+ *
+ * Every argument is forwarded to rx_search unchanged and in the same
+ * order, and rx_search's result is returned as is.
+ */
+
+#ifdef __STDC__
+int
+re_rx_search ( struct re_pattern_buffer * rxb, int startpos, int range,
+ int stop, int total_size, rx_get_burst_fn get_burst,
+ rx_back_check_fn back_check, rx_fetch_char_fn fetch_char,
+ void * app_closure, struct re_registers * regs,
+ struct rx_search_state * resume_state,
+ struct rx_search_state * save_state )
+#else
+int
+re_rx_search (rxb, startpos, range, stop, total_size,
+ get_burst, back_check, fetch_char,
+ app_closure, regs, resume_state, save_state)
+ struct re_pattern_buffer * rxb;
+ int startpos;
+ int range;
+ int stop;
+ int total_size;
+ rx_get_burst_fn get_burst;
+ rx_back_check_fn back_check;
+ rx_fetch_char_fn fetch_char;
+ void * app_closure;
+ struct re_registers * regs;
+ struct rx_search_state * resume_state;
+ struct rx_search_state * save_state;
+#endif
+{
+ return rx_search (rxb, startpos, range, stop, total_size,
+ get_burst, back_check, fetch_char, app_closure,
+ regs, resume_state, save_state);
+}
+/* Public re_search_2 for builds in which the search routine above was
+ * compiled as the static inner_re_search_2 (neither REGEX_MALLOC nor
+ * __GNUC__ defined).  Forwards every argument and returns the result. */
+#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
+#ifdef __STDC__
+int
+re_search_2 (struct re_pattern_buffer *rxb,
+ __const__ char * string1, int size1,
+ __const__ char * string2, int size2,
+ int startpos, int range,
+ struct re_registers *regs,
+ int stop)
+#else
+int
+re_search_2 (rxb, string1, size1, string2, size2, startpos, range, regs, stop)
+ struct re_pattern_buffer *rxb;
+ __const__ char * string1;
+ int size1;
+ __const__ char * string2;
+ int size2;
+ int startpos;
+ int range;
+ struct re_registers *regs;
+ int stop;
+#endif
+{
+ int ret;
+ ret = inner_re_search_2 (rxb, string1, size1, string2, size2, startpos,
+ range, regs, stop);
+ alloca (0); /* NOTE(review): presumably lets an emulated alloca reclaim storage -- confirm */
+ return ret;
+}
+#endif
+
+
+/* Like re_search_2, above, but only one string is specified, and
+ * doesn't let you say where to stop matching.
+ *
+ * STRING is handed to re_search_2 as the second piece (the first piece
+ * is null with length 0), and SIZE is used as the stop offset.
+ */
+
+#ifdef __STDC__
+int
+re_search (struct re_pattern_buffer * rxb, __const__ char *string,
+ int size, int startpos, int range,
+ struct re_registers *regs)
+#else
+int
+re_search (rxb, string, size, startpos, range, regs)
+ struct re_pattern_buffer * rxb;
+ __const__ char * string;
+ int size;
+ int startpos;
+ int range;
+ struct re_registers *regs;
+#endif
+{
+ return re_search_2 (rxb, 0, 0, string, size, startpos, range, regs, size);
+}
+
+/* Match the compiled pattern RXB against the concatenation of STRING1 and
+ * STRING2 at position POS by running re_search_2 with a range of 1 and
+ * the fastmap temporarily disabled.
+ *
+ * When REGS is null, a one-register scratch set is used for the call and
+ * RXB->regs_allocated is switched to REGS_FIXED for its duration, then
+ * restored.
+ *
+ * Returns re_search_2's negative result on failure, otherwise the length
+ * of register 0 (end[0] - start[0]).
+ */
+#ifdef __STDC__
+int
+re_match_2 (struct re_pattern_buffer * rxb,
+            __const__ char * string1, int size1,
+            __const__ char * string2, int size2,
+            int pos, struct re_registers *regs, int stop)
+#else
+int
+re_match_2 (rxb, string1, size1, string2, size2, pos, regs, stop)
+     struct re_pattern_buffer * rxb;
+     __const__ char * string1;
+     int size1;
+     __const__ char * string2;
+     int size2;
+     int pos;
+     struct re_registers *regs;
+     int stop;
+#endif
+{
+  struct re_registers scratch;
+  regoff_t scratch_start;
+  regoff_t scratch_end;
+  struct re_registers * out;
+  char * saved_fastmap;
+  int saved_alloc;
+  int found;
+
+  saved_alloc = rxb->regs_allocated;
+  saved_fastmap = rxb->fastmap;
+
+  if (regs)
+    out = regs;
+  else
+    {
+      /* The caller wants no registers; the match length is still needed. */
+      scratch.num_regs = 1;
+      scratch.start = &scratch_start;
+      scratch.end = &scratch_end;
+      out = &scratch;
+      rxb->regs_allocated = REGS_FIXED;
+    }
+
+  rxb->fastmap = NULL;
+  found = re_search_2 (rxb, string1, size1, string2, size2,
+                       pos, 1, out, stop);
+  rxb->fastmap = saved_fastmap;
+
+  if (!regs)
+    rxb->regs_allocated = saved_alloc;
+
+  if (found >= 0)
+    return out->end[0] - out->start[0];
+  return found;
+}
+
+/* re_match is like re_match_2 except it takes only a single string.
+ STRING is passed as the first piece (the second piece is null with
+ length 0), and SIZE is used as the stop offset. */
+
+#ifdef __STDC__
+int
+re_match (struct re_pattern_buffer * rxb,
+ __const__ char * string,
+ int size, int pos,
+ struct re_registers *regs)
+#else
+int
+re_match (rxb, string, size, pos, regs)
+ struct re_pattern_buffer * rxb;
+ __const__ char *string;
+ int size;
+ int pos;
+ struct re_registers *regs;
+#endif
+{
+ return re_match_2 (rxb, string, size, 0, 0, pos, regs, size);
+}
+
+
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations.
+ Starts out as RE_SYNTAX_EMACS; re_compile_pattern and re_comp pass the
+ current value to rx_compile. */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+#ifdef __STDC__
+reg_syntax_t
+re_set_syntax (reg_syntax_t syntax)
+#else
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+#endif
+{
+  reg_syntax_t previous;
+
+  /* Swap in the new syntax bits and hand the old ones back so the caller
+     can restore them later. */
+  previous = re_syntax_options;
+  re_syntax_options = syntax;
+
+  return previous;
+}
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+#ifdef __STDC__
+void
+re_set_registers (struct re_pattern_buffer *bufp,
+                  struct re_registers *regs,
+                  unsigned num_regs,
+                  regoff_t * starts, regoff_t * ends)
+#else
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t * starts;
+     regoff_t * ends;
+#endif
+{
+  if (num_regs)
+    {
+      /* Adopt the caller's arrays; REGS_REALLOCATE marks them as
+         storage the matcher may resize. */
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      /* No caller storage: let the next match allocate its own. */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      /* start and end are regoff_t pointers, so the null must be spelled
+         as a pointer, not as an integer-typed (regoff_t) 0. */
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+
+
+
+
+/* Count the cells of LIST whose `car', viewed as a long, is non-negative.
+ * Returns 0 for an empty list.
+ */
+#ifdef __STDC__
+static int
+cplx_se_sublist_len (struct rx_se_list * list)
+#else
+static int
+cplx_se_sublist_len (list)
+     struct rx_se_list * list;
+#endif
+{
+  struct rx_se_list * cell;
+  int count = 0;
+
+  for (cell = list; cell; cell = cell->cdr)
+    {
+      if ((long)cell->car < 0)
+        continue;
+      ++count;
+    }
+  return count;
+}
+
+
+/* For rx->se_list_cmp
+ *
+ * Orders two side-effect lists A and B, returning -1, 0 or 1.  Only the
+ * cells counted by cplx_se_sublist_len (car >= 0 as a long) take part:
+ *
+ *   - neither list has such cells: the list pointers themselves are
+ *     compared (0 only when A and B are the same list);
+ *   - exactly one list has none: that list orders first;
+ *   - otherwise the qualifying cars of each list are copied, in reverse
+ *     list order, into alloca'd arrays and the first differing pair is
+ *     compared as pointer values.
+ *
+ * Each array ends with a sentinel, and the two sentinels differ (-2 for
+ * A, -1 for B), so the scan always stops.  NOTE(review): as a result two
+ * lists with identical qualifying cells compare as -1, never 0.
+ *
+ * RX is not used.
+ */
+
+#ifdef __STDC__
+static int
+posix_se_list_order (struct rx * rx,
+ struct rx_se_list * a, struct rx_se_list * b)
+#else
+static int
+posix_se_list_order (rx, a, b)
+ struct rx * rx;
+ struct rx_se_list * a;
+ struct rx_se_list * b;
+#endif
+{
+ int al = cplx_se_sublist_len (a);
+ int bl = cplx_se_sublist_len (b);
+
+ if (!al && !bl)
+ return ((a == b)
+ ? 0
+ : ((a < b) ? -1 : 1));
+
+ else if (!al)
+ return -1;
+
+ else if (!bl)
+ return 1;
+
+ else
+ {
+ rx_side_effect * av = ((rx_side_effect *)
+ alloca (sizeof (rx_side_effect) * (al + 1))); /* +1 for the sentinel */
+ rx_side_effect * bv = ((rx_side_effect *)
+ alloca (sizeof (rx_side_effect) * (bl + 1))); /* +1 for the sentinel */
+ struct rx_se_list * ap = a;
+ struct rx_se_list * bp = b;
+ int ai, bi;
+
+ for (ai = al - 1; ai >= 0; --ai) /* fill back to front: reverses list order */
+ {
+ while ((long)ap->car < 0) /* skip cells that were not counted */
+ ap = ap->cdr;
+ av[ai] = ap->car;
+ ap = ap->cdr;
+ }
+ av[al] = (rx_side_effect)-2; /* sentinel for A */
+ for (bi = bl - 1; bi >= 0; --bi)
+ {
+ while ((long)bp->car < 0)
+ bp = bp->cdr;
+ bv[bi] = bp->car;
+ bp = bp->cdr;
+ }
+ bv[bl] = (rx_side_effect)-1; /* sentinel for B; differs from A's so the scan below ends */
+
+ {
+ int ret;
+ int x = 0;
+ while (av[x] == bv[x]) /* find the first differing element */
+ ++x;
+ ret = (((unsigned *)(av[x]) < (unsigned *)(bv[x])) ? -1 : 1);
+ return ret;
+ }
+ }
+}
+
+
+
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length SIZE) and puts the result in RXB.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in RXB on entry.
+
+ We call rx_compile to do the actual compilation. */
+
+#ifdef __STDC__
+__const__ char *
+re_compile_pattern (__const__ char *pattern,
+                    int length,
+                    struct re_pattern_buffer * rxb)
+#else
+__const__ char *
+re_compile_pattern (pattern, length, rxb)
+     __const__ char *pattern;
+     int length;
+     struct re_pattern_buffer * rxb;
+#endif
+{
+  reg_errcode_t status;
+
+  /* Register handling, GNU style: callers expect at least RE_NREGS
+     registers to be set (with at least one extra left at -1), and they
+     ask for register information by passing a non-null REGS to re_match
+     and friends rather than through no_sub.  */
+  rxb->regs_allocated = REGS_UNALLOCATED;
+  rxb->no_sub = 0;
+
+  /* Anchors match at newlines.  */
+  rxb->newline_anchor = 1;
+
+  /* Start from an empty compiled pattern.  */
+  rxb->re_nsub = 0;
+  rxb->start = 0;
+  rxb->se_params = 0;
+
+  /* Reset the embedded rx engine state.  */
+  rxb->rx.local_cset_size = 256;
+  rxb->rx.se_list_cmp = posix_se_list_order;
+  rxb->rx.nodec = 0;
+  rxb->rx.epsnodec = 0;
+  rxb->rx.instruction_table = 0;
+  rxb->rx.nfa_states = 0;
+  rxb->rx.start_set = 0;
+
+  status = rx_compile (pattern, length, re_syntax_options, rxb);
+  alloca (0);
+
+  /* The REG_NOERROR entry of the table is a null pointer.  */
+  return rx_error_msg[(int) status];
+}
+
+/* Fill in RXB's fastmap by calling rx_blow_up_fastmap.  Always returns 0. */
+#ifdef __STDC__
+int
+re_compile_fastmap (struct re_pattern_buffer * rxb)
+#else
+int
+re_compile_fastmap (rxb)
+ struct re_pattern_buffer * rxb;
+#endif
+{
+ rx_blow_up_fastmap (rxb);
+ return 0;
+}
+
+
+
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them if this is an Emacs or POSIX compilation. */
+
+#if (!defined (emacs) && !defined (_POSIX_SOURCE)) || defined(USE_BSD_REGEX)
+
+/* BSD has one and only one pattern buffer. re_comp compiles into it and
+ re_exec searches with it. */
+static struct re_pattern_buffer rx_comp_buf;
+/* BSD entry point: compile S into the single static buffer rx_comp_buf,
+ * using the syntax currently in re_syntax_options.
+ *
+ * A null or empty S compiles nothing: the result is 0 if rx_comp_buf
+ * already has a buffer, and an error string otherwise.
+ *
+ * Returns 0 on success, otherwise a message string (the rx_error_msg
+ * entry for the error, or a literal for the two early failures). */
+#ifdef __STDC__
+char *
+re_comp (__const__ char *s)
+#else
+char *
+re_comp (s)
+ __const__ char *s;
+#endif
+{
+ reg_errcode_t ret;
+
+ if (!s || (*s == '\0'))
+ {
+ if (!rx_comp_buf.buffer)
+ return "No previous regular expression";
+ return 0;
+ }
+
+ if (!rx_comp_buf.fastmap) /* allocated once, on first use, and then kept */
+ {
+ rx_comp_buf.fastmap = (char *) malloc (1 << CHARBITS);
+ if (!rx_comp_buf.fastmap)
+ return "Memory exhausted";
+ }
+
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
+ don't need to initialize the pattern buffer fields which affect it. */
+
+ /* Match anchors at newlines. */
+ rx_comp_buf.newline_anchor = 1;
+
+ rx_comp_buf.re_nsub = 0;
+ rx_comp_buf.start = 0;
+ rx_comp_buf.se_params = 0;
+ rx_comp_buf.rx.nodec = 0;
+ rx_comp_buf.rx.epsnodec = 0;
+ rx_comp_buf.rx.instruction_table = 0;
+ rx_comp_buf.rx.nfa_states = 0;
+ rx_comp_buf.rx.start = 0;
+ rx_comp_buf.rx.se_list_cmp = posix_se_list_order;
+ rx_comp_buf.rx.start_set = 0;
+ rx_comp_buf.rx.local_cset_size = 256;
+
+ ret = rx_compile (s, strlen (s), re_syntax_options, &rx_comp_buf);
+ alloca (0);
+
+ /* Yes, we're discarding `__const__' here. */
+ return (char *) rx_error_msg[(int) ret]; /* the REG_NOERROR entry is a null pointer */
+}
+
+
+/* BSD-style match: search all of S for the pattern held in the static
+   buffer rx_comp_buf.  Yields 1 when re_search returns a non-negative
+   value and 0 otherwise; no register information is requested.  */
+int
+#ifdef __STDC__
+re_exec (__const__ char *s)
+#else
+re_exec (s)
+     __const__ char *s;
+#endif
+{
+  __const__ int length = strlen (s);
+  int found_at;
+
+  /* Start at offset 0 and allow the search to range over the whole
+     string.  */
+  found_at = re_search (&rx_comp_buf, s, length, 0, length,
+                        (struct re_registers *) 0);
+  return found_at >= 0;
+}
+#endif /* not emacs and not _POSIX_SOURCE */
+
+
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#if !defined(emacs)
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' to a newly allocated 256-byte buffer, and `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+
+/* POSIX regcomp: compile PATTERN into PREG as directed by CFLAGS.
+
+   A 256-byte fastmap is always allocated, and with REG_ICASE a 256-byte
+   translate table mapping uppercase to lowercase as well; regfree
+   releases both.  Returns REG_ESPACE if either allocation fails (the
+   fastmap is released again when the translate table cannot be
+   obtained), otherwise the status from rx_compile with REG_ERPAREN
+   folded into REG_EPAREN.  */
+#ifdef __STDC__
+int
+regcomp (regex_t * preg, __const__ char * pattern, int cflags)
+#else
+int
+regcomp (preg, pattern, cflags)
+     regex_t * preg;
+     __const__ char * pattern;
+     int cflags;
+#endif
+{
+  reg_errcode_t ret;
+  unsigned syntax
+    = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+
+  /* Start with no compiled pattern; compilation happens in rx_compile
+     below.  */
+  preg->buffer = 0;
+  preg->allocated = 0;
+  preg->fastmap = malloc (256);
+  if (!preg->fastmap)
+    return REG_ESPACE;
+  preg->fastmap_accurate = 0;
+
+  if (cflags & REG_ICASE)
+    {
+      unsigned i;
+
+      preg->translate = (unsigned char *) malloc (256);
+      if (!preg->translate)
+        {
+          /* Don't leak the fastmap allocated above, and don't leave a
+             dangling pointer behind in PREG.  */
+          free (preg->fastmap);
+          preg->fastmap = 0;
+          return (int) REG_ESPACE;
+        }
+
+      /* Map uppercase characters to corresponding lowercase ones.  */
+      for (i = 0; i < CHAR_SET_SIZE; i++)
+        preg->translate[i] = isupper (i) ? tolower (i) : i;
+    }
+  else
+    preg->translate = 0;
+
+  /* If REG_NEWLINE is set, newlines are treated differently.  */
+  if (cflags & REG_NEWLINE)
+    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* It also changes the matching behavior.  */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+
+  preg->no_sub = !!(cflags & REG_NOSUB);
+
+  /* Reset the per-compilation state before handing PREG to
+     rx_compile.  */
+  preg->re_nsub = 0;
+  preg->start = 0;
+  preg->se_params = 0;
+  preg->syntax_parens = 0;
+  preg->rx.nodec = 0;
+  preg->rx.epsnodec = 0;
+  preg->rx.instruction_table = 0;
+  preg->rx.nfa_states = 0;
+  preg->rx.local_cset_size = 256;
+  preg->rx.start = 0;
+  preg->rx.se_list_cmp = posix_se_list_order;
+  preg->rx.start_set = 0;
+
+  /* POSIX says a null character in the pattern terminates it, so we
+     can use strlen here in compiling the pattern.  */
+  ret = rx_compile (pattern, strlen (pattern), syntax, preg);
+  alloca (0);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN.  */
+  if (ret == REG_ERPAREN)
+    ret = REG_EPAREN;
+
+  return (int) ret;
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+/* POSIX regexec: search STRING for the pattern compiled into PREG.
+
+   PREG is copied into a local so the REG_NOTBOL / REG_NOTEOL bits of
+   EFLAGS can be recorded without writing through the caller's const
+   object.  When PREG was not compiled with REG_NOSUB and NMATCH is
+   nonzero, the first NMATCH entries of PMATCH receive the register
+   offsets of a successful search.
+
+   Returns REG_NOERROR (0) when re_search reports a match and
+   REG_NOMATCH otherwise, including when the temporary register arrays
+   cannot be allocated.  */
+#ifdef __STDC__
+int
+regexec (__const__ regex_t *preg, __const__ char *string,
+         size_t nmatch, regmatch_t pmatch[],
+         int eflags)
+#else
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     __const__ regex_t *preg;
+     __const__ char *string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+#endif
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  private_preg = *preg;
+
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+  /* The user has told us exactly how many registers to return
+   * information about, via `nmatch'.  We have to pass that on to the
+   * matching routines.
+   */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      regs.num_regs = nmatch;
+      regs.start = (regoff_t *) malloc (nmatch * sizeof (regoff_t));
+      regs.end = (regoff_t *) malloc (nmatch * sizeof (regoff_t));
+      if (regs.start == 0 || regs.end == 0)
+        {
+          /* Only one of the two may have failed; release the other so
+             it is not leaked.  */
+          if (regs.start != 0)
+            free (regs.start);
+          if (regs.end != 0)
+            free (regs.end);
+          /* Reported as REG_NOMATCH to keep the documented two-value
+             return contract.  */
+          return (int) REG_NOMATCH;
+        }
+    }
+
+  /* Perform the searching operation.  */
+  ret = re_search (&private_preg,
+                   string, len,
+                   /* start: */ 0,
+                   /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
+
+  /* Copy the register information to the POSIX structure.  */
+  if (want_reg_info)
+    {
+      if (ret >= 0)
+        {
+          /* size_t so the counter has the same width as NMATCH.  */
+          size_t r;
+
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
+
+      /* If we needed the temporary register info, free the space now.  */
+      free (regs.start);
+      free (regs.end);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. */
+
+/* Copy the message for ERRCODE into ERRBUF, which holds ERRBUF_SIZE
+   bytes.  The text comes from rx_error_msg[ERRCODE], with "Success"
+   substituted for a null entry.  A message that does not fit is
+   truncated and still null-terminated; nothing is written when
+   ERRBUF_SIZE is 0.  The result is the size needed for the whole
+   message, terminator included.  PREG is not examined.  */
+size_t
+#ifdef __STDC__
+regerror (int errcode, __const__ regex_t *preg,
+          char *errbuf, size_t errbuf_size)
+#else
+regerror (errcode, preg, errbuf, errbuf_size)
+     int errcode;
+     __const__ regex_t *preg;
+     char *errbuf;
+     size_t errbuf_size;
+#endif
+{
+  __const__ char *text = rx_error_msg[errcode];
+  size_t needed;
+
+  if (text == 0)
+    text = "Success";
+  needed = strlen (text) + 1;	/* Includes the 0.  */
+
+  /* No room at all: just report the size required.  */
+  if (errbuf_size == 0)
+    return needed;
+
+  if (needed <= errbuf_size)
+    strcpy (errbuf, text);
+  else
+    {
+      /* Keep as much as fits and terminate it by hand.  */
+      strncpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = 0;
+    }
+
+  return needed;
+}
+
+
+/* Free dynamically allocated space used by PREG. */
+
+/* Release the buffer, fastmap and translate table hanging off PREG and
+   reset the matching fields, so each pointer is left null and
+   `allocated' / `fastmap_accurate' are left zero.  */
+void
+#ifdef __STDC__
+regfree (regex_t *preg)
+#else
+regfree (preg)
+     regex_t *preg;
+#endif
+{
+  /* Compiled pattern.  */
+  if (preg->buffer)
+    {
+      free (preg->buffer);
+      preg->buffer = 0;
+    }
+  preg->allocated = 0;
+
+  /* Fastmap.  */
+  if (preg->fastmap)
+    {
+      free (preg->fastmap);
+      preg->fastmap = 0;
+    }
+  preg->fastmap_accurate = 0;
+
+  /* Case-folding table, present only if one was allocated.  */
+  if (preg->translate)
+    {
+      free (preg->translate);
+      preg->translate = 0;
+    }
+}
+
+#endif /* not emacs */
diff --git a/libc/stdlib/Makefile b/libc/stdlib/Makefile
index 2c2d0f5fb..6f33f8afc 100644
--- a/libc/stdlib/Makefile
+++ b/libc/stdlib/Makefile
@@ -18,7 +18,7 @@ EOBJ=on_exit.o atexit.o __do_exit.o exit.o
GOBJ=atoi.o atol.o ltoa.o ltostr.o \
ctype.o qsort.o bsearch.o rand.o lsearch.o getopt.o \
glob.o fnmatch.o itoa.o strtol.o crypt.o sleep.o mkstemp.o \
- mktemp.o
+ mktemp.o realpath.o
UOBJ=getenv.o putenv.o popen.o system.o getcwd.o setenv.o \
execl.o execv.o execlp.o execvp.o execvep.o
diff --git a/libc/stdlib/realpath.c b/libc/stdlib/realpath.c
new file mode 100644
index 000000000..d053cfcaf
--- /dev/null
+++ b/libc/stdlib/realpath.c
@@ -0,0 +1,168 @@
+/*
+ * realpath.c -- canonicalize pathname by removing symlinks
+ * Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <limits.h> /* for PATH_MAX */
+#include <sys/param.h> /* for MAXPATHLEN */
+#include <errno.h>
+
+#include <sys/stat.h> /* for S_IFLNK */
+
+#ifndef PATH_MAX
+#ifdef _POSIX_VERSION
+#define PATH_MAX _POSIX_PATH_MAX
+#else
+#ifdef MAXPATHLEN
+#define PATH_MAX MAXPATHLEN
+#else
+#define PATH_MAX 1024
+#endif
+#endif
+#endif
+
+#define MAX_READLINKS 32
+
+/*
+ * Canonicalize PATH into RESOLVED_PATH: make it absolute, drop "." and
+ * redundant "/" components, collapse "..", and expand symbolic links.
+ *
+ * RESOLVED_PATH must have room for at least PATH_MAX bytes.
+ *
+ * Returns RESOLVED_PATH on success.  On failure returns NULL with errno
+ * set:
+ *   EINVAL        PATH or RESOLVED_PATH is NULL;
+ *   ENAMETOOLONG  the input or the result would not fit in PATH_MAX;
+ *   ELOOP         more than MAX_READLINKS symlinks were expanded;
+ *   otherwise     whatever getcwd() or readlink() reported.
+ * When readlink() fails, RESOLVED_PATH holds the path built so far,
+ * ending in the component that could not be examined.
+ */
+#ifdef __STDC__
+char *realpath(const char *path, char resolved_path [])
+#else
+char *realpath(path, resolved_path)
+const char *path;
+char resolved_path [];
+#endif
+{
+    char copy_path[PATH_MAX];
+    char link_path[PATH_MAX];
+    char got_path [PATH_MAX];
+    char *new_path = got_path;
+    char *max_path;
+    int readlinks = 0;
+    int n;
+    int saved_errno;
+
+    if (path == NULL || resolved_path == NULL) {
+        errno = EINVAL;
+        return NULL;
+    }
+    /* Make a copy of the source path since we may need to modify it. */
+    if (strlen(path) >= PATH_MAX - 2) {
+        errno = ENAMETOOLONG;
+        return NULL;
+    }
+    strcpy(copy_path, path);
+    path = copy_path;
+    /*
+     * Last position in the OUTPUT buffer that may receive a component
+     * character; one more byte stays free for the '/' or '\0' that
+     * follows each component.
+     */
+    max_path = got_path + PATH_MAX - 2;
+    /* If it's a relative pathname use getcwd for starters. */
+    if (*path != '/') {
+        if (getcwd(new_path, PATH_MAX - 1) == NULL)
+            return NULL;            /* errno set by getcwd */
+        new_path += strlen(new_path);
+        if (new_path[-1] != '/')
+            *new_path++ = '/';
+    }
+    else {
+        *new_path++ = '/';
+        path++;
+    }
+    /* Expand each slash-separated pathname component. */
+    while (*path != '\0') {
+        /* Ignore stray "/". */
+        if (*path == '/') {
+            path++;
+            continue;
+        }
+        if (*path == '.') {
+            /* Ignore ".". */
+            if (path[1] == '\0' || path[1] == '/') {
+                path++;
+                continue;
+            }
+            if (path[1] == '.' && (path[2] == '\0' || path[2] == '/')) {
+                path += 2;
+                /* Ignore ".." at root. */
+                if (new_path == got_path + 1)
+                    continue;
+                /* Handle ".." by backing up over the last component. */
+                while ((--new_path)[-1] != '/')
+                    ;
+                continue;
+            }
+        }
+        /* Safely copy the next pathname component. */
+        while (*path != '\0' && *path != '/') {
+            /* Bound the destination, not the source. */
+            if (new_path > max_path) {
+                errno = ENAMETOOLONG;
+                return NULL;
+            }
+            *new_path++ = *path++;
+        }
+#ifdef S_IFLNK
+        /* See if latest pathname component is a symlink. */
+        *new_path = '\0';
+        saved_errno = errno;
+        n = readlink(got_path, link_path, PATH_MAX - 1);
+        if (n < 0) {
+            /* EINVAL means the file exists but isn't a symlink. */
+            if (errno != EINVAL) {
+                strcpy(resolved_path, got_path);
+                return NULL;
+            }
+            /* Not an error for us: don't leak EINVAL to the caller. */
+            errno = saved_errno;
+        }
+        else {
+            /*
+             * Protect against infinite loops.  Only real symlink
+             * expansions count, so deep link-free paths are fine.
+             */
+            if (++readlinks > MAX_READLINKS) {
+                errno = ELOOP;
+                return NULL;
+            }
+            /* Note: readlink doesn't add the null byte. */
+            link_path[n] = '\0';
+            if (*link_path == '/')
+                /* Start over for an absolute symlink. */
+                new_path = got_path;
+            else
+                /* Otherwise back up over this component. */
+                while (*(--new_path) != '/')
+                    ;
+            /* Link text plus the unparsed remainder must fit. */
+            if (strlen(path) + n >= PATH_MAX - 2) {
+                errno = ENAMETOOLONG;
+                return NULL;
+            }
+            /* Insert symlink contents into path. */
+            strcat(link_path, path);
+            strcpy(copy_path, link_path);
+            path = copy_path;
+        }
+#endif /* S_IFLNK */
+        *new_path++ = '/';
+    }
+    /* Delete trailing slash but don't whomp a lone slash. */
+    if (new_path != got_path + 1 && new_path[-1] == '/')
+        new_path--;
+    /* Make sure it's null terminated. */
+    *new_path = '\0';
+    strcpy(resolved_path, got_path);
+    return resolved_path;
+}