Use _IO_FILE_complete, not _IO_file_plus.
[kopensolaris-gnu/glibc.git] / posix / regex.c
index b7c82f6..4f125be 100644 (file)
@@ -1,6 +1,6 @@
 /* Extended regular expression matching and search library,
    version 0.12.
-   (Implements POSIX draft P10003.2/D11.2, except for
+   (Implements POSIX draft P1003.2/D11.2, except for some of the
    internationalization features.)
 
    Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
 #include <sys/types.h>
 #endif
 
+/* For platform which support the ISO C amendement 1 functionality we
+   support user defined character classes.  */
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+# include <wctype.h>
+# include <wchar.h>
+#endif
+
 /* This is for other GNU distributions with internationalized messages.  */
 #if HAVE_LIBINTL_H || defined (_LIBC)
 # include <libintl.h>
@@ -121,15 +128,15 @@ char *realloc ();
 #define SWITCH_ENUM_CAST(x) (x)
 #endif
 
+/* How many characters in the character set.  */
+#define CHAR_SET_SIZE 256
+
 #ifdef SYNTAX_TABLE
 
 extern char *re_syntax_table;
 
 #else /* not SYNTAX_TABLE */
 
-/* How many characters in the character set.  */
-#define CHAR_SET_SIZE 256
-
 static char re_syntax_table[CHAR_SET_SIZE];
 
 static void
@@ -946,6 +953,12 @@ re_set_syntax (syntax)
   reg_syntax_t ret = re_syntax_options;
 
   re_syntax_options = syntax;
+#ifdef DEBUG
+  if (syntax & RE_DEBUG)
+    debug = 1;
+  else if (debug) /* was on but now is not */
+    debug = 0;
+#endif /* DEBUG */
   return ret;
 }
 \f
@@ -1026,22 +1039,24 @@ static const char *re_error_msgid[] =
 #endif
 
 /* Roughly the maximum number of failure points on the stack.  Would be
-   exactly that if always used MAX_FAILURE_SPACE each time we failed.
+   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
    This is a variable only so users of regex can assign to it; we never
    change it ourselves.  */
 
 #ifdef INT_IS_16BIT
 
 #if defined (MATCH_MAY_ALLOCATE)
-long re_max_failures = 4000;
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+   whose default stack limit is 2mb.  */
+long int re_max_failures = 4000;
 #else
-long re_max_failures = 2000;
+long int re_max_failures = 2000;
 #endif
 
 union fail_stack_elt
 {
   unsigned char *pointer;
-  long integer;
+  long int integer;
 };
 
 typedef union fail_stack_elt fail_stack_elt_t;
@@ -1049,8 +1064,8 @@ typedef union fail_stack_elt fail_stack_elt_t;
 typedef struct
 {
   fail_stack_elt_t *stack;
-  unsigned long size;
-  unsigned long avail;                 /* Offset of next open position.  */
+  unsigned long int size;
+  unsigned long int avail;             /* Offset of next open position.  */
 } fail_stack_type;
 
 #else /* not INT_IS_16BIT */
@@ -1058,7 +1073,7 @@ typedef struct
 #if defined (MATCH_MAY_ALLOCATE)
 /* 4400 was enough to cause a crash on Alpha OSF/1,
    whose default stack limit is 2mb.  */
-int re_max_failures = 4000;
+int re_max_failures = 20000;
 #else
 int re_max_failures = 2000;
 #endif
@@ -1170,7 +1185,7 @@ typedef struct
 /* Used to omit pushing failure point id's when we're not debugging.  */
 #ifdef DEBUG
 #define DEBUG_PUSH PUSH_FAILURE_INT
-#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
 #else
 #define DEBUG_PUSH(item)
 #define DEBUG_POP(item_addr)
@@ -1194,7 +1209,7 @@ typedef struct
     /* Can't be int, since there is not a shred of a guarantee that int        \
        is wide enough to hold a value of something to which pointer can        \
        be assigned */                                                  \
-    s_reg_t this_reg;                                                  \
+    active_reg_t this_reg;                                             \
                                                                        \
     DEBUG_STATEMENT (failure_id++);                                    \
     DEBUG_STATEMENT (nfailure_points_pushed++);                                \
@@ -1202,7 +1217,7 @@ typedef struct
     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
                                                                        \
-    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);          \
+    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);         \
     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);      \
                                                                        \
     /* Ensure we have enough space allocated for what we will push.  */        \
@@ -1223,16 +1238,17 @@ typedef struct
       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
           this_reg++)                                                  \
        {                                                               \
-         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);               \
+         DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);              \
          DEBUG_STATEMENT (num_regs_pushed++);                          \
                                                                        \
-         DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);       \
+         DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);         \
          PUSH_FAILURE_POINTER (regstart[this_reg]);                    \
                                                                        \
-         DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);           \
+         DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);             \
          PUSH_FAILURE_POINTER (regend[this_reg]);                      \
                                                                        \
-         DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);  \
+         DEBUG_PRINT2 ("    info: %p\n      ",                         \
+                       reg_info[this_reg].word.pointer);               \
          DEBUG_PRINT2 (" match_null=%d",                               \
                        REG_MATCH_NULL_STRING_P (reg_info[this_reg]));  \
          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));  \
@@ -1244,17 +1260,17 @@ typedef struct
          PUSH_FAILURE_ELT (reg_info[this_reg].word);                   \
        }                                                               \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
+    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
     PUSH_FAILURE_INT (lowest_active_reg);                              \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
+    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
     PUSH_FAILURE_INT (highest_active_reg);                             \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing pattern 0x%x:\n", pattern_place);         \
+    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);           \
     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);          \
     PUSH_FAILURE_POINTER (pattern_place);                              \
                                                                        \
-    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);           \
+    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);             \
     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
                                 size2);                                \
     DEBUG_PRINT1 ("'\n");                                              \
@@ -1306,8 +1322,8 @@ typedef struct
 
 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
 {                                                                      \
-  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                       \
-  s_reg_t this_reg;                                                    \
+  DEBUG_STATEMENT (unsigned failure_id;)                               \
+  active_reg_t this_reg;                                               \
   const unsigned char *string_temp;                                    \
                                                                        \
   assert (!FAIL_STACK_EMPTY ());                                       \
@@ -1329,34 +1345,35 @@ typedef struct
   if (string_temp != NULL)                                             \
     str = (const char *) string_temp;                                  \
                                                                        \
-  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                      \
+  DEBUG_PRINT2 ("  Popping string %p: `", str);                                \
   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);     \
   DEBUG_PRINT1 ("'\n");                                                        \
                                                                        \
   pat = (unsigned char *) POP_FAILURE_POINTER ();                      \
-  DEBUG_PRINT2 ("  Popping pattern 0x%x:\n", pat);                     \
+  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);                       \
   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                      \
                                                                        \
   /* Restore register info.  */                                                \
   high_reg = (active_reg_t) POP_FAILURE_INT ();                                \
-  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);          \
+  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);         \
                                                                        \
   low_reg = (active_reg_t) POP_FAILURE_INT ();                         \
-  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);           \
+  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);          \
                                                                        \
   if (1)                                                               \
     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)         \
       {                                                                        \
-       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);               \
+       DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);              \
                                                                        \
        reg_info[this_reg].word = POP_FAILURE_ELT ();                   \
-       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);        \
+       DEBUG_PRINT2 ("      info: %p\n",                               \
+                     reg_info[this_reg].word.pointer);                 \
                                                                        \
        regend[this_reg] = (const char *) POP_FAILURE_POINTER ();       \
-       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);           \
+       DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);             \
                                                                        \
        regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();     \
-       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);       \
+       DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);         \
       }                                                                        \
   else                                                                 \
     {                                                                  \
@@ -1553,7 +1570,7 @@ static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
    reallocating to 0 bytes.  Such thing is not going to work too well.
    You have been warned!!  */
-#ifdef _MSC_VER
+#if defined(_MSC_VER)  && !defined(WIN32)
 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
    The REALLOC define eliminates a flurry of conversion warnings,
    but is not required. */
@@ -1661,15 +1678,29 @@ typedef struct
        }                                                               \
     }
 
-#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+/* The GNU C library provides support for user-defined character classes
+   and the functions from ISO C amendement 1.  */
+# ifdef CHARCLASS_NAME_MAX
+#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+   problem.  Use a reasonable default value.  */
+#  define CHAR_CLASS_MAX_LENGTH 256
+# endif
+
+# define IS_CHAR_CLASS(string) wctype (string)
+#else
+# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
 
-#define IS_CHAR_CLASS(string)                                          \
+# define IS_CHAR_CLASS(string)                                         \
    (STREQ (string, "alpha") || STREQ (string, "upper")                 \
     || STREQ (string, "lower") || STREQ (string, "digit")              \
     || STREQ (string, "alnum") || STREQ (string, "xdigit")             \
     || STREQ (string, "space") || STREQ (string, "print")              \
     || STREQ (string, "punct") || STREQ (string, "graph")              \
     || STREQ (string, "cntrl") || STREQ (string, "blank"))
+#endif
 \f
 #ifndef MATCH_MAY_ALLOCATE
 
@@ -2147,6 +2178,34 @@ regex_compile (pattern, size, syntax, bufp)
                        the leading `:' and `[' (but set bits for them).  */
                     if (c == ':' && *p == ']')
                       {
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+                        boolean is_lower = STREQ (str, "lower");
+                        boolean is_upper = STREQ (str, "upper");
+                       wctype_t wt;
+                        int ch;
+
+                       wt = wctype (str);
+                       if (wt == 0)
+                         FREE_STACK_RETURN (REG_ECTYPE);
+
+                        /* Throw away the ] at the end of the character
+                           class.  */
+                        PATFETCH (c);
+
+                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+                         {
+                           if (iswctype (btowc (ch), wt))
+                             SET_LIST_BIT (ch);
+
+                           if (translate && (is_upper || is_lower)
+                               && (ISUPPER (ch) || ISLOWER (ch)))
+                             SET_LIST_BIT (ch);
+                         }
+
+                        had_char_class = true;
+#else
                         int ch;
                         boolean is_alnum = STREQ (str, "alnum");
                         boolean is_alpha = STREQ (str, "alpha");
@@ -2194,6 +2253,7 @@ regex_compile (pattern, size, syntax, bufp)
                              SET_LIST_BIT (ch);
                           }
                         had_char_class = true;
+#endif /* libc || wctype.h */
                       }
                     else
                       {
@@ -3551,12 +3611,14 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                  \
    == Sword)
 
+/* Disabled due to a compiler bug -- see comment at case wordbound */
+#if 0
 /* Test if the character before D and the one at D differ with respect
    to being word-constituent.  */
 #define AT_WORD_BOUNDARY(d)                                            \
   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                            \
    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
-
+#endif
 
 /* Free everything we malloc.  */
 #ifdef MATCH_MAY_ALLOCATE
@@ -4725,6 +4787,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                                dummy_low_reg, dummy_high_reg,
                                reg_dummy, reg_dummy, reg_info_dummy);
           }
+         /* Note fall through.  */
 
        unconditional_jump:
 #ifdef _LIBC
@@ -4763,7 +4826,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
           /* It doesn't matter what we push for the string here.  What
              the code at `fail' tests is the value for the pattern.  */
-          PUSH_FAILURE_POINT (0, 0, -2);
+          PUSH_FAILURE_POINT (NULL, NULL, -2);
           goto unconditional_jump;
 
 
@@ -4776,7 +4839,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
           /* See comments just above at `dummy_failure_jump' about the
              two zeroes.  */
-          PUSH_FAILURE_POINT (0, 0, -2);
+          PUSH_FAILURE_POINT (NULL, NULL, -2);
           break;
 
         /* Have to succeed matching what follows at least n times.
@@ -5355,7 +5418,13 @@ re_compile_pattern (pattern, length, bufp)
 /* BSD has one and only one pattern buffer.  */
 static struct re_pattern_buffer re_comp_buf;
 
-char * weak_function
+char *
+#ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec below without link errors.  */
+weak_function
+#endif
 re_comp (s)
     const char *s;
 {
@@ -5396,7 +5465,10 @@ re_comp (s)
 }
 
 
-int weak_function
+int
+#ifdef _LIBC
+weak_function
+#endif
 re_exec (s)
     const char *s;
 {