1 /* Copyright (C) 1991,92,93,94,95,96,97,98 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
27 #include <bits/libc-lock.h>
28 #include <locale/localeinfo.h>
31 # define HAVE_LONGLONG
32 # define LONGLONG long long
34 # define LONGLONG long
37 /* These are bit flags recording what was seen in a conversion
   specification; TYPEMOD below is the union of the size modifiers. */
38 # define LONG 0x001 /* l: long int, or double for floating-point */
39 # define LONGDBL 0x002 /* L: long long int or long double */
40 # define SHORT 0x004 /* h: short int */
41 # define SUPPRESS 0x008 /* *: suppress assignment */
42 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
43 # define NOSKIP 0x020 /* do not skip blanks */
44 # define WIDTH 0x040 /* width was given */
45 # define GROUP 0x080 /* ': group numbers */
46 # define MALLOC 0x100 /* a: malloc strings */
47 # define CHAR 0x200 /* hh: char */
49 # define TYPEMOD (LONG|LONGDBL|SHORT|CHAR)
57 # define va_list _IO_va_list
58 # define ungetc(c, s) ((void) ((int) c != EOF && --read_in), \
60 # define inchar() (c == EOF ? EOF \
61 : ((c = _IO_getc_unlocked (s)), \
62 (void) (c != EOF && ++read_in), c))
63 # define encode_error() do { \
64 if (errp != NULL) *errp |= 4; \
65 _IO_funlockfile (s); \
66 __set_errno (EILSEQ); \
69 # define conv_error() do { \
70 if (errp != NULL) *errp |= 2; \
71 _IO_funlockfile (s); \
74 # define input_error() do { \
75 _IO_funlockfile (s); \
76 if (errp != NULL) *errp |= 1; \
79 # define memory_error() do { \
80 _IO_funlockfile (s); \
81 __set_errno (ENOMEM); \
84 # define ARGCHECK(s, format) \
87 /* Check file argument for consistency. */ \
88 CHECK_FILE (s, EOF); \
89 if (s->_flags & _IO_NO_READS) \
91 __set_errno (EBADF); \
94 else if (format == NULL) \
100 # define LOCK_STREAM(S) \
101 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
103 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
105 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
106 # define inchar() (c == EOF ? EOF \
107 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
108 # define encode_error() do { \
110 __set_errno (EILSEQ); \
113 # define conv_error() do { \
117 # define input_error() do { \
119 return done ?: EOF; \
121 # define memory_error() do { \
123 __set_errno (ENOMEM); \
126 # define ARGCHECK(s, format) \
129 /* Check file argument for consistency. */ \
130 if (!__validfp (s) || !s->__mode.__read) \
132 __set_errno (EBADF); \
135 else if (format == NULL) \
137 __set_errno (EINVAL); \
142 /* XXX For now !!! */
143 # define flockfile(S) /* nothing */
144 # define funlockfile(S) /* nothing */
145 # define LOCK_STREAM(S)
146 # define UNLOCK_STREAM
148 # define LOCK_STREAM(S) \
149 __libc_cleanup_region_start (&__funlockfile, (S)); \
151 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
156 /* Read formatted input from S according to the format string
157 FORMAT, using the argument list in ARG.
158 Return the number of assignments made, or -1 for an input error. */
161 _IO_vfscanf (s, format, argptr, errp)
168 __vfscanf (FILE *s, const char *format, va_list argptr)
172 register const char *f = format;
173 register unsigned char fc; /* Current character of the format. */
174 register size_t done = 0; /* Assignments done. */
175 register size_t read_in = 0; /* Chars read in. */
176 register int c = 0; /* Last char read. */
177 register int width; /* Maximum field width. */
178 register int flags; /* Modifiers for current format element. */
180 /* Status for reading F-P nums. */
181 char got_dot, got_e, negative;
182 /* If a [...] is a [^...]. */
184 #define exp_char not_in
185 /* Base for integral numbers. */
187 /* Signedness for integral numbers. */
189 #define is_hexa number_signed
190 /* Decimal point character. */
192 /* The thousands character of the current locale. */
194 /* Integral holding variables. */
198 unsigned long long int uq;
200 unsigned long int ul;
202 /* Character-buffer pointer. */
204 wchar_t *wstr = NULL;
205 char **strptr = NULL;
207 /* We must not react to white space immediately because it can
208 possibly be matched even if no character is available in the
209 input stream anymore. */
212 char *tw; /* Temporary pointer. */
213 char *wp = NULL; /* Workspace. */
214 size_t wpmax = 0; /* Maximal size of workspace. */
215 size_t wpsize; /* Currently used bytes in workspace. */
219 if (wpsize == wpmax) \
222 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
223 wp = (char *) alloca (wpmax); \
225 memcpy (wp, old, wpsize); \
227 wp[wpsize++] = (Ch); \
232 __va_copy (arg, argptr);
234 arg = (va_list) argptr;
237 ARGCHECK (s, format);
239 /* Figure out the decimal point character. */
240 if (mbtowc (&decimal, _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT),
241 strlen (_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT))) <= 0)
242 decimal = (wchar_t) *_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
243 /* Figure out the thousands separator character. */
244 if (mbtowc (&thousands, _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP),
245 strlen (_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP))) <= 0)
246 thousands = (wchar_t) *_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
248 /* Lock the stream. */
251 /* Run through the format string. */
255 /* Extract the next argument, which is of type TYPE.
256 For a %N$... spec, this is the Nth argument from the beginning;
257 otherwise it is the next argument after the state now in ARG. */
259 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
260 ({ unsigned int pos = argpos; \
262 __va_copy (arg, argptr); \
264 (void) va_arg (arg, void *); \
265 va_arg (arg, type); \
269 /* XXX Possible optimization. */
270 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
271 ({ va_list arg = (va_list) argptr; \
272 arg = (va_list) ((char *) arg \
274 * __va_rounded_size (void *)); \
275 va_arg (arg, type); \
278 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
279 ({ unsigned int pos = argpos; \
280 va_list arg = (va_list) argptr; \
282 (void) va_arg (arg, void *); \
283 va_arg (arg, type); \
290 /* Non-ASCII, may be a multibyte. */
291 int len = mblen (f, strlen (f));
313 /* Remember to skip spaces. */
320 /* Read a character. */
323 /* Characters other than format specs must just match. */
327 /* We saw a white space char as the last character in the format
328 string. Now it's time to skip all leading white space. */
332 if (inchar () == EOF && errno == EINTR)
346 /* This is the start of the conversion string. */
349 /* Initialize state of modifiers. */
352 /* Prepare temporary buffer. */
355 /* Check for a positional parameter specification. */
360 argpos = argpos * 10 + (*f++ - '0');
365 /* Oops; that was actually the field width. */
373 /* Check for the assignment-suppressing and the number grouping flag. */
374 while (*f == '*' || *f == '\'')
385 /* We have seen width. */
389 /* Find the maximum field width. */
400 /* Check for type modifiers. */
401 while (*f == 'h' || *f == 'l' || *f == 'L' || *f == 'a' || *f == 'q')
405 /* int's are short int's. */
406 if (flags & (LONG|LONGDBL|CHAR))
407 /* Signal illegal format element. */
418 if (flags & (SHORT|LONGDBL|CHAR))
420 else if (flags & LONG)
422 /* A double `l' is equivalent to an `L'. */
427 /* int's are long int's. */
432 /* double's are long double's, and int's are long long int's. */
434 /* Signal illegal format element. */
439 /* The `a' is used as a flag only if followed by `s', `S' or
441 if (*f != 's' && *f != 'S' && *f != '[')
447 /* Signal illegal format element. */
449 /* String conversions (%s, %[) take a `char **'
450 arg and fill it in with a malloc'd pointer. */
455 /* End of the format string? */
459 /* Find the conversion specifier. */
461 if (skip_space || (fc != '[' && fc != 'c' && fc != 'C' && fc != 'n'))
463 /* Eat whitespace. */
464 int save_errno = errno;
467 if (inchar () == EOF && errno == EINTR)
477 case '%': /* Must match a literal '%'. */
488 case 'n': /* Answer number of assignments done. */
489 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
490 with the 'n' conversion specifier. */
491 if (!(flags & SUPPRESS))
493 /* Don't count the read-ahead. */
495 *ARG (long long int *) = read_in;
496 else if (flags & LONG)
497 *ARG (long int *) = read_in;
498 else if (flags & SHORT)
499 *ARG (short int *) = read_in;
501 *ARG (int *) = read_in;
503 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
504 /* We have a severe problem here. The ISO C standard
505 contradicts itself in explaining the effect of the %n
506 format in `scanf'. While in ISO C:1990 and the ISO C
507 Amendment 1:1995 the result is described as
509 Execution of a %n directive does not affect the
510 assignment count returned at the completion of
511 execution of the f(w)scanf function.
513 in ISO C Corrigendum 1:1994 the following was added:
516 Add the following fourth example:
519 int d1, d2, n1, n2, i;
520 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
521 the value 123 is assigned to d1 and the value 3 to n1.
522 Because %n can never get an input failure the value
523 of 3 is also assigned to n2. The value of d2 is not
524 affected. The value 3 is assigned to i.
526 We go for now with the historically correct code from ISO C,
527 i.e., we don't count the %n assignments. If this should ever
528 prove to be wrong, just remove the #ifdef above. */
534 case 'c': /* Match characters. */
535 if ((flags & LONG) == 0)
537 if (!(flags & SUPPRESS))
551 if (!(flags & SUPPRESS))
555 while (--width > 0 && inchar () != EOF);
558 while (--width > 0 && inchar () != EOF);
560 if (!(flags & SUPPRESS))
567 /* Get UTF-8 encoded wide character. Here we assume (as in
568 other parts of the libc) that we only have to handle
575 if (!(flags & SUPPRESS))
577 wstr = ARG (wchar_t *);
584 #define NEXT_WIDE_CHAR(First) \
587 /* EOF is only an error for the first character. */ \
595 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
597 if ((c & 0xe0) == 0xc0) \
599 /* We expect two bytes. */ \
603 else if ((c & 0xf0) == 0xe0) \
605 /* We expect three bytes. */ \
609 else if ((c & 0xf8) == 0xf0) \
611 /* We expect four bytes. */ \
615 else if ((c & 0xfc) == 0xf8) \
617 /* We expect five bytes. */ \
623 /* We expect six bytes. */ \
632 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
640 if (!(flags & SUPPRESS)) \
644 NEXT_WIDE_CHAR (first);
648 if (!(flags & SUPPRESS))
653 case 's': /* Read a string. */
655 /* We have to process a wide character string. */
656 goto wide_char_string;
658 #define STRING_ARG(Str, Type) \
659 if (!(flags & SUPPRESS)) \
661 if (flags & MALLOC) \
663 /* The string is to be stored in a malloc'd buffer. */ \
664 strptr = ARG (char **); \
665 if (strptr == NULL) \
667 /* Allocate an initial buffer. */ \
669 *strptr = malloc (strsize * sizeof (Type)); \
670 Str = (Type *) *strptr; \
673 Str = ARG (Type *); \
677 STRING_ARG (str, char);
690 #define STRING_ADD_CHAR(Str, c, Type) \
691 if (!(flags & SUPPRESS)) \
694 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
696 /* Enlarge the buffer. */ \
697 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
700 /* Can't allocate that much. Last-ditch effort. */\
701 Str = realloc (*strptr, \
702 (strsize + 1) * sizeof (Type)); \
705 /* We lose. Oh well. \
706 Terminate the string and stop converting, \
707 so at least we don't skip any input. */ \
708 ((Type *) (*strptr))[strsize] = '\0'; \
714 *strptr = (char *) Str; \
715 Str = ((Type *) *strptr) + strsize; \
721 *strptr = (char *) Str; \
722 Str = ((Type *) *strptr) + strsize; \
727 STRING_ADD_CHAR (str, c, char);
728 } while ((width <= 0 || --width > 0) && inchar () != EOF);
730 if (!(flags & SUPPRESS))
738 /* Wide character string. */
743 STRING_ARG (wstr, wchar_t);
748 NEXT_WIDE_CHAR (first);
752 /* XXX We would have to push back the whole wide char
753 with possibly many bytes. But since scanf does
754 not distinguish between white space characters
755 we can simply push back a single <SP> which is
756 guaranteed to be in the [:space:] class. */
761 STRING_ADD_CHAR (wstr, val, wchar_t);
764 while (width <= 0 || --width > 0);
766 if (!(flags & SUPPRESS))
774 case 'x': /* Hexadecimal integer. */
775 case 'X': /* Ditto. */
780 case 'o': /* Octal integer. */
785 case 'u': /* Unsigned decimal integer. */
790 case 'd': /* Signed decimal integer. */
795 case 'i': /* Generic number. */
804 /* Check for a sign. */
805 if (c == '-' || c == '+')
813 /* Look for a leading indication of base. */
814 if (width != 0 && c == '0')
822 if (width != 0 && tolower (c) == 'x')
840 /* Read the number into workspace. */
841 while (c != EOF && width != 0)
843 if (base == 16 ? !isxdigit (c) :
844 ((!isdigit (c) || c - '0' >= base) &&
845 !((flags & GROUP) && base == 10 && c == thousands)))
854 /* The just read character is not part of the number anymore. */
858 (wpsize == 1 && (wp[0] == '+' || wp[0] == '-')))
859 /* There was no number. */
862 /* Convert the number. */
867 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
869 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
874 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
876 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
881 if (!(flags & SUPPRESS))
886 *ARG (unsigned LONGLONG int *) = num.uq;
887 else if (flags & LONG)
888 *ARG (unsigned long int *) = num.ul;
889 else if (flags & SHORT)
890 *ARG (unsigned short int *)
891 = (unsigned short int) num.ul;
892 else if (flags & CHAR)
893 *ARG (unsigned char *) = (unsigned char) num.ul;
895 *ARG (unsigned int *) = (unsigned int) num.ul;
900 *ARG (LONGLONG int *) = num.q;
901 else if (flags & LONG)
902 *ARG (long int *) = num.l;
903 else if (flags & SHORT)
904 *ARG (short int *) = (short int) num.l;
905 else if (flags & CHAR)
906 *ARG (signed char *) = (signed char) num.ul;
908 *ARG (int *) = (int) num.l;
914 case 'e': /* Floating-point numbers. */
925 /* Check for a sign. */
926 if (c == '-' || c == '+')
929 if (inchar () == EOF)
930 /* EOF is only an input error before we read any chars. */
938 /* Take care of the special arguments "nan" and "inf". */
939 if (tolower (c) == 'n')
943 if (inchar () == EOF || tolower (c) != 'a')
946 if (inchar () == EOF || tolower (c) != 'n')
952 else if (tolower (c) == 'i')
954 /* Maybe "inf" or "infinity". */
956 if (inchar () == EOF || tolower (c) != 'n')
959 if (inchar () == EOF || tolower (c) != 'f')
962 /* It is at least "inf". */
963 if (inchar () != EOF)
965 if (tolower (c) == 'i')
967 /* Now we have to read the rest as well. */
969 if (inchar () == EOF || tolower (c) != 'n')
972 if (inchar () == EOF || tolower (c) != 'i')
975 if (inchar () == EOF || tolower (c) != 't')
978 if (inchar () == EOF || tolower (c) != 'y')
995 if (tolower (c) == 'x')
997 /* It is a number in hexadecimal format. */
1003 /* Grouping is not allowed. */
1009 got_dot = got_e = 0;
1014 else if (!got_e && is_hexa && isxdigit (c))
1016 else if (got_e && wp[wpsize - 1] == exp_char
1017 && (c == '-' || c == '+'))
1019 else if (wpsize > 0 && !got_e && tolower (c) == exp_char)
1022 got_e = got_dot = 1;
1024 else if (c == decimal && !got_dot)
1029 else if ((flags & GROUP) && c == thousands && !got_dot)
1033 /* The last read character is not part of the number
1041 while (width != 0 && inchar () != EOF);
1043 /* Have we read any character? If we try to read a number
1044 in hexadecimal notation and we have read only the `0x'
1045 prefix this is an error. */
1046 if (wpsize == 0 || (is_hexa && wpsize == 2))
1050 /* Convert the number. */
1052 if (flags & LONGDBL)
1054 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1055 if (!(flags & SUPPRESS) && tw != wp)
1056 *ARG (long double *) = negative ? -d : d;
1058 else if (flags & LONG)
1060 double d = __strtod_internal (wp, &tw, flags & GROUP);
1061 if (!(flags & SUPPRESS) && tw != wp)
1062 *ARG (double *) = negative ? -d : d;
1066 float d = __strtof_internal (wp, &tw, flags & GROUP);
1067 if (!(flags & SUPPRESS) && tw != wp)
1068 *ARG (float *) = negative ? -d : d;
1074 if (!(flags & SUPPRESS))
1078 case '[': /* Character class. */
1081 STRING_ARG (wstr, wchar_t);
1082 c = '\0'; /* This is to keep gcc quiet. */
1086 STRING_ARG (str, char);
1101 /* Fill WP with byte flags indexed by character.
1102 We will use this flag map for matching input characters. */
1103 if (wpmax < UCHAR_MAX)
1106 wp = (char *) alloca (wpmax);
1108 memset (wp, 0, UCHAR_MAX);
1111 if (fc == ']' || fc == '-')
1113 /* If ] or - appears before any char in the set, it is not
1114 the terminator or separator, but the first char in the
1120 while ((fc = *f++) != '\0' && fc != ']')
1122 if (fc == '-' && *f != '\0' && *f != ']' &&
1123 (unsigned char) f[-2] <= (unsigned char) *f)
1125 /* Add all characters from the one before the '-'
1126 up to (but not including) the next format char. */
1127 for (fc = f[-2]; fc < *f; ++fc)
1131 /* Add the character to the flag map. */
1136 if (!(flags & LONG))
1149 NEXT_WIDE_CHAR (first);
1150 if (val <= 255 && wp[val] == not_in)
1155 STRING_ADD_CHAR (wstr, val, wchar_t);
1165 if (!(flags & SUPPRESS))
1173 num.ul = read_in - 1; /* -1 because we already read one char. */
1176 if (wp[c] == not_in)
1181 STRING_ADD_CHAR (str, c, char);
1185 while (width != 0 && inchar () != EOF);
1187 if (read_in == num.ul)
1190 if (!(flags & SUPPRESS))
1198 case 'p': /* Generic pointer. */
1200 /* A PTR must be the same size as a `long int'. */
1201 flags &= ~(SHORT|LONGDBL);
1208 /* The last thing we saw in the format string was white space.
1209 Consume the remaining white space. */
1214 while (isspace (c));
1218 /* Unlock stream. */
1226 __vfscanf (FILE *s, const char *format, va_list argptr)
1228 return _IO_vfscanf (s, format, argptr, NULL);
1232 weak_alias (__vfscanf, vfscanf)