(PREPARE_LOOP): Return __GCONV_EMPTY_INPUT only if input is really
[kopensolaris-gnu/glibc.git] / iconvdata / utf-32.c
1 /* Conversion module for UTF-32.
2    Copyright (C) 1999, 2000-2002 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, write to the Free
17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18    02111-1307 USA.  */
19
20 #include <byteswap.h>
21 #include <dlfcn.h>
22 #include <gconv.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27
/* U+FEFF, the Byte Order Mark (BOM), as a 32-bit value.  */
#define BOM     0xfeffu
/* The value the same four bytes yield when read in the opposite byte
   order.  */
#define BOM_OE  0xfffe0000u
32
33
/* Parameters for the body of the `gconv' function, which is pulled in
   by the <iconv/skeleton.c> include at the end of this file.  */
#define FROM_LOOP               from_utf32_loop
#define TO_LOOP                 to_utf32_loop
/* gconv_init and gconv_end are written out by hand in this file
   (presumably that is what the two zeros announce to the skeleton;
   confirm against skeleton.c).  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
/* One character takes four bytes on either side of the conversion.  */
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          (dir == from_utf32)

/* Prologue run before the conversion loops.  It fetches the direction
   and variant recorded by gconv_init and, for plain "UTF-32" on the
   first invocation only, deals with the Byte Order Mark:

   - when decoding, a leading BOM is consumed; if it shows up in the
     opposite byte order the swap flag is switched on.  Fewer than four
     input bytes end the call with __GCONV_EMPTY_INPUT (no bytes at all)
     or __GCONV_INCOMPLETE_INPUT (one to three bytes);
   - when encoding, and the step data is not marked for internal use, a
     BOM is written first, or __GCONV_FULL_OUTPUT is returned if it does
     not fit.

   NOTE(review): the detected swap flag is written to step->__data and
   nothing visible in this file clears it again.  If that object is
   reused or shared between conversions, a later stream would inherit
   the flag -- confirm against the lifetime rules of gconv steps.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf32_data *) step->__data)->dir;             \
  enum variant var = ((struct utf32_data *) step->__data)->var;               \
  int swap;                                                                   \
  if (var == UTF_32 && data->__invocation_counter == 0)                       \
    {                                                                         \
      if (FROM_DIRECTION)                                                     \
        {                                                                     \
          /* The byte order of the input is still unknown; the first four    \
             bytes have to be inspected.  */                                  \
          if (inptr + 4 > inend)                                              \
            {                                                                 \
              if (inptr == inend)                                             \
                return __GCONV_EMPTY_INPUT;                                   \
              return __GCONV_INCOMPLETE_INPUT;                                \
            }                                                                 \
                                                                              \
          if (get32u (inptr) == BOM)                                          \
            {                                                                 \
              /* BOM in our own byte order: just step over it.  */            \
              inptr += 4;                                                     \
              *inptrp = inptr;                                                \
            }                                                                 \
          else if (get32u (inptr) == BOM_OE)                                  \
            {                                                                 \
              /* BOM in the other byte order: step over it and remember      \
                 that every following word must be swapped.  */               \
              ((struct utf32_data *) step->__data)->swap = 1;                 \
              inptr += 4;                                                     \
              *inptrp = inptr;                                                \
            }                                                                 \
        }                                                                     \
      else if (!data->__internal_use)                                         \
        {                                                                     \
          /* Emit the Byte Order Mark ahead of any converted text.  */        \
          if (__builtin_expect (outbuf + 4 > outend, 0))                      \
            return __GCONV_FULL_OUTPUT;                                       \
                                                                              \
          put32u (outbuf, BOM);                                               \
          outbuf += 4;                                                        \
        }                                                                     \
    }                                                                         \
  swap = ((struct utf32_data *) step->__data)->swap;
/* Both loop functions receive the variant and the swap flag.  */
#define EXTRA_LOOP_ARGS         , var, swap
77
78
/* Which way a conversion step runs.  illegal_dir is the "not yet
   determined" value gconv_init starts from.  */
enum direction
{
  illegal_dir = 0,
  to_utf32 = 1,
  from_utf32 = 2
};
86
/* The UTF-32 flavour a step was opened for: plain "UTF-32" (byte order
   settled through the BOM), or one of the two fixed byte orders.
   illegal_var is the "not yet determined" value gconv_init starts
   from.  */
enum variant
{
  illegal_var = 0,
  UTF_32 = 1,
  UTF_32LE = 2,
  UTF_32BE = 3
};
94
95 struct utf32_data
96 {
97   enum direction dir;
98   enum variant var;
99   int swap;
100 };
101
102
103 extern int gconv_init (struct __gconv_step *step);
104 int
105 gconv_init (struct __gconv_step *step)
106 {
107   /* Determine which direction.  */
108   struct utf32_data *new_data;
109   enum direction dir = illegal_dir;
110   enum variant var = illegal_var;
111   int result;
112
113   if (__strcasecmp (step->__from_name, "UTF-32//") == 0)
114     {
115       dir = from_utf32;
116       var = UTF_32;
117     }
118   else if (__strcasecmp (step->__to_name, "UTF-32//") == 0)
119     {
120       dir = to_utf32;
121       var = UTF_32;
122     }
123   else if (__strcasecmp (step->__from_name, "UTF-32BE//") == 0)
124     {
125       dir = from_utf32;
126       var = UTF_32BE;
127     }
128   else if (__strcasecmp (step->__to_name, "UTF-32BE//") == 0)
129     {
130       dir = to_utf32;
131       var = UTF_32BE;
132     }
133   else if (__strcasecmp (step->__from_name, "UTF-32LE//") == 0)
134     {
135       dir = from_utf32;
136       var = UTF_32LE;
137     }
138   else if (__strcasecmp (step->__to_name, "UTF-32LE//") == 0)
139     {
140       dir = to_utf32;
141       var = UTF_32LE;
142     }
143
144   result = __GCONV_NOCONV;
145   if (__builtin_expect (dir, to_utf32) != illegal_dir)
146     {
147       new_data = (struct utf32_data *) malloc (sizeof (struct utf32_data));
148
149       result = __GCONV_NOMEM;
150       if (new_data != NULL)
151         {
152           new_data->dir = dir;
153           new_data->var = var;
154           new_data->swap = ((var == UTF_32LE && BYTE_ORDER == BIG_ENDIAN)
155                             || (var == UTF_32BE
156                                 && BYTE_ORDER == LITTLE_ENDIAN));
157           step->__data = new_data;
158
159           if (dir == from_utf32)
160             {
161               step->__min_needed_from = MIN_NEEDED_FROM;
162               step->__max_needed_from = MIN_NEEDED_FROM;
163               step->__min_needed_to = MIN_NEEDED_TO;
164               step->__max_needed_to = MIN_NEEDED_TO;
165             }
166           else
167             {
168               step->__min_needed_from = MIN_NEEDED_TO;
169               step->__max_needed_from = MIN_NEEDED_TO;
170               step->__min_needed_to = MIN_NEEDED_FROM;
171               step->__max_needed_to = MIN_NEEDED_FROM;
172             }
173
174           step->__stateful = 0;
175
176           result = __GCONV_OK;
177         }
178     }
179
180   return result;
181 }
182
183
extern void gconv_end (struct __gconv_step *data);

/* Tear down a conversion step: release whatever DATA->__data points
   to (the `struct utf32_data' that gconv_init stored there).  The
   pointer itself is left as it is.  */
void
gconv_end (struct __gconv_step *data)
{
  void *private_data = data->__data;

  free (private_data);
}
190
191
192 /* Convert from the internal (UCS4-like) format to UTF-32.  */
193 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
194 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
195 #define LOOPFCT                 TO_LOOP
196 #define BODY \
197   {                                                                           \
198     uint32_t c = get32 (inptr);                                               \
199                                                                               \
200     if (__builtin_expect (c >= 0x110000, 0))                                  \
201       {                                                                       \
202         STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
203       }                                                                       \
204     else if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0))                 \
205       {                                                                       \
206         /* Surrogate characters in UCS-4 input are not valid.                 \
207            We must catch this.  If we let surrogates pass through,            \
208            attackers could make a security hole exploit by                    \
209            generating "irregular UTF-32" sequences.  */                       \
210         result = __GCONV_ILLEGAL_INPUT;                                       \
211         if (! ignore_errors_p ())                                             \
212           break;                                                              \
213         inptr += 4;                                                           \
214         ++*irreversible;                                                      \
215         continue;                                                             \
216       }                                                                       \
217                                                                               \
218     if (swap)                                                                 \
219       put32 (outptr, bswap_32 (c));                                           \
220     else                                                                      \
221       put32 (outptr, c);                                                      \
222                                                                               \
223     outptr += 4;                                                              \
224     inptr += 4;                                                               \
225   }
226 #define LOOP_NEED_FLAGS
227 #define EXTRA_LOOP_DECLS \
228         , enum variant var, int swap
229 #include <iconv/loop.c>
230
231
232 /* Convert from UTF-32 to the internal (UCS4-like) format.  */
233 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
234 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
235 #define LOOPFCT                 FROM_LOOP
236 #define BODY \
237   {                                                                           \
238     uint32_t u1 = get32 (inptr);                                              \
239                                                                               \
240     if (swap)                                                                 \
241       u1 = bswap_32 (u1);                                                     \
242                                                                               \
243     if (__builtin_expect (u1 >= 0x110000, 0))                                 \
244       {                                                                       \
245         /* This is illegal.  */                                               \
246         STANDARD_FROM_LOOP_ERR_HANDLER (4);                                   \
247       }                                                                       \
248                                                                               \
249     put32 (outptr, u1);                                                       \
250     inptr += 4;                                                               \
251     outptr += 4;                                                              \
252   }
253 #define LOOP_NEED_FLAGS
254 #define EXTRA_LOOP_DECLS \
255         , enum variant var, int swap
256 #include <iconv/loop.c>
257
258
259 /* Now define the toplevel functions.  */
260 #include <iconv/skeleton.c>