Use __PMT instead of __P for function pointer.
[kopensolaris-gnu/glibc.git] / iconv / gconv_simple.c
1 /* Simple transformations functions.
2    Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <byteswap.h>
22 #include <endian.h>
23 #include <errno.h>
24 #include <gconv.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <sys/param.h>
30
31 #ifndef EILSEQ
32 # define EILSEQ EINVAL
33 #endif
34
35
/* Lookup tables used by the internal -> UTF-8 conversion below.

   encoding_mask[n - 2] (n = 2 ... 5) has a bit set for every position
   that must be clear in a value that is to be written as an n-byte
   sequence; the encoder picks the first n whose mask does not
   intersect the value and falls back to six bytes otherwise.

   encoding_byte[n - 2] (n = 2 ... 6) is the marker that is stored in
   the first byte of an n-byte sequence before the top payload bits
   are OR-ed in.  */
static const uint32_t encoding_mask[] =
{
  0xfffff800u,          /* two bytes:   value fits in 11 bits */
  0xffff0000u,          /* three bytes: value fits in 16 bits */
  0xffe00000u,          /* four bytes:  value fits in 21 bits */
  0xfc000000u           /* five bytes:  value fits in 26 bits */
};

static const unsigned char encoding_byte[] =
{
  0xc0,                 /* 110xxxxx */
  0xe0,                 /* 1110xxxx */
  0xf0,                 /* 11110xxx */
  0xf8,                 /* 111110xx */
  0xfc                  /* 1111110x */
};
47
48
49
50 int
51 __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
52                          const char **inbuf, const char *inbufend,
53                          size_t *written, int do_flush)
54 {
55   size_t do_write;
56
57   /* We have no stateful encoding.  So we don't have to do anything
58      special.  */
59   if (do_flush)
60     do_write = 0;
61   else
62     {
63       do_write = MIN (inbufend - *inbuf, data->outbufend - data->outbuf);
64
65       memcpy (data->outbuf, inbuf, do_write);
66
67       *inbuf -= do_write;
68       *data->outbuf += do_write;
69     }
70
71   /* ### TODO Actually, this number must be devided according to the
72      size of the input charset.  I.e., if the input is in UCS4 the
73      number of copied bytes must be divided by 4.  */
74   if (written != NULL)
75     *written = do_write;
76
77   return GCONV_OK;
78 }
79
80
81 /* Transform from the internal, UCS4-like format, to UCS4.  The
82    difference between the internal ucs4 format and the real UCS4
83    format is, if any, the endianess.  The Unicode/ISO 10646 says that
84    unless some higher protocol specifies it differently, the byte
85    order is big endian.*/
86 #define DEFINE_INIT             0
87 #define DEFINE_FINI             0
88 #define MIN_NEEDED_FROM         4
89 #define MIN_NEEDED_TO           4
90 #define FROM_DIRECTION          1
91 #define FROM_LOOP               internal_ucs4_loop
92 #define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
93 #define FUNCTION_NAME           __gconv_transform_internal_ucs4
94
95
96 static inline int
97 internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
98                     unsigned char **outptrp, unsigned char *outend,
99                     mbstate_t *state, void *data, size_t *converted)
100 {
101   const unsigned char *inptr = *inptrp;
102   unsigned char *outptr = *outptrp;
103   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
104   int result;
105
106 #if __BYTE_ORDER == __LITTLE_ENDIAN
107   /* Sigh, we have to do some real work.  */
108   size_t cnt;
109
110   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
111     *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr);
112
113   *inptrp = inptr;
114   *outptrp = outptr;
115 #elif __BYTE_ORDER == __BIG_ENDIAN
116   /* Simply copy the data.  */
117   *inptrp = inptr + n_convert * 4;
118   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
119 #else
120 # error "This endianess is not supported."
121 #endif
122
123   /* Determine the status.  */
124   if (*outptrp == outend)
125     result = GCONV_FULL_OUTPUT;
126   else if (*inptrp == inend)
127     result = GCONV_EMPTY_INPUT;
128   else
129     result = GCONV_INCOMPLETE_INPUT;
130
131   if (converted != NULL)
132     converted += n_convert;
133
134   return result;
135 }
136
137 #include <iconv/skeleton.c>
138
139
140 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
141 #define DEFINE_INIT             0
142 #define DEFINE_FINI             0
143 #define MIN_NEEDED_FROM         1
144 #define MIN_NEEDED_TO           4
145 #define FROM_DIRECTION          1
146 #define FROM_LOOP               ascii_internal_loop
147 #define TO_LOOP                 ascii_internal_loop /* This is not used.  */
148 #define FUNCTION_NAME           __gconv_transform_ascii_internal
149
150 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
151 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
152 #define LOOPFCT                 FROM_LOOP
153 #define BODY \
154   {                                                                           \
155     if (*inptr > '\x7f')                                                      \
156       {                                                                       \
157         /* This is no correct ANSI_X3.4-1968 character.  */                   \
158         result = GCONV_ILLEGAL_INPUT;                                         \
159         break;                                                                \
160       }                                                                       \
161                                                                               \
162     /* It's an one byte sequence.  */                                         \
163     *((uint32_t *) outptr)++ = *inptr++;                                      \
164   }
165 #include <iconv/loop.c>
166 #include <iconv/skeleton.c>
167
168
169 /* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
170 #define DEFINE_INIT             0
171 #define DEFINE_FINI             0
172 #define MIN_NEEDED_FROM         4
173 #define MIN_NEEDED_TO           1
174 #define FROM_DIRECTION          1
175 #define FROM_LOOP               internal_ascii_loop
176 #define TO_LOOP                 internal_ascii_loop /* This is not used.  */
177 #define FUNCTION_NAME           __gconv_transform_internal_ascii
178
179 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
180 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
181 #define LOOPFCT                 FROM_LOOP
182 #define BODY \
183   {                                                                           \
184     if (*((uint32_t *) inptr) > 0x7f)                                         \
185       {                                                                       \
186         /* This is no correct ANSI_X3.4-1968 character.  */                   \
187         result = GCONV_ILLEGAL_INPUT;                                         \
188         break;                                                                \
189       }                                                                       \
190                                                                               \
191     /* It's an one byte sequence.  */                                         \
192     *outptr++ = *((uint32_t *) inptr)++;                                      \
193   }
194 #include <iconv/loop.c>
195 #include <iconv/skeleton.c>
196
197
198 /* Convert from the internal (UCS4-like) format to UTF-8.  */
199 #define DEFINE_INIT             0
200 #define DEFINE_FINI             0
201 #define MIN_NEEDED_FROM         4
202 #define MIN_NEEDED_TO           1
203 #define MAX_NEEDED_TO           6
204 #define FROM_DIRECTION          1
205 #define FROM_LOOP               internal_utf8_loop
206 #define TO_LOOP                 internal_utf8_loop /* This is not used.  */
207 #define FUNCTION_NAME           __gconv_transform_internal_utf8
208
209 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
210 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
211 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
212 #define LOOPFCT                 FROM_LOOP
213 #define BODY \
214   {                                                                           \
215     uint32_t wc = *((uint32_t *) inptr);                                      \
216                                                                               \
217     /* Since we control every character we read this cannot happen.  */       \
218     assert (wc <= 0x7fffffff);                                                \
219                                                                               \
220     if (wc < 0x80)                                                            \
221       /* It's an one byte sequence.  */                                       \
222       *outptr++ = (unsigned char) wc;                                         \
223     else                                                                      \
224       {                                                                       \
225         size_t step;                                                          \
226         char *start;                                                          \
227                                                                               \
228         for (step = 2; step < 6; ++step)                                      \
229           if ((wc & encoding_mask[step - 2]) == 0)                            \
230             break;                                                            \
231                                                                               \
232         if (outptr + step >= outend)                                          \
233           {                                                                   \
234             /* Too long.  */                                                  \
235             result = GCONV_FULL_OUTPUT;                                       \
236             break;                                                            \
237           }                                                                   \
238                                                                               \
239         start = outptr;                                                       \
240         *outptr = encoding_byte[step - 2];                                    \
241         outptr += step;                                                       \
242         --step;                                                               \
243         do                                                                    \
244           {                                                                   \
245             start[step] = 0x80 | (wc & 0x3f);                                 \
246             wc >>= 6;                                                         \
247           }                                                                   \
248         while (--step > 0);                                                   \
249         start[0] |= wc;                                                       \
250       }                                                                       \
251                                                                               \
252     inptr += 4;                                                               \
253   }
254 #include <iconv/loop.c>
255 #include <iconv/skeleton.c>
256
257
258 /* Convert from UTF-8 to the internal (UCS4-like) format.  */
259 #define DEFINE_INIT             0
260 #define DEFINE_FINI             0
261 #define MIN_NEEDED_FROM         1
262 #define MAX_NEEDED_FROM         6
263 #define MIN_NEEDED_TO           4
264 #define FROM_DIRECTION          1
265 #define FROM_LOOP               utf8_internal_loop
266 #define TO_LOOP                 utf8_internal_loop /* This is not used.  */
267 #define FUNCTION_NAME           __gconv_transform_utf8_internal
268
269 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
270 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
271 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
272 #define LOOPFCT                 FROM_LOOP
273 #define BODY \
274   {                                                                           \
275     uint32_t ch;                                                              \
276     uint_fast32_t cnt;                                                        \
277     uint_fast32_t i;                                                          \
278                                                                               \
279     /* Next input byte.  */                                                   \
280     ch = *inptr;                                                              \
281                                                                               \
282     if (ch < 0x80)                                                            \
283       {                                                                       \
284         /* One byte sequence.  */                                             \
285         cnt = 1;                                                              \
286         ++inptr;                                                              \
287       }                                                                       \
288     else                                                                      \
289       {                                                                       \
290         if ((ch & 0xe0) == 0xc0)                                              \
291           {                                                                   \
292             cnt = 2;                                                          \
293             ch &= 0x1f;                                                       \
294           }                                                                   \
295         else if ((ch & 0xf0) == 0xe0)                                         \
296           {                                                                   \
297             /* We expect three bytes.  */                                     \
298             cnt = 3;                                                          \
299             ch &= 0x0f;                                                       \
300           }                                                                   \
301         else if ((ch & 0xf8) == 0xf0)                                         \
302           {                                                                   \
303             /* We expect four bytes.  */                                      \
304             cnt = 4;                                                          \
305             ch &= 0x07;                                                       \
306           }                                                                   \
307         else if ((ch & 0xfc) == 0xf8)                                         \
308           {                                                                   \
309             /* We expect five bytes.  */                                      \
310             cnt = 5;                                                          \
311             ch &= 0x03;                                                       \
312           }                                                                   \
313         else if ((ch & 0xfe) == 0xfc)                                         \
314           {                                                                   \
315             /* We expect six bytes.  */                                       \
316             cnt = 6;                                                          \
317             ch &= 0x01;                                                       \
318           }                                                                   \
319         else                                                                  \
320           {                                                                   \
321             /* This is an illegal encoding.  */                               \
322             result = GCONV_ILLEGAL_INPUT;                                     \
323             break;                                                            \
324           }                                                                   \
325                                                                               \
326         if (NEED_LENGTH_TEST && inptr + cnt > inend)                          \
327           {                                                                   \
328             /* We don't have enough input.  */                                \
329             result = GCONV_INCOMPLETE_INPUT;                                  \
330             break;                                                            \
331           }                                                                   \
332                                                                               \
333         /* Read the possible remaining bytes.  */                             \
334         for (i = 1; i < cnt; ++i)                                             \
335           {                                                                   \
336             uint32_t byte = inptr[i];                                         \
337                                                                               \
338             if ((byte & 0xc0) != 0x80)                                        \
339               {                                                               \
340                 /* This is an illegal encoding.  */                           \
341                 result = GCONV_ILLEGAL_INPUT;                                 \
342                 break;                                                        \
343               }                                                               \
344                                                                               \
345             ch <<= 6;                                                         \
346             ch |= byte & 0x3f;                                                \
347           }                                                                   \
348         inptr += cnt;                                                         \
349       }                                                                       \
350                                                                               \
351     /* Now adjust the pointers and store the result.  */                      \
352     *((uint32_t *) outptr)++ = ch;                                            \
353   }
354 #include <iconv/loop.c>
355 #include <iconv/skeleton.c>
356
357
358 /* Convert from UCS2 to the internal (UCS4-like) format.  */
359 #define DEFINE_INIT             0
360 #define DEFINE_FINI             0
361 #define MIN_NEEDED_FROM         2
362 #define MIN_NEEDED_TO           4
363 #define FROM_DIRECTION          1
364 #define FROM_LOOP               ucs2_internal_loop
365 #define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
366 #define FUNCTION_NAME           __gconv_transform_ucs2_internal
367
368 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
369 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
370 #define LOOPFCT                 FROM_LOOP
371 #if __BYTE_ORDER == __LITTLE_ENDIAN
372 # define BODY \
373   *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr);                  \
374   inptr += 2;
375 #else
376 # define BODY \
377   *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
378 #endif
379 #include <iconv/loop.c>
380 #include <iconv/skeleton.c>
381
382
383 /* Convert from the internal (UCS4-like) format to UCS2.  */
384 #define DEFINE_INIT             0
385 #define DEFINE_FINI             0
386 #define MIN_NEEDED_FROM         4
387 #define MIN_NEEDED_TO           2
388 #define FROM_DIRECTION          1
389 #define FROM_LOOP               internal_ucs2_loop
390 #define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
391 #define FUNCTION_NAME           __gconv_transform_internal_ucs2
392
393 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
394 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
395 #define LOOPFCT                 FROM_LOOP
396 #if __BYTE_ORDER == __LITTLE_ENDIAN
397 # define BODY \
398   {                                                                           \
399     if (*((uint32_t *) inptr) >= 0x10000)                                     \
400       {                                                                       \
401         result = GCONV_ILLEGAL_INPUT;                                         \
402         break;                                                                \
403       }                                                                       \
404     /* Please note that we use the `uint32_t' from-pointer as an `uint16_t'   \
405        pointer which works since we are on a little endian machine.  */       \
406     *((uint16_t *) outptr)++ = bswap_16 (*((uint16_t *) inptr));              \
407     inptr += 4;                                                               \
408   }
409 #else
410 # define BODY \
411   {                                                                           \
412     if (*((uint32_t *) inptr) >= 0x10000)                                     \
413       {                                                                       \
414         result = GCONV_ILLEGAL_INPUT;                                         \
415         break;                                                                \
416       }                                                                       \
417     *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++;                       \
418   }
419 #endif
420 #include <iconv/loop.c>
421 #include <iconv/skeleton.c>
422
423
424 /* Convert from UCS2 in little endian to the internal (UCS4-like) format.  */
425 #define DEFINE_INIT             0
426 #define DEFINE_FINI             0
427 #define MIN_NEEDED_FROM         2
428 #define MIN_NEEDED_TO           4
429 #define FROM_DIRECTION          1
430 #define FROM_LOOP               ucs2little_internal_loop
431 #define TO_LOOP                 ucs2little_internal_loop /* This is not used.*/
432 #define FUNCTION_NAME           __gconv_transform_ucs2little_internal
433
434 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
435 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
436 #define LOOPFCT                 FROM_LOOP
437 #if __BYTE_ORDER == __LITTLE_ENDIAN
438 # define BODY \
439   *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++;
440 #else
441 # define BODY \
442   *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr);                  \
443   inptr += 2;
444 #endif
445 #include <iconv/loop.c>
446 #include <iconv/skeleton.c>
447
448
449 /* Convert from the internal (UCS4-like) format to UCS2 in little endian.  */
450 #define DEFINE_INIT             0
451 #define DEFINE_FINI             0
452 #define MIN_NEEDED_FROM         4
453 #define MIN_NEEDED_TO           2
454 #define FROM_DIRECTION          1
455 #define FROM_LOOP               internal_ucs2little_loop
456 #define TO_LOOP                 internal_ucs2little_loop /* This is not used.*/
457 #define FUNCTION_NAME           __gconv_transform_internal_ucs2little
458
459 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
460 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
461 #define LOOPFCT                 FROM_LOOP
462 #if __BYTE_ORDER == __LITTLE_ENDIAN
463 # define BODY \
464   {                                                                           \
465     if (*((uint32_t *) inptr) >= 0x10000)                                     \
466       {                                                                       \
467         result = GCONV_ILLEGAL_INPUT;                                         \
468         break;                                                                \
469       }                                                                       \
470     *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++;                       \
471   }
472 #else
473 # define BODY \
474   {                                                                           \
475     if (*((uint32_t *) inptr) >= 0x10000)                                     \
476       {                                                                       \
477         result = GCONV_ILLEGAL_INPUT;                                         \
478         break;                                                                \
479       }                                                                       \
480     *((uint16_t *) outptr)++ = bswap_16 (((uint16_t *) inptr)[1]);            \
481     inptr += 4;                                                               \
482   }
483 #endif
484 #include <iconv/loop.c>
485 #include <iconv/skeleton.c>