(utf8_internal_loop): Correctly reconstruct stored character in state
[kopensolaris-gnu/glibc.git] / iconv / gconv_simple.c
1 /* Simple transformations functions.
2    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <byteswap.h>
22 #include <endian.h>
23 #include <errno.h>
24 #include <gconv.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <sys/param.h>
30
/* Fall back to EINVAL when <errno.h> does not provide EILSEQ.  */
#ifndef EILSEQ
# define EILSEQ EINVAL
#endif
34
35
/* These are definitions used by some of the functions for handling
   UTF-8 encoding below.  */

/* encoding_mask[N - 2] has a bit set for every bit a character must not
   use if it is to be written as an N-byte sequence.  The internal to
   UTF-8 loop tries N = 2..5 in turn and takes the first N for which
   (wc & mask) is zero; if none matches it uses six bytes.  */
static const uint32_t encoding_mask[] =
{
  ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
};

/* encoding_byte[N - 2] is the marker stored in the first byte of an
   N-byte sequence (N = 2..6).  The internal to UTF-8 loop ORs the
   remaining high bits of the character into it.  */
static const unsigned char encoding_byte[] =
{
  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};
47
48
/* Transform from the internal, UCS4-like format, to UCS4.  The
   difference between the internal ucs4 format and the real UCS4
   format is, if any, the endianness.  The Unicode/ISO 10646 standard
   says that unless some higher protocol specifies it differently, the
   byte order is big endian.  */

/* Settings for the <iconv/skeleton.c> template that is included after
   the loop functions below.  Both sides use four bytes per character.
   How each macro is interpreted is decided by skeleton.c, which is not
   part of this file.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4
62
63
/* Convert as many complete 4-byte characters as fit from the internal
   (host byte order) form to big-endian UCS4.

   *INPTRP and INEND delimit the input, *OUTPTRP and OUTEND the output.
   Both *INPTRP and *OUTPTRP are advanced past the characters handled.
   STATE, DATA and CONVERTED are not used by this function.

   Returns __GCONV_FULL_OUTPUT if the output pointer ends up exactly at
   OUTEND, otherwise __GCONV_EMPTY_INPUT if the input pointer ends up at
   INEND, otherwise __GCONV_INCOMPLETE_INPUT.  */
static inline int
internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      /* Load and store through memcpy instead of incrementing the
         result of a pointer cast, which is not valid ISO C.  */
      uint32_t word;

      memcpy (&word, inptr, sizeof (word));
      word = bswap_32 (word);
      memcpy (outptr, &word, sizeof (word));
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
101
102 #ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of internal_ucs4_loop: converts complete 4-byte
   characters from the internal (host byte order) form to big-endian
   UCS4 without ever accessing the buffers as 32-bit words.

   *INPTRP and *OUTPTRP are advanced past the characters handled.
   STATE, DATA and CONVERTED are not used by this function.  The return
   value is __GCONV_FULL_OUTPUT, __GCONV_EMPTY_INPUT or
   __GCONV_INCOMPLETE_INPUT, tested in that order.  */
static inline int
internal_ucs4_loop_unaligned (const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  size_t remaining = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* The host stores the low byte first, UCS4 wants it last: mirror the
     four bytes of every character.  */
  while (remaining > 0)
    {
      dst[3] = src[0];
      dst[2] = src[1];
      dst[1] = src[2];
      dst[0] = src[3];

      src += 4;
      dst += 4;
      --remaining;
    }
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is the wanted order; a plain copy is enough.  */
  memcpy (dst, src, remaining * 4);
  src += remaining * 4;
  dst += remaining * 4;
# else
#  error "This endianess is not supported."
# endif

  *inptrp = src;
  *outptrp = dst;

  /* Report why we stopped.  */
  if (dst == outend)
    return __GCONV_FULL_OUTPUT;
  if (src == inend)
    return __GCONV_EMPTY_INPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
146 #endif
147
148
/* Finish one character whose first bytes were saved in STATE by an
   earlier call, converting it from the internal form to big-endian
   UCS4.

   The low three bits of STATE->__count hold the number of bytes already
   saved in STATE->__value.__wchb.  Further bytes are taken from *INPTRP
   (which is advanced) until four are available.

   If the input runs out first, the new byte count is recorded in STATE
   and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written at *OUTPTRP, *OUTPTRP is advanced by four, the byte count in
   STATE is reset and __GCONV_OK is returned.

   OUTEND is not consulted, so the caller has to guarantee room for four
   bytes.  DATA and CONVERTED are not used.  */
static inline int
internal_ucs4_loop_single (const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           mbstate_t *state, void *data, size_t *converted)
{
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (cnt < 4)
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* The saved bytes already are in output order.  Copy them bytewise so
     that no alignment of the output buffer is required.  */
  memcpy (*outptrp, state->__value.__wchb, 4);
#else
# error "This endianess is not supported."
#endif

  /* Tell the caller that four bytes were produced.  Previously neither
     branch moved *OUTPTRP (the big-endian one incremented the local
     parameter instead), so the character would have been overwritten
     by the next write.  */
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
186
187 #include <iconv/skeleton.c>
188
189
/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
   for the other direction we have to check for correct values here.  */

/* Settings for the <iconv/skeleton.c> template that is included after
   the loop functions below.  Both sides use four bytes per character.
   How each macro is interpreted is decided by skeleton.c, which is not
   part of this file.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal
200
201
/* Convert as many complete 4-byte characters as fit from big-endian
   UCS4 to the internal (host byte order) form.

   Every character is checked: a value above 0x7fffffff stops the
   conversion with __GCONV_ILLEGAL_INPUT, leaving *INPTRP and *OUTPTRP
   at the offending character and at the next free output position.

   Otherwise *INPTRP and *OUTPTRP are advanced past the characters
   handled and the result is __GCONV_FULL_OUTPUT,  __GCONV_EMPTY_INPUT
   or __GCONV_INCOMPLETE_INPUT, tested in that order.  STATE, DATA and
   CONVERTED are not used by this function.  */
static inline int
ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t inval;

      memcpy (&inval, inptr, sizeof (inval));
#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (inval);
#endif

      if (inval > 0x7fffffff)
        {
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* INVAL already is in host byte order.  Store it as is; swapping
         the raw input unconditionally is only right on little-endian
         hosts.  */
      memcpy (outptr, &inval, sizeof (inval));
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
246
247 #ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of ucs4_internal_loop: converts complete 4-byte
   characters from big-endian UCS4 to the internal (host byte order)
   form without accessing the buffers as 32-bit words.

   A character whose most significant byte is above 0x7f (that is, a
   value above 0x7fffffff, the same limit the word-wise loop applies)
   stops the conversion with __GCONV_ILLEGAL_INPUT; *INPTRP then points
   at that character.

   Otherwise *INPTRP and *OUTPTRP are advanced past the characters
   handled and the result is __GCONV_FULL_OUTPUT, __GCONV_EMPTY_INPUT
   or __GCONV_INCOMPLETE_INPUT, tested in that order.  STATE, DATA and
   CONVERTED are not used by this function.  */
static inline int
ucs4_internal_loop_unaligned (const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      /* inptr[0] is the most significant byte.  0x80 itself must be
         rejected as well, it means the value is at least 0x80000000.  */
      if (inptr[0] > 0x7f)
        {
          /* The value is too large.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
308 #endif
309
310
/* Finish one big-endian UCS4 character whose first bytes were saved in
   STATE by an earlier call and convert it to the internal form.

   The low three bits of STATE->__count hold the number of bytes already
   saved in STATE->__value.__wchb.  Further bytes are taken from *INPTRP
   (which is advanced) until four are available.

   Returns __GCONV_INCOMPLETE_INPUT, after recording the new byte count
   in STATE, if the input runs out first.  Returns __GCONV_ILLEGAL_INPUT
   without touching the output or the byte count if the character is
   above 0x7fffffff.  Otherwise writes the character at *OUTPTRP,
   advances *OUTPTRP by four, resets the byte count and returns
   __GCONV_OK.

   OUTEND is not consulted, so the caller has to guarantee room for four
   bytes.  DATA and CONVERTED are not used.  */
static inline int
ucs4_internal_loop_single (const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           mbstate_t *state, void *data, size_t *converted)
{
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (cnt < 4)
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 0 is the most significant one.  0x80 itself must be rejected
     as well, it means the value is at least 0x80000000.  */
  if (((unsigned char *) state->__value.__wchb)[0] > 0x7f)
    /* The value is too large.  */
    return __GCONV_ILLEGAL_INPUT;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  /* Tell the caller that four bytes were produced; without this the
     next write would land on top of this character.  */
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
352
353 #include <iconv/skeleton.c>
354
355
/* Similarly for the little endian form.  */

/* Settings for the <iconv/skeleton.c> template that is included after
   the loop functions below.  Both sides use four bytes per character.
   How each macro is interpreted is decided by skeleton.c, which is not
   part of this file.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le
365
366
/* Convert as many complete 4-byte characters as fit from the internal
   (host byte order) form to little-endian UCS4.

   *INPTRP and INEND delimit the input, *OUTPTRP and OUTEND the output.
   Both *INPTRP and *OUTPTRP are advanced past the characters handled.
   STATE, DATA and CONVERTED are not used by this function.

   Returns __GCONV_FULL_OUTPUT if the output pointer ends up exactly at
   OUTEND, otherwise __GCONV_EMPTY_INPUT if the input pointer ends up at
   INEND, otherwise __GCONV_INCOMPLETE_INPUT.  */
static inline int
internal_ucs4le_loop (const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      /* Load and store through memcpy instead of incrementing the
         result of a pointer cast, which is not valid ISO C.  */
      uint32_t word;

      memcpy (&word, inptr, sizeof (word));
      word = bswap_32 (word);
      memcpy (outptr, &word, sizeof (word));
    }

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
404
405 #ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of internal_ucs4le_loop: converts complete 4-byte
   characters from the internal (host byte order) form to little-endian
   UCS4 without accessing the buffers as 32-bit words.

   *INPTRP and *OUTPTRP are advanced past the characters handled.
   STATE, DATA and CONVERTED are not used by this function.  The return
   value is __GCONV_FULL_OUTPUT, __GCONV_EMPTY_INPUT or
   __GCONV_INCOMPLETE_INPUT, tested in that order.  */
static inline int
internal_ucs4le_loop_unaligned (const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                mbstate_t *state, void *data,
                                size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  /* OUTPTR has to move along with INPTR; otherwise every character is
     written over the first output slot and *OUTPTRP never advances.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
450 #endif
451
452
/* Finish one character whose first bytes were saved in STATE by an
   earlier call, converting it from the internal form to little-endian
   UCS4.

   The low three bits of STATE->__count hold the number of bytes already
   saved in STATE->__value.__wchb.  Further bytes are taken from *INPTRP
   (which is advanced) until four are available.

   If the input runs out first, the new byte count is recorded in STATE
   and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the character is
   written at *OUTPTRP, *OUTPTRP is advanced by four, the byte count in
   STATE is reset and __GCONV_OK is returned.

   OUTEND is not consulted, so the caller has to guarantee room for four
   bytes.  DATA and CONVERTED are not used.  */
static inline int
internal_ucs4le_loop_single (const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             mbstate_t *state, void *data, size_t *converted)
{
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (cnt < 4)
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  /* The saved bytes already are in output order.  Copy them bytewise so
     that no alignment of the output buffer is required.  */
  memcpy (*outptrp, state->__value.__wchb, 4);
#endif

  /* Tell the caller that four bytes were produced.  Previously the
     big-endian branch did not move *OUTPTRP at all and the other one
     incremented the local parameter instead.  */
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
488
489 #include <iconv/skeleton.c>
490
491
/* And finally from UCS4-LE to the internal encoding.  */

/* Settings for the <iconv/skeleton.c> template that is included after
   the loop functions below.  Both sides use four bytes per character.
   How each macro is interpreted is decided by skeleton.c, which is not
   part of this file.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal
501
502
/* Convert as many complete 4-byte characters as fit from little-endian
   UCS4 to the internal (host byte order) form.

   Every character is checked: a value above 0x7fffffff stops the
   conversion with __GCONV_ILLEGAL_INPUT, leaving *INPTRP and *OUTPTRP
   at the offending character and at the next free output position.

   Otherwise *INPTRP and *OUTPTRP are advanced past the characters
   handled and the result is __GCONV_FULL_OUTPUT, __GCONV_EMPTY_INPUT
   or __GCONV_INCOMPLETE_INPUT, tested in that order.  STATE, DATA and
   CONVERTED are not used by this function.  */
static inline int
ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      mbstate_t *state, void *data, size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      uint32_t inval;

      memcpy (&inval, inptr, sizeof (inval));
#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (inval);
#endif

      if (inval > 0x7fffffff)
        {
          /* Report how far we got, as the big-endian UCS4 loop does, so
             the characters converted so far are not lost.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* INVAL already is in host byte order.  Store it as is; swapping
         the raw input unconditionally is only right on big-endian
         hosts.  */
      memcpy (outptr, &inval, sizeof (inval));
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
543
544 #ifndef _STRING_ARCH_unaligned
/* Byte-wise variant of ucs4le_internal_loop: converts complete 4-byte
   characters from little-endian UCS4 to the internal (host byte order)
   form without accessing the buffers as 32-bit words.

   A character whose most significant byte (the last one) is above 0x7f,
   that is, a value above 0x7fffffff, stops the conversion with
   __GCONV_ILLEGAL_INPUT; *INPTRP then points at that character.

   Otherwise *INPTRP and *OUTPTRP are advanced past the characters
   handled and the result is __GCONV_FULL_OUTPUT, __GCONV_EMPTY_INPUT
   or __GCONV_INCOMPLETE_INPUT, tested in that order.  STATE, DATA and
   CONVERTED are not used by this function.  */
static inline int
ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                mbstate_t *state, void *data,
                                size_t *converted)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;
  size_t cnt;

  /* OUTPTR has to move along with INPTR; otherwise every character is
     written over the first output slot and *OUTPTRP never advances.  */
  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      /* inptr[3] is the most significant byte.  0x80 itself must be
         rejected as well, it means the value is at least 0x80000000.  */
      if (inptr[3] > 0x7f)
        {
          /* The value is too large.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*outptrp == outend)
    result = __GCONV_FULL_OUTPUT;
  else if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
595 #endif
596
597
/* Finish one little-endian UCS4 character whose first bytes were saved
   in STATE by an earlier call and convert it to the internal form.

   The low three bits of STATE->__count hold the number of bytes already
   saved in STATE->__value.__wchb.  Further bytes are taken from *INPTRP
   (which is advanced) until four are available.

   Returns __GCONV_INCOMPLETE_INPUT, after recording the new byte count
   in STATE, if the input runs out first.  Returns __GCONV_ILLEGAL_INPUT
   without touching the output or the byte count if the character is
   above 0x7fffffff.  Otherwise writes the character at *OUTPTRP,
   advances *OUTPTRP by four, resets the byte count and returns
   __GCONV_OK.

   OUTEND is not consulted, so the caller has to guarantee room for four
   bytes.  DATA and CONVERTED are not used.  */
static inline int
ucs4le_internal_loop_single (const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             mbstate_t *state, void *data, size_t *converted)
{
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (cnt < 4)
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 3 is the most significant one.  0x80 itself must be rejected
     as well, it means the value is at least 0x80000000.  */
  if (((unsigned char *) state->__value.__wchb)[3] > 0x7f)
    /* The value is too large.  */
    return __GCONV_ILLEGAL_INPUT;

#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* This branch used to repeat the big-endian test, so little-endian
     hosts wrote nothing at all.  */
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  /* Tell the caller that four bytes were produced; without this the
     next write would land on top of this character.  */
  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
639
640 #include <iconv/skeleton.c>
641
642
/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */

/* Settings for the <iconv/loop.c> and <iconv/skeleton.c> templates that
   are included right after BODY.  One input byte yields one four-byte
   output character.  How each macro is interpreted is decided by those
   templates, which are not part of this file.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ascii_internal_loop
#define TO_LOOP                 ascii_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ascii_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  Uses the names inptr, outptr and result
   of the place where it is expanded.  A byte above 0x7f sets result to
   __GCONV_ILLEGAL_INPUT and breaks out; otherwise the byte is stored as
   a 32-bit value at outptr, inptr is advanced by one and outptr by
   four.  */
#define BODY \
  {                                                                           \
    if (*inptr > '\x7f')                                                      \
      {                                                                       \
        /* This is no correct ANSI_X3.4-1968 character.  */                   \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        break;                                                                \
      }                                                                       \
                                                                              \
    /* It's an one byte sequence.  */                                         \
    /* XXX unaligned.  */                                                     \
    /* Advance outptr explicitly; incrementing the result of a cast is     */ \
    /* not valid ISO C.                                                    */ \
    *((uint32_t *) outptr) = *inptr++;                                        \
    outptr += sizeof (uint32_t);                                              \
  }
670 #include <iconv/loop.c>
671 #include <iconv/skeleton.c>
672
673
/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */

/* Settings for the <iconv/loop.c> and <iconv/skeleton.c> templates that
   are included right after BODY.  One four-byte input character yields
   one output byte.  How each macro is interpreted is decided by those
   templates, which are not part of this file.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ascii_loop
#define TO_LOOP                 internal_ascii_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ascii
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  Uses the names inptr, outptr and result
   of the place where it is expanded.  A 32-bit value above 0x7f sets
   result to __GCONV_ILLEGAL_INPUT and breaks out; otherwise its low
   byte is stored at outptr, inptr is advanced by four and outptr by
   one.  */
#define BODY \
  {                                                                           \
    if (*((const uint32_t *) inptr) > 0x7f)                                   \
      {                                                                       \
        /* This is no correct ANSI_X3.4-1968 character.  */                   \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        break;                                                                \
      }                                                                       \
                                                                              \
    /* It's an one byte sequence.  */                                         \
    /* Advance inptr explicitly; incrementing the result of a cast is      */ \
    /* not valid ISO C.                                                    */ \
    *outptr++ = *((const uint32_t *) inptr);                                  \
    inptr += sizeof (uint32_t);                                               \
  }
700 #include <iconv/loop.c>
701 #include <iconv/skeleton.c>
702
703
/* Convert from the internal (UCS4-like) format to UTF-8.  */

/* Settings for the <iconv/loop.c> and <iconv/skeleton.c> templates that
   are included right after BODY.  One four-byte input character yields
   between one and six output bytes.  How each macro is interpreted is
   decided by those templates, which are not part of this file.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           1
#define MAX_NEEDED_TO           6
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_utf8_loop
#define TO_LOOP                 internal_utf8_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_utf8
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Conversion of one character.  Uses the names inptr, outptr, outend
   and result of the place where it is expanded, plus the encoding_mask
   and encoding_byte tables.  Values below 0x80 are copied as one byte.
   For larger values the sequence length is looked up in encoding_mask;
   if the sequence does not fit before outend, result is set to
   __GCONV_FULL_OUTPUT and nothing is consumed.  Otherwise the
   continuation bytes are filled in from the last one backwards, six
   bits at a time, and the bits left over go into the first byte.  */
#define BODY \
  {                                                                           \
    uint32_t wc = *((uint32_t *) inptr);                                      \
                                                                              \
    /* Since we control every character we read this cannot happen.  */       \
    assert (wc <= 0x7fffffff);                                                \
                                                                              \
    if (wc < 0x80)                                                            \
      /* It's an one byte sequence.  */                                       \
      *outptr++ = (unsigned char) wc;                                         \
    else                                                                      \
      {                                                                       \
        size_t step;                                                          \
        /* Same type as outptr, so the assignment below needs no           */ \
        /* conversion between differently signed character pointers.       */ \
        unsigned char *start;                                                 \
                                                                              \
        for (step = 2; step < 6; ++step)                                      \
          if ((wc & encoding_mask[step - 2]) == 0)                            \
            break;                                                            \
                                                                              \
        if (outptr + step > outend)                                           \
          {                                                                   \
            /* Too long.  */                                                  \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        start = outptr;                                                       \
        *outptr = encoding_byte[step - 2];                                    \
        outptr += step;                                                       \
        --step;                                                               \
        do                                                                    \
          {                                                                   \
            start[step] = 0x80 | (wc & 0x3f);                                 \
            wc >>= 6;                                                         \
          }                                                                   \
        while (--step > 0);                                                   \
        start[0] |= wc;                                                       \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
761 #include <iconv/loop.c>
762 #include <iconv/skeleton.c>
763
764
/* Convert from UTF-8 to the internal (UCS4-like) format.  */

/* Settings for the conversion templates used with the BODY that
   follows.  Between one and six input bytes yield one four-byte output
   character.  How each macro is interpreted is decided by the
   templates, which are not part of this file.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         6
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               utf8_internal_loop
#define TO_LOOP                 utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_utf8_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
781 #define BODY \
782   {                                                                           \
783     uint32_t ch;                                                              \
784     uint_fast32_t cnt;                                                        \
785     uint_fast32_t i;                                                          \
786                                                                               \
787     /* Next input byte.  */                                                   \
788     ch = *inptr;                                                              \
789                                                                               \
790     if (ch < 0x80)                                                            \
791       {                                                                       \
792         /* One byte sequence.  */                                             \
793         cnt = 1;                                                              \
794         ++inptr;                                                              \
795       }                                                                       \
796     else                                                                      \
797       {                                                                       \
798         if (ch >= 0xc2 && ch < 0xe0)                                          \
799           {                                                                   \
800             /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
801                otherwise the wide character could have been represented       \
802                using a single byte.  */                                       \
803             cnt = 2;                                                          \
804             ch &= 0x1f;                                                       \
805           }                                                                   \
806         else if ((ch & 0xf0) == 0xe0)                                         \
807           {                                                                   \
808             /* We expect three bytes.  */                                     \
809             cnt = 3;                                                          \
810             ch &= 0x0f;                                                       \
811           }                                                                   \
812         else if ((ch & 0xf8) == 0xf0)                                         \
813           {                                                                   \
814             /* We expect four bytes.  */                                      \
815             cnt = 4;                                                          \
816             ch &= 0x07;                                                       \
817           }                                                                   \
818         else if ((ch & 0xfc) == 0xf8)                                         \
819           {                                                                   \
820             /* We expect five bytes.  */                                      \
821             cnt = 5;                                                          \
822             ch &= 0x03;                                                       \
823           }                                                                   \
824         else if ((ch & 0xfe) == 0xfc)                                         \
825           {                                                                   \
826             /* We expect six bytes.  */                                       \
827             cnt = 6;                                                          \
828             ch &= 0x01;                                                       \
829           }                                                                   \
830         else                                                                  \
831           {                                                                   \
832             /* This is an illegal encoding.  */                               \
833             result = __GCONV_ILLEGAL_INPUT;                                   \
834             break;                                                            \
835           }                                                                   \
836                                                                               \
837         if (NEED_LENGTH_TEST && inptr + cnt > inend)                          \
838           {                                                                   \
839             /* We don't have enough input.  But before we report that check   \
840                that all the bytes are correct.  */                            \
841             for (i = 1; inptr + i < inend; ++i)                               \
842               if ((inptr[i] & 0xc0) != 0x80)                                  \
843                 break;                                                        \
844             result = (inptr + i == inend                                      \
845                       ? __GCONV_INCOMPLETE_INPUT : __GCONV_ILLEGAL_INPUT);    \
846             break;                                                            \
847           }                                                                   \
848                                                                               \
849         /* Read the possible remaining bytes.  */                             \
850         for (i = 1; i < cnt; ++i)                                             \
851           {                                                                   \
852             uint32_t byte = inptr[i];                                         \
853                                                                               \
854             if ((byte & 0xc0) != 0x80)                                        \
855               /* This is an illegal encoding.  */                             \
856               break;                                                          \
857                                                                               \
858             ch <<= 6;                                                         \
859             ch |= byte & 0x3f;                                                \
860           }                                                                   \
861                                                                               \
862         /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.               \
863            If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could       \
864            have been represented with fewer than cnt bytes.  */               \
865         if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))       \
866           {                                                                   \
867             /* This is an illegal encoding.  */                               \
868             result = __GCONV_ILLEGAL_INPUT;                                   \
869             break;                                                            \
870           }                                                                   \
871                                                                               \
872         inptr += cnt;                                                         \
873       }                                                                       \
874                                                                               \
875     /* Now adjust the pointers and store the result.  */                      \
876     *((uint32_t *) outptr)++ = ch;                                            \
877   }
878
/* Partial-character state for the UTF-8 decoder.

   STORE_REST and UNPACK_BYTES are exact inverses of each other and must
   be kept in sync.  When the input ends in the middle of a multibyte
   sequence, STORE_REST saves the bytes seen so far in *STATE:

     state->__count        number of bytes saved (1 to 5)
     state->__value.__wch  the saved bytes, packed: the lead byte
                           verbatim, followed by the low six bits of
                           every saved trail byte

   The lead byte is kept verbatim so that the length of the sequence
   stays recoverable.  Keeping only the partially decoded character value
   is not enough: a lone 0xe0, 0xf0, 0xf8 or 0xfc contributes no payload
   bits, so all of them would be stored as zero and could no longer be
   told apart when the sequence is resumed.  The widest case, a six-byte
   sequence with five bytes present, needs 8 + 4 * 6 = 32 bits and
   therefore still fits.  */
#define STORE_REST \
  {                                                                           \
    /* We can assume that the first byte in the buffer is a correct lead      \
       byte and that it requires a larger number of bytes than there are      \
       in the input buffer.  */                                               \
    uint32_t packed = **inptrp;                                               \
                                                                              \
    state->__count = inend - *inptrp;                                         \
                                                                              \
    /* Consume all remaining input, appending six bits per trail byte.  */    \
    while (++(*inptrp) < inend)                                               \
      packed = (packed << 6) | (**inptrp & 0x3f);                             \
                                                                              \
    /* Store the value.  */                                                   \
    state->__value.__wch = packed;                                            \
  }

/* Rebuild in BYTEBUF the bytes saved by STORE_REST and set INLEN to
   their number.  */
#define UNPACK_BYTES \
  {                                                                           \
    uint32_t packed = state->__value.__wch;                                   \
    size_t pos;                                                               \
                                                                              \
    inlen = state->__count;                                                   \
                                                                              \
    /* Peel the trail bytes off, from the last one back to the first;         \
       the 10xxxxxx marker bits were dropped when storing.  */                \
    for (pos = inlen; pos > 1; )                                              \
      {                                                                       \
        bytebuf[--pos] = 0x80 | (packed & 0x3f);                              \
        packed >>= 6;                                                         \
      }                                                                       \
                                                                              \
    /* What is left is the lead byte exactly as it appeared in the input.  */ \
    bytebuf[0] = packed;                                                      \
  }
981
982 #include <iconv/loop.c>
983 #include <iconv/skeleton.c>
984
985
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Widen one 16-bit unit at INPTR to 32 bits at OUTPTR and advance both
   pointers.  The buffers are accessed through memcpy so that no
   alignment is assumed, and the pointers are advanced explicitly
   because incrementing the result of a cast is not valid C.  */
#define BODY \
  {                                                                           \
    uint16_t ucs2;                                                            \
    uint32_t ucs4;                                                            \
                                                                              \
    memcpy (&ucs2, inptr, sizeof (ucs2));                                     \
    ucs4 = ucs2;                                                              \
    memcpy (outptr, &ucs4, sizeof (ucs4));                                    \
    inptr += sizeof (ucs2);                                                   \
    outptr += sizeof (ucs4);                                                  \
  }
1002 #include <iconv/loop.c>
1003 #include <iconv/skeleton.c>
1004
1005
/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2_loop
#define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Narrow one 32-bit value at INPTR to 16 bits at OUTPTR and advance
   both pointers.  A value of 0x10000 or above does not fit: RESULT is
   set to __GCONV_ILLEGAL_INPUT and the enclosing loop is left without
   consuming the value.  The buffers are accessed through memcpy so that
   no alignment is assumed, and the pointers are advanced explicitly
   because incrementing the result of a cast is not valid C.  */
#define BODY \
  {                                                                           \
    uint32_t ucs4;                                                            \
    uint16_t ucs2;                                                            \
                                                                              \
    memcpy (&ucs4, inptr, sizeof (ucs4));                                     \
    if (ucs4 >= 0x10000)                                                      \
      {                                                                       \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        break;                                                                \
      }                                                                       \
    ucs2 = ucs4;                                                              \
    memcpy (outptr, &ucs2, sizeof (ucs2));                                    \
    inptr += sizeof (ucs4);                                                   \
    outptr += sizeof (ucs2);                                                  \
  }
1029 #include <iconv/loop.c>
1030 #include <iconv/skeleton.c>
1031
1032
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2reverse_internal_loop
#define TO_LOOP                 ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Byte-swap one 16-bit unit at INPTR, widen it to 32 bits at OUTPTR and
   advance both pointers.  The buffers are accessed through memcpy so
   that no alignment is assumed, and the pointers are advanced
   explicitly because incrementing the result of a cast is not valid C.
   The braces make the macro expand to a single statement.  */
#define BODY \
  {                                                                           \
    uint16_t ucs2;                                                            \
    uint32_t ucs4;                                                            \
                                                                              \
    memcpy (&ucs2, inptr, sizeof (ucs2));                                     \
    ucs4 = bswap_16 (ucs2);                                                   \
    memcpy (outptr, &ucs4, sizeof (ucs4));                                    \
    inptr += sizeof (ucs2);                                                   \
    outptr += sizeof (ucs4);                                                  \
  }
1050 #include <iconv/loop.c>
1051 #include <iconv/skeleton.c>
1052
1053
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2reverse_loop
#define TO_LOOP                 internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Narrow one 32-bit value at INPTR to a byte-swapped 16-bit unit at
   OUTPTR and advance both pointers.  A value of 0x10000 or above does
   not fit: RESULT is set to __GCONV_ILLEGAL_INPUT and the enclosing loop
   is left without consuming the value.  The buffers are accessed
   through memcpy so that no alignment is assumed, and the pointers are
   advanced explicitly because incrementing the result of a cast is not
   valid C.  */
#define BODY \
  {                                                                           \
    uint32_t val;                                                             \
    uint16_t swapped;                                                         \
                                                                              \
    memcpy (&val, inptr, sizeof (val));                                       \
    if (val >= 0x10000)                                                       \
      {                                                                       \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        break;                                                                \
      }                                                                       \
    swapped = bswap_16 (val);                                                 \
    memcpy (outptr, &swapped, sizeof (swapped));                              \
    inptr += sizeof (val);                                                    \
    outptr += sizeof (swapped);                                               \
  }
1079 #include <iconv/loop.c>
1080 #include <iconv/skeleton.c>