(__gconv_read_conf): Don't call realpath.
[kopensolaris-gnu/glibc.git] / iconv / gconv_simple.c
1 /* Simple transformations functions.
2    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <byteswap.h>
22 #include <dlfcn.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <gconv.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <sys/param.h>
31
32 #ifndef EILSEQ
33 # define EILSEQ EINVAL
34 #endif
35
36
37 /* These are definitions used by some of the functions for handling
38    UTF-8 encoding below.  */
/* Bit masks used when picking the length of a UTF-8 sequence: entry I
   has no bit in common with a value exactly when that value can be
   stored in a sequence of I + 2 bytes.  */
static const uint32_t encoding_mask[] =
{
  ~(uint32_t) 0x7ff,            /* Two bytes: 11 payload bits.  */
  ~(uint32_t) 0xffff,           /* Three bytes: 16 payload bits.  */
  ~(uint32_t) 0x1fffff,         /* Four bytes: 21 payload bits.  */
  ~(uint32_t) 0x3ffffff         /* Five bytes: 26 payload bits.  */
};
43
/* Marker bits of the first byte of a multi-byte UTF-8 sequence; entry I
   belongs to a sequence of I + 2 bytes.  */
static const unsigned char encoding_byte[] =
{
  0xc0,                         /* Two byte sequence.  */
  0xe0,                         /* Three byte sequence.  */
  0xf0,                         /* Four byte sequence.  */
  0xf8,                         /* Five byte sequence.  */
  0xfc                          /* Six byte sequence.  */
};
48
49
50 /* Transform from the internal, UCS4-like format, to UCS4.  The
51    difference between the internal ucs4 format and the real UCS4
52    format is, if any, the endianness.  The Unicode/ISO 10646 standard
53    says that unless some higher protocol specifies it differently, the
54    byte order is big endian.  */
55 #define DEFINE_INIT             0 /* Settings presumably read by <iconv/skeleton.c>, included after the loops -- TODO confirm.  */
56 #define DEFINE_FINI             0
57 #define MIN_NEEDED_FROM         4 /* The loops below consume four bytes per character.  */
58 #define MIN_NEEDED_TO           4 /* The loops below emit four bytes per character.  */
59 #define FROM_DIRECTION          1
60 #define FROM_LOOP               internal_ucs4_loop
61 #define TO_LOOP                 internal_ucs4_loop /* This is not used; it only repeats FROM_LOOP.  */
62 #define FUNCTION_NAME           __gconv_transform_internal_ucs4 /* Presumably the name of the generated entry point -- TODO confirm.  */
63
64
/* Convert whole characters from the internal format (32-bit values in
   host byte order) to big-endian UCS4.

   STEP, STEP_DATA and IRREVERSIBLE are not used.  *INPTRP and *OUTPTRP
   are advanced past everything that was converted.  On little-endian
   hosts both buffers are accessed as uint32_t objects, so they must be
   suitably aligned.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output room are
   left, and __GCONV_INCOMPLETE_INPUT when only a partial character
   (one to three bytes) remains in the input.  */
static inline int
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Number of characters for which there is both input and room.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another character, not only when it is exactly
     exhausted.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
104
105 #ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4_loop that only uses byte accesses and so
   works for buffers with arbitrary alignment.

   STEP, STEP_DATA and IRREVERSIBLE are not used.  *INPTRP and *OUTPTRP
   are advanced past everything that was converted.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output room are
   left, and __GCONV_INCOMPLETE_INPUT when only a partial character
   remains in the input.  The checks are made in this order so that the
   result agrees with the aligned variant when the input is exhausted
   and the output buffer is full at the same time.  */
static inline int
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Number of characters for which there is both input and room.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
151 #endif
152
153
/* Convert a single character, part of which may have been saved in the
   conversion state by an earlier call.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already saved in __value.__wchb.  Bytes are taken from *INPTRP
   until four are available.  If the input ends first, the new byte
   count is recorded and __GCONV_INCOMPLETE_INPUT is returned.
   Otherwise the character is written in big-endian order, *OUTPTRP is
   advanced by four, the saved byte count is cleared and __GCONV_OK is
   returned.

   STEP and IRREVERSIBLE are not used.
   NOTE(review): OUTEND is not consulted; the caller apparently has to
   guarantee four bytes of room -- confirm against skeleton.c.  */
static inline int
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Write bytewise in both cases: *OUTPTRP need not be aligned and it
     is the pointee, not OUTPTRP itself, which has to be advanced.  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#else
# error "This endianess is not supported."
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
196
197 #include <iconv/skeleton.c>
198
199
200 /* Transform from UCS4 to the internal, UCS4-like format.  Unlike
201    for the other direction we have to check for correct values here.  */
202 #define DEFINE_INIT             0 /* Settings presumably read by <iconv/skeleton.c>, included after the loops -- TODO confirm.  */
203 #define DEFINE_FINI             0
204 #define MIN_NEEDED_FROM         4 /* The loops below consume four bytes per character.  */
205 #define MIN_NEEDED_TO           4 /* The loops below emit four bytes per character.  */
206 #define FROM_DIRECTION          1
207 #define FROM_LOOP               ucs4_internal_loop
208 #define TO_LOOP                 ucs4_internal_loop /* This is not used; it only repeats FROM_LOOP.  */
209 #define FUNCTION_NAME           __gconv_transform_ucs4_internal /* Presumably the name of the generated entry point -- TODO confirm.  */
210
211
/* Convert whole characters from big-endian UCS4 to the internal format
   (32-bit values in host byte order), rejecting values above
   0x7fffffff.

   STEP is not used.  Both buffers are accessed as uint32_t objects and
   must be suitably aligned.  *INPTRP and *OUTPTRP are advanced past
   everything that was processed.

   When an invalid value is met and STEP_DATA->__flags contains
   __GCONV_IGNORE_ERRORS, the character is skipped and *IRREVERSIBLE is
   incremented.  Otherwise the function stops in front of the invalid
   character and returns __GCONV_ILLEGAL_INPUT.

   Apart from that the result is __GCONV_EMPTY_INPUT when all input was
   consumed, __GCONV_FULL_OUTPUT when fewer than four bytes of output
   room are left, and __GCONV_INCOMPLETE_INPUT when only a partial
   character remains in the input.  */
static inline int
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, unsigned char *outend,
                    size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The bounds are tested on every round.  A skipped character uses up
     input but no output, so a count computed up front would end the
     loop while there are still complete characters and room left.  */
  while (inend - inptr >= 4 && outend - outptr >= 4)
    {
      uint32_t inval = *(const uint32_t *) inptr;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      inval = bswap_32 (inval);
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              inptr += 4;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *(uint32_t *) outptr = inval;
      inptr += 4;
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
270
271 #ifndef _STRING_ARCH_unaligned
/* Variant of ucs4_internal_loop that only uses byte accesses and so
   works for buffers with arbitrary alignment.

   The input is big-endian, so a character is above 0x7fffffff exactly
   when the top bit of its first byte is set.  Such characters are
   skipped and counted in *IRREVERSIBLE when STEP_DATA->__flags contains
   __GCONV_IGNORE_ERRORS.  Otherwise the function stops in front of the
   character and returns __GCONV_ILLEGAL_INPUT.

   STEP is not used.  *INPTRP and *OUTPTRP are advanced past everything
   that was processed.  Apart from the error case the result is
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four bytes of output room are left, and
   __GCONV_INCOMPLETE_INPUT when only a partial character remains.  */
static inline int
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp, unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The bounds are tested on every round since a skipped character
     uses up input but no output.  */
  while (inend - inptr >= 4 && outend - outptr >= 4)
    {
      if (__builtin_expect (inptr[0] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              inptr += 4;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      inptr += 4;
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
334 #endif
335
336
/* Convert a single big-endian UCS4 character, part of which may have
   been saved in the conversion state by an earlier call.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already saved in __value.__wchb.  Bytes are taken from *INPTRP
   until four are available.  If the input ends first, the new byte
   count is recorded and __GCONV_INCOMPLETE_INPUT is returned.

   A character above 0x7fffffff (top bit of the first byte set) is
   invalid.  With __GCONV_IGNORE_ERRORS in STEP_DATA->__flags it is
   dropped and counted in *IRREVERSIBLE.  Otherwise the bytes read by
   this call are handed back by rewinding *INPTRP and
   __GCONV_ILLEGAL_INPUT is returned.  A valid character is written in
   host byte order and *OUTPTRP is advanced by four.  In both non-error
   cases the saved byte count is cleared and __GCONV_OK is returned.

   STEP is not used.
   NOTE(review): OUTEND is not consulted; the caller apparently has to
   guarantee four bytes of room -- confirm against skeleton.c.  */
static inline int
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp, unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed by this call; the count in the
             state still describes what was saved before.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Account for the dropped character like the loop functions do.  */
      ++*irreversible;
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
396
397 #include <iconv/skeleton.c>
398
399
400 /* Similarly for the little endian form.  */
401 #define DEFINE_INIT             0 /* Settings presumably read by <iconv/skeleton.c>, included after the loops -- TODO confirm.  */
402 #define DEFINE_FINI             0
403 #define MIN_NEEDED_FROM         4 /* The loops below consume four bytes per character.  */
404 #define MIN_NEEDED_TO           4 /* The loops below emit four bytes per character.  */
405 #define FROM_DIRECTION          1
406 #define FROM_LOOP               internal_ucs4le_loop
407 #define TO_LOOP                 internal_ucs4le_loop /* This is not used; it only repeats FROM_LOOP.  */
408 #define FUNCTION_NAME           __gconv_transform_internal_ucs4le /* Presumably the name of the generated entry point -- TODO confirm.  */
409
410
/* Convert whole characters from the internal format (32-bit values in
   host byte order) to little-endian UCS4.

   STEP, STEP_DATA and IRREVERSIBLE are not used.  *INPTRP and *OUTPTRP
   are advanced past everything that was converted.  On big-endian
   hosts both buffers are accessed as uint32_t objects, so they must be
   suitably aligned.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output room are
   left, and __GCONV_INCOMPLETE_INPUT when only a partial character
   (one to three bytes) remains in the input.  */
static inline int
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Number of characters for which there is both input and room.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    *(uint32_t *) outptr = bswap_32 (*(const uint32_t *) inptr);

  *inptrp = inptr;
  *outptrp = outptr;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
#else
# error "This endianess is not supported."
#endif

  /* Determine the status.  The output counts as full as soon as it
     cannot hold another character, not only when it is exactly
     exhausted.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
450
451 #ifndef _STRING_ARCH_unaligned
/* Variant of internal_ucs4le_loop that only uses byte accesses and so
   works for buffers with arbitrary alignment.

   STEP, STEP_DATA and IRREVERSIBLE are not used.  *INPTRP and *OUTPTRP
   are advanced past everything that was converted.

   Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_FULL_OUTPUT when fewer than four bytes of output room are
   left, and __GCONV_INCOMPLETE_INPUT when only a partial character
   remains in the input.  */
static inline int
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  /* Number of characters for which there is both input and room.  */
  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  int result;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Sigh, we have to do some real work.  */
  size_t cnt;

  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
    {
      outptr[0] = inptr[3];
      outptr[1] = inptr[2];
      outptr[2] = inptr[1];
      outptr[3] = inptr[0];
    }

  *inptrp = inptr;
  *outptrp = outptr;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Simply copy the data.  */
  memcpy (outptr, inptr, n_convert * 4);
  *inptrp = inptr + n_convert * 4;
  *outptrp = outptr + n_convert * 4;
# else
#  error "This endianess is not supported."
# endif

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
497 #endif
498
499
/* Convert a single character to little-endian UCS4; part of the
   character may have been saved in the conversion state by an earlier
   call.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already saved in __value.__wchb.  Bytes are taken from *INPTRP
   until four are available.  If the input ends first, the new byte
   count is recorded and __GCONV_INCOMPLETE_INPUT is returned.
   Otherwise the character is written in little-endian order, *OUTPTRP
   is advanced by four, the saved byte count is cleared and __GCONV_OK
   is returned.

   STEP and IRREVERSIBLE are not used.
   NOTE(review): OUTEND is not consulted; the caller apparently has to
   guarantee four bytes of room -- confirm against skeleton.c.  */
static inline int
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Write bytewise in both cases: *OUTPTRP need not be aligned and it
     is the pointee, not OUTPTRP itself, which has to be advanced.  */
#if __BYTE_ORDER == __BIG_ENDIAN
  (*outptrp)[0] = state->__value.__wchb[3];
  (*outptrp)[1] = state->__value.__wchb[2];
  (*outptrp)[2] = state->__value.__wchb[1];
  (*outptrp)[3] = state->__value.__wchb[0];
#else
  (*outptrp)[0] = state->__value.__wchb[0];
  (*outptrp)[1] = state->__value.__wchb[1];
  (*outptrp)[2] = state->__value.__wchb[2];
  (*outptrp)[3] = state->__value.__wchb[3];
#endif

  *outptrp += 4;

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
540
541 #include <iconv/skeleton.c>
542
543
544 /* And finally from UCS4-LE to the internal encoding.  */
545 #define DEFINE_INIT             0 /* Settings presumably read by <iconv/skeleton.c>, included after the loops -- TODO confirm.  */
546 #define DEFINE_FINI             0
547 #define MIN_NEEDED_FROM         4 /* The loops below consume four bytes per character.  */
548 #define MIN_NEEDED_TO           4 /* The loops below emit four bytes per character.  */
549 #define FROM_DIRECTION          1
550 #define FROM_LOOP               ucs4le_internal_loop
551 #define TO_LOOP                 ucs4le_internal_loop /* This is not used; it only repeats FROM_LOOP.  */
552 #define FUNCTION_NAME           __gconv_transform_ucs4le_internal /* Presumably the name of the generated entry point -- TODO confirm.  */
553
554
/* Convert whole characters from little-endian UCS4 to the internal
   format (32-bit values in host byte order), rejecting values above
   0x7fffffff.

   STEP is not used.  Both buffers are accessed as uint32_t objects and
   must be suitably aligned.  *INPTRP and *OUTPTRP are advanced past
   everything that was processed, also when an error is reported.

   When an invalid value is met and STEP_DATA->__flags contains
   __GCONV_IGNORE_ERRORS, the character is skipped and *IRREVERSIBLE is
   incremented.  Otherwise the function stops in front of the invalid
   character and returns __GCONV_ILLEGAL_INPUT.

   Apart from that the result is __GCONV_EMPTY_INPUT when all input was
   consumed, __GCONV_FULL_OUTPUT when fewer than four bytes of output
   room are left, and __GCONV_INCOMPLETE_INPUT when only a partial
   character remains in the input.  */
static inline int
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, unsigned char *outend,
                      size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The bounds are tested on every round.  A skipped character uses up
     input but no output, so a count computed up front would end the
     loop while there are still complete characters and room left.  */
  while (inend - inptr >= 4 && outend - outptr >= 4)
    {
      uint32_t inval = *(const uint32_t *) inptr;

#if __BYTE_ORDER == __BIG_ENDIAN
      inval = bswap_32 (inval);
#endif

      if (__builtin_expect (inval > 0x7fffffff, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              inptr += 4;
              continue;
            }

          /* Report how far we got; the characters converted so far
             must not be lost.  */
          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

      *(uint32_t *) outptr = inval;
      inptr += 4;
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
611
612 #ifndef _STRING_ARCH_unaligned
/* Variant of ucs4le_internal_loop that only uses byte accesses and so
   works for buffers with arbitrary alignment.

   The input is little-endian, so a character is above 0x7fffffff
   exactly when the top bit of its fourth byte is set.  Such characters
   are skipped and counted in *IRREVERSIBLE when STEP_DATA->__flags
   contains __GCONV_IGNORE_ERRORS.  Otherwise the function stops in
   front of the character and returns __GCONV_ILLEGAL_INPUT.

   STEP is not used.  *INPTRP and *OUTPTRP are advanced past everything
   that was processed.  Apart from the error case the result is
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four bytes of output room are left, and
   __GCONV_INCOMPLETE_INPUT when only a partial character remains.  */
static inline int
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp, unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  /* The bounds are tested on every round since a skipped character
     uses up input but no output.  */
  while (inend - inptr >= 4 && outend - outptr >= 4)
    {
      if (__builtin_expect (inptr[3] > 0x7f, 0))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              inptr += 4;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      inptr += 4;
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (outend - *outptrp < 4)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
676 #endif
677
678
/* Convert a single little-endian UCS4 character, part of which may
   have been saved in the conversion state by an earlier call.

   The low three bits of STEP_DATA->__statep->__count hold the number of
   bytes already saved in __value.__wchb.  Bytes are taken from *INPTRP
   until four are available.  If the input ends first, the new byte
   count is recorded and __GCONV_INCOMPLETE_INPUT is returned.

   A character above 0x7fffffff (top bit of the fourth byte set) is
   invalid.  With __GCONV_IGNORE_ERRORS in STEP_DATA->__flags it is
   dropped and counted in *IRREVERSIBLE.  Otherwise the bytes read by
   this call are handed back by rewinding *INPTRP and
   __GCONV_ILLEGAL_INPUT is returned.  A valid character is written in
   host byte order and *OUTPTRP is advanced by four.  In both non-error
   cases the saved byte count is cleared and __GCONV_OK is returned.

   STEP is not used.
   NOTE(review): OUTEND is not consulted; the caller apparently has to
   guarantee four bytes of room -- confirm against skeleton.c.  */
static inline int
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp, unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__builtin_expect (cnt < 4, 0))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes consumed by this call; the count in the
             state still describes what was saved before.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Account for the dropped character like the loop functions do.  */
      ++*irreversible;
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
735
736 #include <iconv/skeleton.c>
737
738
739 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
740 #define DEFINE_INIT             0
741 #define DEFINE_FINI             0
742 #define MIN_NEEDED_FROM         1
743 #define MIN_NEEDED_TO           4
744 #define FROM_DIRECTION          1
745 #define FROM_LOOP               ascii_internal_loop
746 #define TO_LOOP                 ascii_internal_loop /* This is not used.  */
747 #define FUNCTION_NAME           __gconv_transform_ascii_internal
748 #define ONE_DIRECTION           1
749
750 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
751 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
752 #define LOOPFCT                 FROM_LOOP
753 #define BODY \
754   {                                                                           \
755     if (__builtin_expect (*inptr, 0) > '\x7f')                                \
756       {                                                                       \
757         /* The value is too large.  We don't try transliteration here since   \
758            this is not an error because of the lack of possibilities to       \
759            represent the result.  This is a genuine bug in the input since    \
760            ASCII does not allow such values.  */                              \
761         if (! ignore_errors_p ())                                             \
762           {                                                                   \
763             /* This is no correct ANSI_X3.4-1968 character.  */               \
764             result = __GCONV_ILLEGAL_INPUT;                                   \
765             break;                                                            \
766           }                                                                   \
767                                                                               \
768         ++*irreversible;                                                      \
769         ++inptr;                                                              \
770       }                                                                       \
771     else                                                                      \
772       /* It's an one byte sequence.  */                                       \
773       /* XXX unaligned.  */                                                   \
774       *((uint32_t *) outptr)++ = *inptr++;                                    \
775   }
776 #define LOOP_NEED_FLAGS
777 #include <iconv/loop.c>
778 #include <iconv/skeleton.c>
779
780
781 /* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
782 #define DEFINE_INIT             0
783 #define DEFINE_FINI             0
784 #define MIN_NEEDED_FROM         4
785 #define MIN_NEEDED_TO           1
786 #define FROM_DIRECTION          1
787 #define FROM_LOOP               internal_ascii_loop
788 #define TO_LOOP                 internal_ascii_loop /* This is not used.  */
789 #define FUNCTION_NAME           __gconv_transform_internal_ascii
790 #define ONE_DIRECTION           1
791
792 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
793 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
794 #define LOOPFCT                 FROM_LOOP
795 #define BODY \
796   {                                                                           \
797     /* XXX unaligned.  */                                                     \
798     if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f)                   \
799       {                                                                       \
800         STANDARD_ERR_HANDLER (4);                                             \
801       }                                                                       \
802     else                                                                      \
803       /* It's an one byte sequence.  */                                       \
804       *outptr++ = *((uint32_t *) inptr)++;                                    \
805   }
806 #define LOOP_NEED_FLAGS
807 #include <iconv/loop.c>
808 #include <iconv/skeleton.c>
809
810
811 /* Convert from the internal (UCS4-like) format to UTF-8.  */
812 #define DEFINE_INIT             0
813 #define DEFINE_FINI             0
814 #define MIN_NEEDED_FROM         4
815 #define MIN_NEEDED_TO           1
816 #define MAX_NEEDED_TO           6
817 #define FROM_DIRECTION          1
818 #define FROM_LOOP               internal_utf8_loop
819 #define TO_LOOP                 internal_utf8_loop /* This is not used.  */
820 #define FUNCTION_NAME           __gconv_transform_internal_utf8
821 #define ONE_DIRECTION           1
822
823 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
824 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
825 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
826 #define LOOPFCT                 FROM_LOOP
827 #define BODY \
828   {                                                                           \
829     uint32_t wc = *((uint32_t *) inptr);                                      \
830                                                                               \
831     /* Since we control every character we read this cannot happen.  */       \
832     assert (wc <= 0x7fffffff);                                                \
833                                                                               \
834     if (wc < 0x80)                                                            \
835       /* It's an one byte sequence.  */                                       \
836       *outptr++ = (unsigned char) wc;                                         \
837     else                                                                      \
838       {                                                                       \
839         size_t step;                                                          \
840         char *start;                                                          \
841                                                                               \
842         for (step = 2; step < 6; ++step)                                      \
843           if ((wc & encoding_mask[step - 2]) == 0)                            \
844             break;                                                            \
845                                                                               \
846         if (__builtin_expect (outptr + step > outend, 0))                     \
847           {                                                                   \
848             /* Too long.  */                                                  \
849             result = __GCONV_FULL_OUTPUT;                                     \
850             break;                                                            \
851           }                                                                   \
852                                                                               \
853         start = outptr;                                                       \
854         *outptr = encoding_byte[step - 2];                                    \
855         outptr += step;                                                       \
856         --step;                                                               \
857         do                                                                    \
858           {                                                                   \
859             start[step] = 0x80 | (wc & 0x3f);                                 \
860             wc >>= 6;                                                         \
861           }                                                                   \
862         while (--step > 0);                                                   \
863         start[0] |= wc;                                                       \
864       }                                                                       \
865                                                                               \
866     inptr += 4;                                                               \
867   }
868 #include <iconv/loop.c>
869 #include <iconv/skeleton.c>
870
871
872 /* Convert from UTF-8 to the internal (UCS4-like) format.

       Each expansion of BODY decodes one character.  The lead byte at
       *INPTR selects the sequence length, 1 to 6 bytes (MIN_NEEDED_FROM
       and MAX_NEEDED_FROM); every trail byte must match 10xxxxxx and
       supplies six more bits.  The decoded value is stored as one
       uint32_t at OUTPTR (MIN_NEEDED_TO bytes) and both pointers are
       advanced.

       A sequence cut short by INEND whose available trail bytes are all
       valid sets RESULT to __GCONV_INCOMPLETE_INPUT.  An invalid lead
       byte (this includes 0xc0 and 0xc1), a bad trail byte, or a sequence
       of three or more bytes whose value would fit in fewer bytes sets
       RESULT to __GCONV_ILLEGAL_INPUT, unless ignore_errors_p () holds;
       then the offending bytes are skipped and *IRREVERSIBLE is
       incremented.

       NOTE(review): the `break' and `continue' statements presumably act
       on the loop that <iconv/loop.c> wraps around BODY; that file is not
       visible here.  */
873 #define DEFINE_INIT             0
874 #define DEFINE_FINI             0
875 #define MIN_NEEDED_FROM         1
876 #define MAX_NEEDED_FROM         6
877 #define MIN_NEEDED_TO           4
878 #define FROM_DIRECTION          1
879 #define FROM_LOOP               utf8_internal_loop
880 #define TO_LOOP                 utf8_internal_loop /* This is not used.  */
881 #define FUNCTION_NAME           __gconv_transform_utf8_internal
882 #define ONE_DIRECTION           1
883
884 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
885 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
886 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
887 #define LOOPFCT                 FROM_LOOP
888 #define BODY \
889   {                                                                           \
890     uint32_t ch;                                                              \
891     uint_fast32_t cnt;                                                        \
892     uint_fast32_t i;                                                          \
893                                                                               \
894     /* Next input byte.  */                                                   \
895     ch = *inptr;                                                              \
896                                                                               \
897     if (ch < 0x80)                                                            \
898       {                                                                       \
899         /* One byte sequence.  */                                             \
900         cnt = 1;                                                              \
901         ++inptr;                                                              \
902       }                                                                       \
903     else                                                                      \
904       {                                                                       \
905         if (ch >= 0xc2 && ch < 0xe0)                                          \
906           {                                                                   \
907             /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
908                otherwise the wide character could have been represented       \
909                using a single byte.  */                                       \
910             cnt = 2;                                                          \
911             ch &= 0x1f;                                                       \
912           }                                                                   \
913         else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0)                  \
914           {                                                                   \
915             /* We expect three bytes.  */                                     \
916             cnt = 3;                                                          \
917             ch &= 0x0f;                                                       \
918           }                                                                   \
919         else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0)                  \
920           {                                                                   \
921             /* We expect four bytes.  */                                      \
922             cnt = 4;                                                          \
923             ch &= 0x07;                                                       \
924           }                                                                   \
925         else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8)                  \
926           {                                                                   \
927             /* We expect five bytes.  */                                      \
928             cnt = 5;                                                          \
929             ch &= 0x03;                                                       \
930           }                                                                   \
931         else if (__builtin_expect (ch & 0xfe, 0xfc) == 0xfc)                  \
932           {                                                                   \
933             /* We expect six bytes.  */                                       \
934             cnt = 6;                                                          \
935             ch &= 0x01;                                                       \
936           }                                                                   \
937         else                                                                  \
938           {                                                                   \
939             int skipped;                                                      \
940                                                                               \
941             if (! ignore_errors_p ())                                         \
942               {                                                               \
943                 /* This is an illegal encoding.  */                           \
944                 result = __GCONV_ILLEGAL_INPUT;                               \
945                 break;                                                        \
946               }                                                               \
947                                                                               \
948             /* Search the end of this ill-formed UTF-8 character.  This       \
949                is the next byte with (x & 0xc0) != 0x80.  */                  \
950              skipped = 0;                                                     \
951              do                                                               \
952                {                                                              \
953                  ++inptr;                                                     \
954                  ++skipped;                                                   \
955                }                                                              \
956              while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \
957              ++*irreversible;  /* As in the other ignore paths.  */           \
958              continue;                                                        \
959           }                                                                   \
960                                                                               \
961         if (__builtin_expect (inptr + cnt > inend, 0))                      \
962           {                                                                   \
963             /* We don't have enough input.  But before we report that check   \
964                that all the bytes are correct.  */                            \
965             for (i = 1; inptr + i < inend; ++i)                               \
966               if ((inptr[i] & 0xc0) != 0x80)                                  \
967                 break;                                                        \
968                                                                               \
969             if (__builtin_expect (inptr + i == inend, 1))                     \
970               {                                                               \
971                 result = __GCONV_INCOMPLETE_INPUT;                            \
972                 break;                                                        \
973               }                                                               \
974                                                                               \
975             /* This is an illegal character.  */                              \
976             if (ignore_errors_p ())                                           \
977               {                                                               \
978                 /* Ignore it.  */                                             \
979                 inptr += i;                                                   \
980                 ++*irreversible;                                              \
981                 continue;                                                     \
982               }                                                               \
983                                                                               \
984             result = __GCONV_ILLEGAL_INPUT;                                   \
985             break;                                                            \
986           }                                                                   \
987                                                                               \
988         /* Read the possible remaining bytes.  */                             \
989         for (i = 1; i < cnt; ++i)                                             \
990           {                                                                   \
991             uint32_t byte = inptr[i];                                         \
992                                                                               \
993             if ((byte & 0xc0) != 0x80)                                        \
994               /* This is an illegal encoding.  */                             \
995               break;                                                          \
996                                                                               \
997             ch <<= 6;                                                         \
998             ch |= byte & 0x3f;                                                \
999           }                                                                   \
1000                                                                               \
1001         /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.               \
1002            If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could       \
1003            have been represented with fewer than cnt bytes.  */               \
1004         if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))               \
1005           {                                                                   \
1006             /* This is an illegal encoding.  */                               \
1007             if (ignore_errors_p ())                                           \
1008               {                                                               \
1009                 inptr += i;                                                   \
1010                 ++*irreversible;                                              \
1011                 continue;                                                     \
1012               }                                                               \
1013                                                                               \
1014             result = __GCONV_ILLEGAL_INPUT;                                   \
1015             break;                                                            \
1016           }                                                                   \
1017                                                                               \
1018         inptr += cnt;                                                         \
1019       }                                                                       \
1020     /* Store the result, then advance (a cast is not an ISO C lvalue).  */    \
1021     *((uint32_t *) outptr) = ch;                                              \
1022     outptr += sizeof (uint32_t);                                              \
1023   }
1024 #define LOOP_NEED_FLAGS
1025 /* STORE_REST records an incomplete UTF-8 sequence in STATE.  The number
        of bytes available (INEND - *INPTRP) goes into STATE->__count.  The
        payload bits of the lead byte and of every trail byte up to INEND are
        accumulated, shifted left by six bits for each trail byte that is
        still missing, and stored in STATE->__value.__wch.  *INPTRP is
        advanced up to INEND.
        NOTE(review): presumably UNPACK_BYTES rebuilds the bytes from this
        state on a later call; the code that expands both macros is not
        visible here.  */
1026 #define STORE_REST \
1027   {                                                                           \
1028     /* We store the remaining bytes while converting them into the UCS4       \
1029        format.  We can assume that the first byte in the buffer is            \
1030        correct and that it requires a larger number of bytes than there       \
1031        are in the input buffer.  */                                           \
1032     wint_t ch = **inptrp;                                                     \
1033     size_t cnt;                                                               \
1034                                                                               \
1035     state->__count = inend - *inptrp;                                         \
1036                                                                               \
1037     if (ch >= 0xc2 && ch < 0xe0)                                              \
1038       {                                                                       \
1039         /* We expect two bytes.  The first byte cannot be 0xc0 or             \
1040            0xc1, otherwise the wide character could have been                 \
1041            represented using a single byte.  */                               \
1042         cnt = 2;                                                              \
1043         ch &= 0x1f;                                                           \
1044       }                                                                       \
1045     else if (__builtin_expect (ch & 0xf0, 0xe0) == 0xe0)                      \
1046       {                                                                       \
1047         /* We expect three bytes.  */                                         \
1048         cnt = 3;                                                              \
1049         ch &= 0x0f;                                                           \
1050       }                                                                       \
1051     else if (__builtin_expect (ch & 0xf8, 0xf0) == 0xf0)                      \
1052       {                                                                       \
1053         /* We expect four bytes.  */                                          \
1054         cnt = 4;                                                              \
1055         ch &= 0x07;                                                           \
1056       }                                                                       \
1057     else if (__builtin_expect (ch & 0xfc, 0xf8) == 0xf8)                      \
1058       {                                                                       \
1059         /* We expect five bytes.  */                                          \
1060         cnt = 5;                                                              \
1061         ch &= 0x03;                                                           \
1062       }                                                                       \
1063     else                                                                      \
1064       {                                                                       \
1065         /* We expect six bytes.  */                                           \
1066         cnt = 6;                                                              \
1067         ch &= 0x01;                                                           \
1068       }                                                                       \
1069                                                                               \
1070     /* The first byte is already consumed.  */                                \
1071     --cnt;                                                                    \
1072     while (++(*inptrp) < inend)                                               \
1073       {                                                                       \
1074         ch <<= 6;                                                             \
1075         ch |= **inptrp & 0x3f;                                                \
1076         --cnt;                                                                \
1077       }                                                                       \
1078                                                                               \
1079     /* Shift for the so far missing bytes.  */                                \
1080     ch <<= cnt * 6;                                                           \
1081                                                                               \
1082     /* Store the value.  */                                                   \
1083     state->__value.__wch = ch;                                                \
1084   }
1085 /* UNPACK_BYTES turns the value saved in STATE back into UTF-8 bytes.
        INLEN is set to STATE->__count.  The lead byte marker and the total
        sequence length are chosen from the magnitude of
        STATE->__value.__wch.  BYTEBUF[0] receives the lead byte, and the
        trail positions below INLEN receive 0x80 plus six value bits each;
        positions at or beyond INLEN are left untouched.
        NOTE(review): this looks like the inverse of STORE_REST; how the
        expanding code uses BYTEBUF and INLEN is not visible here.  */
1086 #define UNPACK_BYTES \
1087   {                                                                           \
1088     wint_t wch = state->__value.__wch;                                        \
1089     size_t ntotal;                                                            \
1090     inlen = state->__count;                                                   \
1091                                                                               \
1092     if (state->__value.__wch <= 0x7ff)                                        \
1093       {                                                                       \
1094         bytebuf[0] = 0xc0;                                                    \
1095         ntotal = 2;                                                           \
1096       }                                                                       \
1097     else if (__builtin_expect (state->__value.__wch, 0) <= 0xffff)            \
1098       {                                                                       \
1099         bytebuf[0] = 0xe0;                                                    \
1100         ntotal = 3;                                                           \
1101       }                                                                       \
1102     else if (__builtin_expect (state->__value.__wch, 0) <= 0x1fffff)          \
1103       {                                                                       \
1104         bytebuf[0] = 0xf0;                                                    \
1105         ntotal = 4;                                                           \
1106       }                                                                       \
1107     else if (__builtin_expect (state->__value.__wch, 0) <= 0x3ffffff)         \
1108       {                                                                       \
1109         bytebuf[0] = 0xf8;                                                    \
1110         ntotal = 5;                                                           \
1111       }                                                                       \
1112     else                                                                      \
1113       {                                                                       \
1114         bytebuf[0] = 0xfc;                                                    \
1115         ntotal = 6;                                                           \
1116       }                                                                       \
1117                                                                               \
1118     do                                                                        \
1119       {                                                                       \
1120         if (--ntotal < inlen)  /* Only bytes already seen.  */                \
1121           bytebuf[ntotal] = 0x80 | (wch & 0x3f);                              \
1122         wch >>= 6;                                                            \
1123       }                                                                       \
1124     while (ntotal > 1);                                                       \
1125                                                                               \
1126     bytebuf[0] |= wch;                                                        \
1127   }
1128
1129 #include <iconv/loop.c>
1130 #include <iconv/skeleton.c>
1131
1132
1133 /* Convert from UCS2 to the internal (UCS4-like) format.  Each expansion
        of BODY loads one 16-bit unit from INPTR without byte swapping,
        stores it zero-extended as one 32-bit unit at OUTPTR, and advances
        INPTR by two and OUTPTR by four bytes.  */
1134 #define DEFINE_INIT             0
1135 #define DEFINE_FINI             0
1136 #define MIN_NEEDED_FROM         2
1137 #define MIN_NEEDED_TO           4
1138 #define FROM_DIRECTION          1
1139 #define FROM_LOOP               ucs2_internal_loop
1140 #define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
1141 #define FUNCTION_NAME           __gconv_transform_ucs2_internal
1142 #define ONE_DIRECTION           1
1143
1144 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
1145 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
1146 #define LOOPFCT                 FROM_LOOP
1147 #define BODY \
1148   {                                                                           \
         /* ISO C does not allow `++' on a cast; advance separately.  */           \
         *((uint32_t *) outptr) = *((uint16_t *) inptr);                           \
         outptr += sizeof (uint32_t);                                              \
         inptr += sizeof (uint16_t);                                               \
       }
1149 #include <iconv/loop.c>
1150 #include <iconv/skeleton.c>
1151
1152
1153 /* Convert from the internal (UCS4-like) format to UCS2.  Each expansion
        of BODY looks at one 32-bit unit at INPTR.  Values of 0x10000 and
        above do not fit into 16 bits and are handed to
        STANDARD_ERR_HANDLER (4), which is defined outside this file section.
        Any other value is stored as one 16-bit unit at OUTPTR, after which
        INPTR advances by four and OUTPTR by two bytes.  */
1154 #define DEFINE_INIT             0
1155 #define DEFINE_FINI             0
1156 #define MIN_NEEDED_FROM         4
1157 #define MIN_NEEDED_TO           2
1158 #define FROM_DIRECTION          1
1159 #define FROM_LOOP               internal_ucs2_loop
1160 #define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
1161 #define FUNCTION_NAME           __gconv_transform_internal_ucs2
1162 #define ONE_DIRECTION           1
1163
1164 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
1165 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
1166 #define LOOPFCT                 FROM_LOOP
1167 #define BODY \
1168   {                                                                           \
1169     if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000)               \
1170       {                                                                       \
1171         STANDARD_ERR_HANDLER (4);                                             \
1172       }                                                                       \
1173     else                                                                      \
1174       {                                                                       \
             /* A cast is not an ISO C lvalue; advance separately.  */             \
             *((uint16_t *) outptr) = *((uint32_t *) inptr);                       \
             outptr += sizeof (uint16_t);                                          \
             inptr += sizeof (uint32_t);                                           \
           }                                                                       \
1175   }
1176 #define LOOP_NEED_FLAGS
1177 #include <iconv/loop.c>
1178 #include <iconv/skeleton.c>
1179
1180
1181 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
1182 #define DEFINE_INIT             0
1183 #define DEFINE_FINI             0
1184 #define MIN_NEEDED_FROM         2
1185 #define MIN_NEEDED_TO           4
1186 #define FROM_DIRECTION          1
1187 #define FROM_LOOP               ucs2reverse_internal_loop
1188 #define TO_LOOP                 ucs2reverse_internal_loop/* This is not used.*/
1189 #define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
1190 #define ONE_DIRECTION           1
1191
1192 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
1193 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
1194 #define LOOPFCT                 FROM_LOOP
1195 #define BODY \
1196   *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr);                  \
1197   inptr += 2;
1198 #include <iconv/loop.c>
1199 #include <iconv/skeleton.c>
1200
1201
1202 /* Convert from the internal (UCS4-like) format to UCS2 in other
        endianness.  Each expansion of BODY loads one 32-bit unit from
        INPTR.  Values of 0x10000 and above do not fit into 16 bits and are
        handed to STANDARD_ERR_HANDLER (4), which is defined outside this
        file section.  Otherwise the value is byte-swapped with bswap_16 and
        stored as one 16-bit unit at OUTPTR; OUTPTR then advances by two and
        INPTR by four bytes.  The pointer is advanced in a separate
        statement because a cast is not an lvalue in ISO C.
        NOTE(review): the store is not in an `else' branch, so this relies
        on STANDARD_ERR_HANDLER always leaving the current iteration --
        confirm against <iconv/loop.c>.  */
1203 #define DEFINE_INIT             0
1204 #define DEFINE_FINI             0
1205 #define MIN_NEEDED_FROM         4
1206 #define MIN_NEEDED_TO           2
1207 #define FROM_DIRECTION          1
1208 #define FROM_LOOP               internal_ucs2reverse_loop
1209 #define TO_LOOP                 internal_ucs2reverse_loop/* This is not used.*/
1210 #define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
1211 #define ONE_DIRECTION           1
1212
1213 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
1214 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
1215 #define LOOPFCT                 FROM_LOOP
1216 #define BODY \
1217   {                                                                           \
1218     uint32_t val = *((uint32_t *) inptr);                                     \
1219     if (__builtin_expect (val, 0) >= 0x10000)                                 \
1220       {                                                                       \
1221         STANDARD_ERR_HANDLER (4);                                             \
1222       }                                                                       \
1223     *((uint16_t *) outptr) = bswap_16 (val);                                  \
         outptr += sizeof (uint16_t);                                              \
1224     inptr += 4;                                                               \
1225   }
1226 #define LOOP_NEED_FLAGS
1227 #include <iconv/loop.c>
1228 #include <iconv/skeleton.c>