Don't return an error for invalid input if the ignore flag is set.
[kopensolaris-gnu/glibc.git] / iconvdata / johab.c
1 /* Mapping tables for JOHAB handling.
2    Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
5    and Ulrich Drepper <drepper@cygnus.com>, 1998.
6
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Library General Public License as
9    published by the Free Software Foundation; either version 2 of the
10    License, or (at your option) any later version.
11
12    The GNU C Library is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    Library General Public License for more details.
16
17    You should have received a copy of the GNU Library General Public
18    License along with the GNU C Library; see the file COPYING.LIB.  If not,
19    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20    Boston, MA 02111-1307, USA.  */
21
22 #include <stdint.h>
23 #include <ksc5601.h>
24
/* Tables mapping the 5-bit fields of a two-byte Johab Hangul code to
   jamo indices.  5 bits each are used to encode
   leading consonants (19 + 1 filler), medial vowels (21 + 1 filler)
   and trailing consonants (27 + 1 filler).

   KS C 5601-1992 Annex 3 Table 2
   0 : Filler, -1: invalid, >= 1 : valid

   Every table is indexed by the raw 5-bit field value, so each one has
   exactly 32 slots.  All 32 initializers are written out explicitly:
   an omitted trailing initializer would be zero-initialized, and 0 means
   "filler" here, not "invalid".  */

/* Leading consonant: field values 1 (filler) and 2..20 are assigned.  */
static const int init[32] =
{
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
  19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/* Medial vowel: the two lowest values of every group of eight are
   unassigned; field value 2 is the filler.  */
static const int mid[32] =
{
  -1, -1, 0, 1, 2, 3, 4, 5,
  -1, -1, 6, 7, 8, 9, 10, 11,
  -1, -1, 12, 13, 14, 15, 16, 17,
  -1, -1, 18, 19, 20, 21, -1, -1
};

/* Trailing consonant: field value 1 is the filler and 0x12 is a hole in
   the middle of the otherwise contiguous range.  */
static const int final[32] =
{
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
  -1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, -1, -1
};
51
/*
   Hangul Jamo in Johab to Unicode 2.0 : Unicode 2.0
   defines 51 Hangul Compatibility Jamos in the block [0x3131,0x3163]
   (consonants in [0x3131,0x314e], vowels in [0x314f,0x3163]).

   It's to be considered later which Jamo block to use, Compatibility
   block [0x3131,0x314e] or Hangul Conjoining Jamo block, [0x1100,0x11ff]

 */

/* UCS4 value of a stand-alone leading consonant; indexed by the jamo
   index from the leading-consonant table minus 1.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};

/* UCS4 value of a stand-alone trailing consonant; indexed by the jamo
   index from the trailing-consonant table minus 1.  Only the consonant
   clusters that cannot also be written as a leading consonant have an
   entry; every other slot is 0, which the decoder treats as "no such
   character".

   Note that the index is the jamo index, not the raw bit pattern: the
   trailing-consonant bit patterns have a hole at 0x12, so U+3144 (bit
   pattern 0x14, jamo index 18) lives at slot 17, matching the code
   0x8454 that the UCS4 -> Johab direction produces for it.  */
static const uint32_t final_to_ucs[31] =
{
  /*  0 */ 0,      0,      0x3133, 0,      0x3135, 0x3136, 0,      0,
  /*  8 */ 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f, 0x3140, 0,
  /* 16 */ 0,      0x3144, 0,      0,      0,      0,      0,      0,
  /* 24 */ 0,      0,      0,      0,      0,      0,      0
};
74
/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd7a3]
   to Jamo bit patterns for Johab encoding

   cf. : KS C 5601-1992, Annex3 Table 2

   Arrays are used to speed up things although it's possible
   to get the same result arithmetically.

   The three values for a syllable are simply added together; the
   leading-consonant entries already contain the high bit 0x8000 of the
   two-byte code.  */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,
  0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00,
  0xd000
};

static const int mid_to_bit[21] =
{
  0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};

/* Index 0 is "no trailing consonant", encoded as the filler pattern 1.
   Bit pattern 0x12 is unassigned and therefore skipped.  */
static const int final_to_bit[28] =
{
  1, 2, 3, 4, 5, 6, 7, 8, 9, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
  0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};

/* The conversion table from
   UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
   to Johab

   cf. 1. KS C 5601-1992 Annex 3 Table 2
   2. Unicode 2.0 manual

   Indexed by (UCS4 value - 0x3131).  Three kinds of entries occur:
   - consonants usable as a leading consonant: init_to_bit[] value + 0x41
     (filler vowel, filler trailing consonant);
   - consonant clusters that exist only as trailing consonants:
     0x8440 (filler leading consonant, filler vowel) + trailing bit pattern;
   - vowels: 0x8401 (filler leading and trailing consonant)
     + mid_to_bit[] value.  */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* U+3131 */ 0x8841, 0x8c41,
  /* U+3133 */ 0x8444,
  /* U+3134 */ 0x9041,
  /* U+3135 */ 0x8446, 0x8447,
  /* U+3137 */ 0x9441, 0x9841, 0x9c41,
  /* U+313A */ 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  /* U+3141 */ 0xa041, 0xa441, 0xa841,
  /* U+3144 */ 0x8454,
  /* U+3145 */ 0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  /* U+314A */ 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* U+314F */ 0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  /* U+3154 */ 0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  /* U+315A */ 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  /* U+3160 */ 0x8741, 0x8761, 0x8781, 0x87a1
};
132
133
134 static inline uint32_t
135 johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
136 {
137   if (idx <= 0xdefe)
138     return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139                                            - (c2 > 0x90 ? 0x43 : 0x31)];
140   else
141     return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142                                              - (c2 > 0x90 ? 0x43 : 0x31)];
143 }
144 /* Definitions used in the body of the `gconv' function.

   FROM_LOOP and TO_LOOP are the names given (via LOOPFCT) to the two
   conversion loops defined below.  The "from" side is JOHAB, where one
   character takes 1 or 2 bytes (MIN_NEEDED_FROM / MAX_NEEDED_FROM); the
   "to" side is handled in 4-byte units (MIN_NEEDED_TO), which the loop
   bodies read and write with get32 / put32.

   NOTE(review): DEFINE_INIT / DEFINE_FINI presumably request the default
   init/fini functions from iconv/skeleton.c -- confirm there.  */
145 #define CHARSET_NAME            "JOHAB//"
146 #define FROM_LOOP               from_johab
147 #define TO_LOOP                 to_johab
148 #define DEFINE_INIT             1
149 #define DEFINE_FINI             1
150 #define MIN_NEEDED_FROM         1
151 #define MAX_NEEDED_FROM         2
152 #define MIN_NEEDED_TO           4
153
154
155 /* First define the conversion function from JOHAB to UCS4.

   BODY handles one input character per pass; it is expanded inside the
   loop that <iconv/loop.c> builds, so `break' leaves that loop and
   `continue' starts the next character.  The names inptr, inend, outptr,
   result, converted, NEED_LENGTH_TEST, ignore_errors_p and put32 are not
   defined in this file (presumably they come from the loop/skeleton
   framework -- confirm there).

   Per character:
   - A byte below 0x7f is passed through unchanged (the mapping of 0x5c
     to the WON sign is commented out).
   - Bytes 0x7f..0x83, 0xd4..0xd8, 0xdf and anything above 0xf9 are
     rejected as lead bytes.
   - Otherwise a second byte is required; if NEED_LENGTH_TEST is set and
     it is not available, result becomes __GCONV_INCOMPLETE_INPUT.
   - Lead bytes up to 0xd3 are Hangul: the 16-bit code is split into three
     5-bit fields which are translated through init[], mid[] and final[].
     A full syllable is computed arithmetically from 0xac00; an isolated
     leading consonant, vowel or trailing consonant is mapped to a
     compatibility jamo.  Any other field combination is illegal.
   - The remaining lead bytes are symbols/Hanja: the second byte must be
     in 0x31..0x7e or 0x91..0xfe, lead byte 0xda with second byte
     0xa1..0xd3 is refused, and everything else is looked up with
     johab_sym_hanja_to_ucs.  A lookup result of 0 is illegal.

   Error handling: if ignore_errors_p () is false, result is set to
   __GCONV_ILLEGAL_INPUT and the loop is left.  Otherwise *converted is
   incremented and input is skipped -- one byte for a bad lead byte, bad
   bit field or bad second byte, two bytes when the lookup produced 0.

   NOTE(review): i, m and f are declared uint_fast32_t although the tables
   contain -1; the `== -1' tests work only through conversion of -1 to the
   unsigned type.
   NOTE(review): 0x7f fails the `ch < 0x7f' test and is then rejected as
   an illegal lead byte -- confirm that this is intended.  */
156 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
157 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
158 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
159 #define LOOPFCT                 FROM_LOOP
160 #define BODY \
161   {                                                                           \
162     uint32_t ch = *inptr;                                                     \
163                                                                               \
164     /* half-width Korean Currency WON sign                                    \
165        if (ch == 0x5c)                                                        \
166          ch =  0x20a9;                                                        \
167        else if (ch < 0x7f)                                                    \
168          ch = (uint32_t) ch;                                                  \
169     */                                                                        \
170     if (ch < 0x7f)                                                            \
171       /* Plain ASCII.  */                                                     \
172       ++inptr;                                                                \
173     /* Johab : 1. Hangul                                                      \
174        1st byte : 0x84-0xd3                                                   \
175        2nd byte : 0x41-0x7e, 0x81-0xfe                                        \
176        2. Hanja & Symbol  :                                                   \
177        1st byte : 0xd8-0xde, 0xe0-0xf9                                        \
178        2nd byte : 0x31-0x7e, 0x91-0xfe                                        \
179        0xd831-0xd87e and 0xd891-0xd8fe are user-defined area */               \
180     else                                                                      \
181       {                                                                       \
182         if (ch > 0xf9 || ch == 0xdf || (ch > 0x7e && ch < 0x84)               \
183             || (ch > 0xd3 && ch < 0xd9))                                      \
184           {                                                                   \
185             /* These are illegal.  */                                         \
186             if (! ignore_errors_p ())                                         \
187               {                                                               \
188                 /* This is an illegal character.  */                          \
189                 result = __GCONV_ILLEGAL_INPUT;                               \
190                 break;                                                        \
191               }                                                               \
192                                                                               \
193             ++inptr;                                                          \
194             ++*converted;                                                     \
195             continue;                                                         \
196           }                                                                   \
197         else                                                                  \
198           {                                                                   \
199             /* Two-byte character.  First test whether the next               \
200                character is also available.  */                               \
201             uint32_t ch2;                                                     \
202             uint_fast32_t idx;                                                \
203                                                                               \
204             if (NEED_LENGTH_TEST && inptr + 1 >= inend)                       \
205               {                                                               \
206                 /* The second character is not available.  Store the          \
207                    intermediate result.  */                                   \
208                 result = __GCONV_INCOMPLETE_INPUT;                            \
209                 break;                                                        \
210               }                                                               \
211                                                                               \
212             ch2 = inptr[1];                                                   \
213             idx = ch * 256 + ch2;                                             \
214             if (ch <= 0xd3)                                                   \
215               {                                                               \
216                 /* Hangul */                                                  \
217                 uint_fast32_t i, m, f;                                        \
218                                                                               \
219                 i = init[(idx & 0x7c00) >> 10];                               \
220                 m = mid[(idx & 0x03e0) >> 5];                                 \
221                 f = final[idx & 0x001f];                                      \
222                                                                               \
223                 if (i == -1 || m == -1 || f == -1)                            \
224                   {                                                           \
225                     /* This is illegal.  */                                   \
226                     if (! ignore_errors_p ())                                 \
227                       {                                                       \
228                         /* This is an illegal character.  */                  \
229                         result = __GCONV_ILLEGAL_INPUT;                       \
230                         break;                                                \
231                       }                                                       \
232                                                                               \
233                     ++inptr;                                                  \
234                     ++*converted;                                             \
235                     continue;                                                 \
236                   }                                                           \
237                 else if (i > 0 && m > 0)                                      \
238                   ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00;            \
239                 else if (i > 0 && m == 0 && f == 0)                           \
240                   ch = init_to_ucs[i - 1];                                    \
241                 else if (i == 0 && m > 0 && f == 0)                           \
242                   ch = 0x314e + m;      /* 0x314f + m - 1 */                  \
243                 else if (i == 0 && m == 0 && f > 0)                           \
244                   ch = final_to_ucs[f - 1];     /* round trip?? */            \
245                 else                                                          \
246                   {                                                           \
247                     /* This is illegal.  */                                   \
248                     if (! ignore_errors_p ())                                 \
249                       {                                                       \
250                         /* This is an illegal character.  */                  \
251                         result = __GCONV_ILLEGAL_INPUT;                       \
252                         break;                                                \
253                       }                                                       \
254                                                                               \
255                     ++inptr;                                                  \
256                     ++*converted;                                             \
257                     continue;                                                 \
258                   }                                                           \
259               }                                                               \
260             else                                                              \
261               {                                                               \
262                 if (ch2 < 0x31 || (ch2 > 0x7e && ch2 < 0x91) || ch2 == 0xff)  \
263                   {                                                           \
264                     /* This is illegal.  */                                   \
265                     if (! ignore_errors_p ())                                 \
266                       {                                                       \
267                         /* This is an illegal character.  */                  \
268                         result = __GCONV_ILLEGAL_INPUT;                       \
269                         break;                                                \
270                       }                                                       \
271                                                                               \
272                     ++inptr;                                                  \
273                     ++*converted;                                             \
274                     continue;                                                 \
275                   }                                                           \
276                 else if (ch == 0xda && ch2 > 0xa0 && ch2 < 0xd4)              \
277                   {                                                           \
278                     /* This is illegal.  Modern Hangul Jaso is defined        \
279                        elsewhere in Johab */                                  \
280                     if (! ignore_errors_p ())                                 \
281                       {                                                       \
282                         /* This is an illegal character.  */                  \
283                         result = __GCONV_ILLEGAL_INPUT;                       \
284                         break;                                                \
285                       }                                                       \
286                                                                               \
287                     ++inptr;                                                  \
288                     ++*converted;                                             \
289                     continue;                                                 \
290                   }                                                           \
291                 else                                                          \
292                   {                                                           \
293                     ch = johab_sym_hanja_to_ucs (idx, ch, ch2);               \
294                     /* if (idx <= 0xdefe)                                     \
295                          ch = __ksc5601_sym_to_ucs[(ch - 0xd9) * 192          \
296                                                    + ch2 - (ch2 > 0x90        \
297                                                             ? 0x43 : 0x31)];  \
298                        else                                                   \
299                          ch = __ksc5601_hanja_to_ucs[(ch - 0xe0) *192         \
300                                                      + ch2 -  (ch2 > 0x90     \
301                                                                ?0x43 : 0x31)];\
302                     */                                                        \
303                   }                                                           \
304               }                                                               \
305           }                                                                   \
306                                                                               \
307         if (ch == 0)                                                          \
308           {                                                                   \
309             /* This is an illegal character.  */                              \
310             if (! ignore_errors_p ())                                         \
311               {                                                               \
312                 /* This is an illegal character.  */                          \
313                 result = __GCONV_ILLEGAL_INPUT;                               \
314                 break;                                                        \
315               }                                                               \
316                                                                               \
317             inptr += 2;                                                       \
318             ++*converted;                                                     \
319             continue;                                                         \
320           }                                                                   \
321                                                                               \
322         inptr += 2;                                                           \
323       }                                                                       \
324                                                                               \
325     put32 (outptr, ch);                                                       \
326     outptr += 4;                                                              \
327   }
328 #include <iconv/loop.c>
329
330
331 /* Next, define the other direction.

   BODY reads one 4-byte value with get32 and writes one or two JOHAB
   bytes.  It is expanded inside the loop that <iconv/loop.c> builds, so
   `break' leaves that loop and `continue' starts the next character.
   inptr, outptr, outend, result, converted, NEED_LENGTH_TEST,
   ignore_errors_p and get32 are not defined in this file (presumably they
   come from the loop/skeleton framework -- confirm there).

   Per character:
   - Values below 0x7f are written as a single byte.
   - 0xac00..0xd7a3 (precomposed Hangul syllables) are decomposed
     arithmetically and assembled from init_to_bit[], mid_to_bit[] and
     final_to_bit[].
   - 0x3131..0x3163 (compatibility jamo) are looked up in
     jamo_from_ucs_table[].
   - 0x4e00..0x9fa5 and 0xf900..0xfa0b are converted with
     ucs4_to_ksc5601_hanja and the two bytes it stored are then rebased
     in place to a Johab code with lead byte 0xe0 and up.
   - Everything else is tried with ucs4_to_ksc5601_sym and rebased in
     place as well.

   If NEED_LENGTH_TEST is set and the output does not fit (or the helper
   returns 0), result becomes __GCONV_FULL_OUTPUT.  If the helper returns
   __UNKNOWN_10646_CHAR, result becomes __GCONV_ILLEGAL_INPUT unless
   ignore_errors_p () is true, in which case the 4 input bytes are
   skipped and *converted is incremented.

   NOTE(review): the commented-out block at the top of BODY refers to
   from_ucs4_lat1 / from_ucs4_greek / from_ucs4_cjk, none of which exist in
   this file; it looks like a leftover from another converter.
   NOTE(review): 0x7f fails the `ch < 0x7f' test and ends up in the symbol
   lookup -- confirm that this is intended.
   NOTE(review): the symbol branch ends with `outptr[0] += 0xe0', while
   the JOHAB -> UCS4 direction treats lead bytes up to 0xde as symbols and
   0xe0 and up as Hanja.  Confirm that this arithmetic really yields lead
   bytes in the symbol range.  */
332 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
333 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
334 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
335 #define LOOPFCT                 TO_LOOP
336 #define BODY \
337   {                                                                           \
338     uint32_t ch = get32 (inptr);                                              \
339     /*                                                                        \
340        if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0])))      \
341          {                                                                    \
342            if (ch >= 0x0391 && ch <= 0x0451)                                  \
343              cp = from_ucs4_greek[ch - 0x391];                                \
344            else if (ch >= 0x2010 && ch <= 0x9fa0)                             \
345              cp = from_ucs4_cjk[ch - 0x02010];                                \
346            else                                                               \
347              break;                                                           \
348          }                                                                    \
349        else                                                                   \
350          cp = from_ucs4_lat1[ch];                                             \
351     */                                                                        \
352                                                                               \
353     if (ch < 0x7f)                                                            \
354       *outptr++ = ch;                                                         \
355     else                                                                      \
356       {                                                                       \
357         if (ch >= 0xac00 && ch <= 0xd7a3)                                     \
358           {                                                                   \
359             if (NEED_LENGTH_TEST && outptr + 2 > outend)                      \
360               {                                                               \
361                 result = __GCONV_FULL_OUTPUT;                                 \
362                 break;                                                        \
363               }                                                               \
364                                                                               \
365             ch -= 0xac00;                                                     \
366                                                                               \
367             ch = (init_to_bit[ch / 588]   /* 21 * 28 = 588 */                 \
368                   + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */     \
369                   + final_to_bit[ch %  28]);  /* (ch % (21 * 28)) % 28 */     \
370                                                                               \
371             *outptr++ = ch / 256;                                             \
372             *outptr++ = ch % 256;                                             \
373           }                                                                   \
374         /* KS C 5601-1992 Annex 3 regards  0xA4DA(Hangul Filler : U3164)      \
375            as symbol */                                                       \
376         else if (ch >= 0x3131 && ch <= 0x3163)                                \
377           {                                                                   \
378             ch = jamo_from_ucs_table[ch - 0x3131];                            \
379                                                                               \
380             if (NEED_LENGTH_TEST && outptr + 2 > outend)                      \
381               {                                                               \
382                 result = __GCONV_FULL_OUTPUT;                                 \
383                 break;                                                        \
384               }                                                               \
385                                                                               \
386             *outptr++ = ch / 256;                                             \
387             *outptr++ = ch % 256;                                             \
388           }                                                                   \
389         else if ((ch >= 0x4e00 && ch <= 0x9fa5)                               \
390                  || (ch >= 0xf900 && ch <= 0xfa0b))                           \
391           {                                                                   \
392             size_t written;                                                   \
393             uint32_t temp;                                                    \
394                                                                               \
395             written = ucs4_to_ksc5601_hanja (ch, outptr,                      \
396                                              (NEED_LENGTH_TEST                \
397                                               ? outend - outptr : 2));        \
398             if (NEED_LENGTH_TEST && written == 0)                             \
399               {                                                               \
400                 result = __GCONV_FULL_OUTPUT;                                 \
401                 break;                                                        \
402               }                                                               \
403             if (written == __UNKNOWN_10646_CHAR)                              \
404               {                                                               \
405                 if (! ignore_errors_p ())                                     \
406                   {                                                           \
407                     /* This is an illegal character.  */                      \
408                     result = __GCONV_ILLEGAL_INPUT;                           \
409                     break;                                                    \
410                   }                                                           \
411                                                                               \
412                 inptr += 4;                                                   \
413                 ++*converted;                                                 \
414                 continue;                                                     \
415               }                                                               \
416                                                                               \
417             outptr[0] -= 0x4a;                                                \
418             outptr[1] -= 0x21;                                                \
419                                                                               \
420             temp = outptr[0] * 94 + outptr[1];                                \
421                                                                               \
422             outptr[0] = 0xe0 + temp / 188;                                    \
423             outptr[1] = temp % 188;                                           \
424             outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31;                       \
425                                                                               \
426             outptr += 2;                                                      \
427           }                                                                   \
428         else                                                                  \
429           {                                                                   \
430             size_t written;                                                   \
431                                                                               \
432             written = ucs4_to_ksc5601_sym (ch, outptr,                        \
433                                            (NEED_LENGTH_TEST                  \
434                                             ? outend - outptr : 2));          \
435             if (NEED_LENGTH_TEST && written == 0)                             \
436               {                                                               \
437                 result = __GCONV_FULL_OUTPUT;                                 \
438                 break;                                                        \
439               }                                                               \
440             if (written == __UNKNOWN_10646_CHAR)                              \
441               {                                                               \
442                 if (! ignore_errors_p ())                                     \
443                   {                                                           \
444                     /* This is an illegal character.  */                      \
445                     result = __GCONV_ILLEGAL_INPUT;                           \
446                     break;                                                    \
447                   }                                                           \
448                                                                               \
449                 inptr += 4;                                                   \
450                 ++*converted;                                                 \
451                 continue;                                                     \
452               }                                                               \
453                                                                               \
454             outptr[0] -= 0x4a;                                                \
455             outptr[1] += 0x80;                                                \
456                                                                               \
457             outptr[1] += (outptr[0] % 2                                       \
458                           ? 0 : (outptr[1] > 0xee ? 0x43 : 0x31));            \
459             outptr[1] -= 0xa1;                                                \
460             outptr[0] /= 2;                                                   \
461             outptr[0] += 0xe0;                                                \
462                                                                               \
463             outptr += 2;                                                      \
464           }                                                                   \
465       }                                                                       \
466                                                                               \
467     inptr += 4;                                                               \
468   }
469 #include <iconv/loop.c>
470
471
472 /* Now define the toplevel functions.  */
473 #include <iconv/skeleton.c>