fccfbabd4efc11d169cf3b26f366c5ef96047c82
[kopensolaris-gnu/glibc.git] / iconvdata / johab.c
1 /* Mapping tables for JOHAB handling.
2    Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
5    and Ulrich Drepper <drepper@cygnus.com>, 1998.
6
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Library General Public License as
9    published by the Free Software Foundation; either version 2 of the
10    License, or (at your option) any later version.
11
12    The GNU C Library is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    Library General Public License for more details.
16
17    You should have received a copy of the GNU Library General Public
18    License along with the GNU C Library; see the file COPYING.LIB.  If not,
19    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20    Boston, MA 02111-1307, USA.  */
21
22 #include <stdint.h>
23 #include <ksc5601.h>
24
25 /* Tables mapping JOHAB bit patterns to Hangul Jamo indices.
26    Five bits each are used to encode the
27    leading consonants (19 + 1 filler), medial vowels (21 + 1 filler)
28    and trailing consonants (27 + 1 filler).
29
30    KS C 5601-1992 Annex 3 Table 2
31    0 : Filler, -1: invalid, >= 1 : valid
32
33  */
/* Decode table for the 5-bit leading-consonant field of a JOHAB Hangul
   code.  Index: the raw bit pattern (0..31).  Value: -1 for an invalid
   pattern, 0 for the filler (no leading consonant), 1..19 for the ordinal
   of the leading consonant.

   All 32 slots are written out explicitly: with fewer initializers the
   remaining slots would be zero-initialized and so read as "filler"
   instead of "invalid".  */
static const int init[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,
   7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1
};
/* Decode table for the 5-bit medial-vowel field of a JOHAB Hangul code.
   Index: the raw bit pattern (0..31).  Value: -1 for an invalid pattern,
   0 for the filler (no vowel), 1..21 for the ordinal of the vowel.
   The first two patterns of every group of eight are invalid, and so are
   the last two patterns of the final group.  */
static const int mid[32] =
{
  [0x00] = -1, -1,  0,  1,  2,  3,  4,  5,
  [0x08] = -1, -1,  6,  7,  8,  9, 10, 11,
  [0x10] = -1, -1, 12, 13, 14, 15, 16, 17,
  [0x18] = -1, -1, 18, 19, 20, 21, -1, -1
};
/* Decode table for the 5-bit trailing-consonant field of a JOHAB Hangul
   code.  Index: the raw bit pattern (0..31).  Value: -1 for an invalid
   pattern, 0 for the filler (no trailing consonant), 1..27 for the
   ordinal of the trailing consonant.  Patterns 0x00, 0x12, 0x1e and 0x1f
   are the invalid ones.  */
static const int final[32] =
{
  [0x00] = -1,  0,  1,  2,  3,  4,  5,  6,
  [0x08] =  7,  8,  9, 10, 11, 12, 13, 14,
  [0x10] = 15, 16, -1, 17, 18, 19, 20, 21,
  [0x18] = 22, 23, 24, 25, 26, 27, -1, -1
};
51
52 /*
53    Hangul Jamo in Johab to Unicode 2.0: Unicode 2.0 defines 51 Hangul
54    Compatibility Jamos in [0x3131,0x3163]; consonants are [0x3131,0x314e]
55
56    It's to be considered later which Jamo block to use, Compatibility
57    block [0x3131,0x314e] or Hangul Conjoining Jamo block, [0x1100,0x11ff]
58
59  */
/* UCS4 Hangul Compatibility Jamo for each of the 19 leading consonants.
   Indexed by (leading-consonant ordinal - 1), i.e. by init[] value - 1.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137,
  0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147,
  0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};
66
/* UCS4 Hangul Compatibility Jamo for a trailing consonant that appears
   without leading consonant and vowel.  Indexed by (trailing-consonant
   ordinal - 1), i.e. by final[] value - 1.  Only the consonant clusters
   are listed; a 0 entry means "no mapping" and is rejected by the decoder.

   Two entries are corrected relative to the previous table so that it is
   the inverse of jamo_from_ucs_table:
     - slot 10 is 0x313c (it continues the run 0x313a, 0x313b, ...), not
       0x314c;
     - 0x3144 lives in slot 17, because jamo_from_ucs_table encodes U+3144
       as 0x8454, whose trailing field 0x14 decodes to ordinal 18.  */
static const uint32_t final_to_ucs[31] =
{
  /* ordinals  1.. 8 */ 0, 0, 0x3133, 0, 0x3135, 0x3136, 0, 0,
  /* ordinals  9..16 */ 0x313a, 0x313b, 0x313c, 0x313d,
                        0x313e, 0x313f, 0x3140, 0,
  /* ordinals 17..24 */ 0, 0x3144, 0, 0, 0, 0, 0, 0,
  /* ordinals 25..31 */ 0, 0, 0, 0, 0, 0, 0
};
74
75 /* The following three arrays are used to convert
76    precomposed Hangul syllables in [0xac00,0xd7a3]
77    to Jamo bit patterns for Johab encoding
78
79    cf. : KS C 5601-1992, Annex3 Table 2
80
81    Arrays are used to speed up things although it's possible
82    to get the same result arithmetically.
83
84  */
/* JOHAB bit contribution of each of the 19 leading consonants, indexed by
   (syllable offset / 588).  Every entry has the top bit (0x8000) set and
   carries the consonant's 5-bit pattern, 2..20, in bits 14-10.  */
static const int init_to_bit[19] =
{
  0x8000 | ( 2 << 10), 0x8000 | ( 3 << 10), 0x8000 | ( 4 << 10),
  0x8000 | ( 5 << 10), 0x8000 | ( 6 << 10), 0x8000 | ( 7 << 10),
  0x8000 | ( 8 << 10), 0x8000 | ( 9 << 10), 0x8000 | (10 << 10),
  0x8000 | (11 << 10), 0x8000 | (12 << 10), 0x8000 | (13 << 10),
  0x8000 | (14 << 10), 0x8000 | (15 << 10), 0x8000 | (16 << 10),
  0x8000 | (17 << 10), 0x8000 | (18 << 10), 0x8000 | (19 << 10),
  0x8000 | (20 << 10)
};
92
/* JOHAB bit contribution of each of the 21 medial vowels, indexed by
   ((syllable offset / 28) % 21).  Each entry is the vowel's 5-bit pattern
   placed in bits 9-5; the patterns skip the first two values of every
   group of eight.  */
static const int mid_to_bit[21] =
{
  3 << 5,  4 << 5,  5 << 5,  6 << 5,  7 << 5,
  10 << 5, 11 << 5, 12 << 5, 13 << 5, 14 << 5, 15 << 5,
  18 << 5, 19 << 5, 20 << 5, 21 << 5, 22 << 5, 23 << 5,
  26 << 5, 27 << 5, 28 << 5, 29 << 5
};
100
/* JOHAB bit contribution (bits 4-0) of the trailing consonant, indexed by
   (syllable offset % 28).  Index 0 is "no trailing consonant" and maps to
   the filler pattern 0x01; pattern 0x12 is never produced.  */
static const int final_to_bit[28] =
{
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
  0x0f, 0x10, 0x11, 0x13, 0x14, 0x15, 0x16,
  0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};
106
107 /* The conversion table from
108    UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
109    to Johab
110
111    cf. 1. KS C 5601-1992 Annex 3 Table 2
112    2. Unicode 2.0 manual
113
114  */
/* JOHAB code for each UCS4 Hangul Compatibility Jamo, indexed by
   (ucs - 0x3131) for ucs in [0x3131,0x3163].

   Entry shapes:
     - single consonant:  init_to_bit[k] + 0x41 (vowel filler, trailing
       filler);
     - consonant cluster: 0x8440 + trailing pattern (leading and vowel
       fillers);
     - vowel:             0x8401 + (vowel pattern << 5).

   Three entries are corrected relative to the previous table:
     - U+313E and U+313F are clusters, so they are 0x844e and 0x844f
       (0x884e / 0x884f carried a leading consonant and decoded as
       illegal);
     - U+314D is leading consonant 18, so it is 0xcc41 (0xca41 carried a
       non-filler vowel field).  */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* U+3131 */ 0x8841, 0x8c41,
  /* U+3133 */ 0x8444,
  /* U+3134 */ 0x9041,
  /* U+3135 */ 0x8446, 0x8447,
  /* U+3137 */ 0x9441, 0x9841, 0x9c41,
  /* U+313A */ 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  /* U+3141 */ 0xa041, 0xa441, 0xa841,
  /* U+3144 */ 0x8454,
  /* U+3145 */ 0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  /* U+314A */ 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* U+314F */ 0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  /* U+3154 */ 0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  /* U+315A */ 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  /* U+3160 */ 0x8741, 0x8761, 0x8781, 0x87a1
};
132
133
134 static inline uint32_t
135 johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
136 {
137   if (idx <= 0xdefe)
138     return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139                                            - (c2 > 0x90 ? 0x43 : 0x31)];
140   else
141     return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142                                              - (c2 > 0x90 ? 0x43 : 0x31)];
143 }
144 /* Definitions used in the body of the `gconv' function.
   These macros are not consumed in this file; presumably the included
   iconv/skeleton.c and iconv/loop.c read them -- confirm there.  */
145 #define CHARSET_NAME            "JOHAB//"
/* Names given to the two conversion loops generated further down.  */
146 #define FROM_LOOP               from_johab
147 #define TO_LOOP                 to_johab
148 #define DEFINE_INIT             1
149 #define DEFINE_FINI             1
/* A JOHAB character occupies one or two bytes.  */
150 #define MIN_NEEDED_FROM         1
151 #define MAX_NEEDED_FROM         2
/* The other side is UCS4: always four bytes per character.  */
152 #define MIN_NEEDED_TO           4
153
154
155 /* First define the conversion function from JOHAB to UCS4.

   Each expansion of BODY consumes one JOHAB character (one or two bytes
   at inptr) and stores one 4-byte UCS4 value at outptr through put32.
   The names inptr, inend, outptr, result, converted, put32,
   NEED_LENGTH_TEST and ignore_errors_p are not defined in this file;
   presumably iconv/loop.c supplies them, as well as the loop that the
   `break' and `continue' statements refer to -- confirm there.

   Outcomes visible here:
     - result = __GCONV_ILLEGAL_INPUT followed by `break' on an invalid
       byte sequence, unless ignore_errors_p () is true, in which case the
       offending input is skipped and *converted is incremented;
     - result = __GCONV_INCOMPLETE_INPUT followed by `break' when a lead
       byte is not followed by an available trail byte.  */
156 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
157 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
158 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
159 #define LOOPFCT                 FROM_LOOP
160 #define BODY \
161   {                                                                           \
162     uint32_t ch = *inptr;                                                     \
163                                                                               \
164     /* half-width Korean Currency WON sign                                    \
165        if (ch == 0x5c)                                                        \
166          ch =  0x20a9;                                                        \
167        else if (ch < 0x7f)                                                    \
168          ch = (uint32_t) ch;                                                  \
169     */                                                                        \
170     if (ch < 0x7f)                                                            \
171       /* Plain ASCII below 0x7f; 0x7f itself is rejected further down.  */    \
172       ++inptr;                                                                \
173     /* Johab : 1. Hangul                                                      \
174        1st byte : 0x84-0xd3                                                   \
175        2nd byte : 0x41-0x7e, 0x81-0xfe                                        \
176        2. Hanja & Symbol  :                                                   \
177        1st byte : 0xd8-0xde, 0xe0-0xf9                                        \
178        2nd byte : 0x31-0x7e, 0x91-0xfe                                        \
179        0xd831-0xd87e and 0xd891-0xd8fe are user-defined area; the             \
       lead-byte test below treats 0xd8 as illegal.  */                       \
180     else                                                                      \
181       {                                                                       \
182         if (__builtin_expect (ch, 0) > 0xf9                                   \
183             || __builtin_expect (ch, 0) == 0xdf                               \
184             || (__builtin_expect (ch, 0) > 0x7e && ch < 0x84)                 \
185             || (__builtin_expect (ch, 0) > 0xd3 && ch < 0xd9))                \
186           {                                                                   \
187             /* Lead byte outside every accepted JOHAB range.  */              \
188             if (! ignore_errors_p ())                                         \
189               {                                                               \
190                 /* This is an illegal character.  */                          \
191                 result = __GCONV_ILLEGAL_INPUT;                               \
192                 break;                                                        \
193               }                                                               \
194                                                                               \
195             ++inptr;                                                          \
196             ++*converted;                                                     \
197             continue;                                                         \
198           }                                                                   \
199         else                                                                  \
200           {                                                                   \
201             /* Two-byte character.  First test whether the next               \
202                character is also available.  */                               \
203             uint32_t ch2;                                                     \
204             uint_fast32_t idx;                                                \
205                                                                               \
206             if (NEED_LENGTH_TEST && __builtin_expect (inptr + 1 >= inend, 0)) \
207               {                                                               \
208                 /* The second character is not available.  Store the          \
209                    intermediate result.  */                                   \
210                 result = __GCONV_INCOMPLETE_INPUT;                            \
211                 break;                                                        \
212               }                                                               \
213                                                                               \
214             ch2 = inptr[1];                                                   \
215             idx = ch * 256 + ch2;                                             \
216             if (__builtin_expect (ch, 0) <= 0xd3)                             \
217               {                                                               \
218                 /* Hangul: bits 14-10, 9-5 and 4-0 of idx select the          \
                   leading, medial and trailing jamo through the tables       \
                   init, mid and final (-1 invalid, 0 filler).  */            \
219                 uint_fast32_t i, m, f;                                        \
220                                                                               \
221                 i = init[(idx & 0x7c00) >> 10];                               \
222                 m = mid[(idx & 0x03e0) >> 5];                                 \
223                 f = final[idx & 0x001f];                                      \
224                                                                               \
                /* NOTE(review): i, m and f are unsigned yet are compared     \
                   with -1; this looks like it depends on the conversion      \
                   done when passing them to __builtin_expect -- confirm.  */ \
225                 if (__builtin_expect (i, 0) == -1                             \
226                     || __builtin_expect (m, 0) == -1                          \
227                     || __builtin_expect (f, 0) == -1)                         \
228                   {                                                           \
229                     /* This is illegal.  */                                   \
230                     if (! ignore_errors_p ())                                 \
231                       {                                                       \
232                         /* This is an illegal character.  */                  \
233                         result = __GCONV_ILLEGAL_INPUT;                       \
234                         break;                                                \
235                       }                                                       \
236                                                                               \
237                     ++inptr;                                                  \
238                     ++*converted;                                             \
239                     continue;                                                 \
240                   }                                                           \
241                 else if (i > 0 && m > 0)                                      \
242                   ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00;            \
243                 else if (i > 0 && m == 0 && f == 0)                           \
244                   ch = init_to_ucs[i - 1];                                    \
245                 else if (i == 0 && m > 0 && f == 0)                           \
246                   ch = 0x314e + m;      /* 0x314f + m - 1 */                  \
247                 else if (__builtin_expect (i | m, 0) == 0                     \
248                          && __builtin_expect (f, 1) > 0)                      \
249                   ch = final_to_ucs[f - 1];     /* round trip?? */            \
250                 else                                                          \
251                   {                                                           \
252                     /* Any other combination of fillers is illegal.  */       \
253                     if (! ignore_errors_p ())                                 \
254                       {                                                       \
255                         /* This is an illegal character.  */                  \
256                         result = __GCONV_ILLEGAL_INPUT;                       \
257                         break;                                                \
258                       }                                                       \
259                                                                               \
260                     ++inptr;                                                  \
261                     ++*converted;                                             \
262                     continue;                                                 \
263                   }                                                           \
264               }                                                               \
265             else                                                              \
266               {                                                               \
                /* Symbol or Hanja: validate the trail byte first.  */        \
267                 if (__builtin_expect (ch2, 0x31) < 0x31                       \
268                     || (__builtin_expect (ch2, 0x7e) > 0x7e && ch2 < 0x91)    \
269                     || __builtin_expect (ch2, 0) == 0xff                      \
270                     || (__builtin_expect (ch, 0) == 0xda                      \
271                         && ch2 > 0xa0 && ch2 < 0xd4))                         \
272                   {                                                           \
273                     /* This is illegal.  */                                   \
274                     if (! ignore_errors_p ())                                 \
275                       {                                                       \
276                         /* This is an illegal character.  */                  \
277                         result = __GCONV_ILLEGAL_INPUT;                       \
278                         break;                                                \
279                       }                                                       \
280                                                                               \
281                     ++inptr;                                                  \
282                     ++*converted;                                             \
283                     continue;                                                 \
284                   }                                                           \
285                 else                                                          \
286                   {                                                           \
287                     ch = johab_sym_hanja_to_ucs (idx, ch, ch2);               \
288                     /* if (idx <= 0xdefe)                                     \
289                          ch = __ksc5601_sym_to_ucs[(ch - 0xd9) * 192          \
290                                                    + ch2 - (ch2 > 0x90        \
291                                                             ? 0x43 : 0x31)];  \
292                        else                                                   \
293                          ch = __ksc5601_hanja_to_ucs[(ch - 0xe0) *192         \
294                                                      + ch2 -  (ch2 > 0x90     \
295                                                                ?0x43 : 0x31)];\
296                     */                                                        \
297                   }                                                           \
298               }                                                               \
299           }                                                                   \
300                                                                               \
301         if (__builtin_expect (ch, 1) == 0)                                    \
302           {                                                                   \
303             /* A zero result is treated as illegal (for instance an           \
               empty final_to_ucs slot).  */                                  \
304             if (! ignore_errors_p ())                                         \
305               {                                                               \
306                 /* This is an illegal character.  */                          \
307                 result = __GCONV_ILLEGAL_INPUT;                               \
308                 break;                                                        \
309               }                                                               \
310                                                                               \
311             inptr += 2;                                                       \
312             ++*converted;                                                     \
313             continue;                                                         \
314           }                                                                   \
315                                                                               \
316         inptr += 2;                                                           \
317       }                                                                       \
318                                                                               \
319     put32 (outptr, ch);                                                       \
320     outptr += 4;                                                              \
321   }
322 #include <iconv/loop.c>
323
324
325 /* Next, define the other direction.

   Each expansion of BODY reads one UCS4 value with get32, advances inptr
   by four bytes and writes one or two JOHAB bytes at outptr.  The names
   inptr, outptr, outend, result, converted, get32, NEED_LENGTH_TEST and
   ignore_errors_p are not defined in this file; presumably iconv/loop.c
   supplies them -- confirm there.  ucs4_to_ksc5601_hanja,
   ucs4_to_ksc5601_sym and __UNKNOWN_10646_CHAR are likewise defined
   elsewhere (presumably ksc5601.h).

   Dispatch by code point:
     - below 0x7f:                        copied as a single byte;
     - [0xac00,0xd7a3]:                   Hangul syllable, composed from
                                          init_to_bit, mid_to_bit and
                                          final_to_bit;
     - [0x3131,0x3163]:                   looked up in jamo_from_ucs_table;
     - [0x4e00,0x9fa5], [0xf900,0xfa0b]:  converted by
                                          ucs4_to_ksc5601_hanja, then
                                          rebased;
     - everything else:                   converted by ucs4_to_ksc5601_sym,
                                          then rebased.

   Outcomes visible here: result = __GCONV_FULL_OUTPUT when fewer than two
   output bytes are available or a helper returns 0, and
   result = __GCONV_ILLEGAL_INPUT when a helper returns
   __UNKNOWN_10646_CHAR and ignore_errors_p () is false.  */
326 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
327 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
328 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
329 #define LOOPFCT                 TO_LOOP
330 #define BODY \
331   {                                                                           \
332     uint32_t ch = get32 (inptr);                                              \
333     /*                                                                        \
334        if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0])))      \
335          {                                                                    \
336            if (ch >= 0x0391 && ch <= 0x0451)                                  \
337              cp = from_ucs4_greek[ch - 0x391];                                \
338            else if (ch >= 0x2010 && ch <= 0x9fa0)                             \
339              cp = from_ucs4_cjk[ch - 0x02010];                                \
340            else                                                               \
341              break;                                                           \
342          }                                                                    \
343        else                                                                   \
344          cp = from_ucs4_lat1[ch];                                             \
345     */                                                                        \
346                                                                               \
347     if (ch < 0x7f)                                                            \
348       *outptr++ = ch;                                                         \
349     else                                                                      \
350       {                                                                       \
351         if (ch >= 0xac00 && ch <= 0xd7a3)                                     \
352           {                                                                   \
353             if (NEED_LENGTH_TEST && __builtin_expect (outptr + 2 > outend, 0))\
354               {                                                               \
355                 result = __GCONV_FULL_OUTPUT;                                 \
356                 break;                                                        \
357               }                                                               \
358                                                                               \
359             ch -= 0xac00;                                                     \
360                                                                               \
            /* Add up the bit fields of the three jamo.  */                   \
361             ch = (init_to_bit[ch / 588]   /* 21 * 28 = 588 */                 \
362                   + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */     \
363                   + final_to_bit[ch %  28]);  /* (ch % (21 * 28)) % 28 */     \
364                                                                               \
            /* High byte first.  */                                           \
365             *outptr++ = ch / 256;                                             \
366             *outptr++ = ch % 256;                                             \
367           }                                                                   \
368         /* KS C 5601-1992 Annex 3 regards  0xA4DA(Hangul Filler : U3164)      \
369            as symbol */                                                       \
370         else if (ch >= 0x3131 && ch <= 0x3163)                                \
371           {                                                                   \
372             ch = jamo_from_ucs_table[ch - 0x3131];                            \
373                                                                               \
374             if (NEED_LENGTH_TEST && __builtin_expect (outptr + 2 > outend, 0))\
375               {                                                               \
376                 result = __GCONV_FULL_OUTPUT;                                 \
377                 break;                                                        \
378               }                                                               \
379                                                                               \
380             *outptr++ = ch / 256;                                             \
381             *outptr++ = ch % 256;                                             \
382           }                                                                   \
383         else if ((ch >= 0x4e00 && ch <= 0x9fa5)                               \
384                  || (ch >= 0xf900 && ch <= 0xfa0b))                           \
385           {                                                                   \
386             size_t written;                                                   \
387             uint32_t temp;                                                    \
388                                                                               \
389             written = ucs4_to_ksc5601_hanja (ch, outptr,                      \
390                                              (NEED_LENGTH_TEST                \
391                                               ? outend - outptr : 2));        \
392             if (NEED_LENGTH_TEST && __builtin_expect (written, 1) == 0)       \
393               {                                                               \
394                 result = __GCONV_FULL_OUTPUT;                                 \
395                 break;                                                        \
396               }                                                               \
397             if (__builtin_expect (written, 0) == __UNKNOWN_10646_CHAR)        \
398               {                                                               \
399                 if (! ignore_errors_p ())                                     \
400                   {                                                           \
401                     /* This is an illegal character.  */                      \
402                     result = __GCONV_ILLEGAL_INPUT;                           \
403                     break;                                                    \
404                   }                                                           \
405                                                                               \
406                 inptr += 4;                                                   \
407                 ++*converted;                                                 \
408                 continue;                                                     \
409               }                                                               \
410                                                                               \
            /* Rebase the two bytes the helper stored, flatten them to a      \
               single index with 94 positions per lead byte, then split       \
               that index again with 188 positions per JOHAB lead byte,       \
               counting lead bytes from 0xe0.  */                             \
411             outptr[0] -= 0x4a;                                                \
412             outptr[1] -= 0x21;                                                \
413                                                                               \
414             temp = outptr[0] * 94 + outptr[1];                                \
415                                                                               \
416             outptr[0] = 0xe0 + temp / 188;                                    \
417             outptr[1] = temp % 188;                                           \
418             outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31;                       \
419                                                                               \
420             outptr += 2;                                                      \
421           }                                                                   \
422         else                                                                  \
423           {                                                                   \
424             size_t written;                                                   \
425                                                                               \
426             written = ucs4_to_ksc5601_sym (ch, outptr,                        \
427                                            (NEED_LENGTH_TEST                  \
428                                             ? outend - outptr : 2));          \
429             if (NEED_LENGTH_TEST && __builtin_expect (written, 1) == 0)       \
430               {                                                               \
431                 result = __GCONV_FULL_OUTPUT;                                 \
432                 break;                                                        \
433               }                                                               \
434             if (__builtin_expect (written, 1) == __UNKNOWN_10646_CHAR)        \
435               {                                                               \
436                 if (! ignore_errors_p ())                                     \
437                   {                                                           \
438                     /* This is an illegal character.  */                      \
439                     result = __GCONV_ILLEGAL_INPUT;                           \
440                     break;                                                    \
441                   }                                                           \
442                                                                               \
443                 inptr += 4;                                                   \
444                 ++*converted;                                                 \
445                 continue;                                                     \
446               }                                                               \
447                                                                               \
            /* NOTE(review): unlike the Hanja branch, this arithmetic does    \
               not look like the inverse of johab_sym_hanja_to_ucs (lead      \
               bytes counted from 0xd9, 188 positions each) -- verify with    \
               a round-trip test.  */                                         \
448             outptr[0] -= 0x4a;                                                \
449             outptr[1] += 0x80;                                                \
450                                                                               \
451             outptr[1] += (outptr[0] % 2                                       \
452                           ? 0 : (outptr[1] > 0xee ? 0x43 : 0x31));            \
453             outptr[1] -= 0xa1;                                                \
454             outptr[0] /= 2;                                                   \
455             outptr[0] += 0xe0;                                                \
456                                                                               \
457             outptr += 2;                                                      \
458           }                                                                   \
459       }                                                                       \
460                                                                               \
461     inptr += 4;                                                               \
462   }
463 #include <iconv/loop.c>
464
465
466 /* Now define the toplevel functions.  */
467 #include <iconv/skeleton.c>