Use get16, get32, put16, and put32 instead of direct casting pointer
[kopensolaris-gnu/glibc.git] / iconvdata / johab.c
1 /* Mapping tables for JOHAB handling.
2    Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
5    and Ulrich Drepper <drepper@cygnus.com>, 1998.
6
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Library General Public License as
9    published by the Free Software Foundation; either version 2 of the
10    License, or (at your option) any later version.
11
12    The GNU C Library is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    Library General Public License for more details.
16
17    You should have received a copy of the GNU Library General Public
18    License along with the GNU C Library; see the file COPYING.LIB.  If not,
19    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20    Boston, MA 02111-1307, USA.  */
21
22 #include <stdint.h>
23 #include <ksc5601.h>
24
25 /* The table for Bit pattern to Hangul Jamo
26    5 bits each are used to encode
27    leading consonants(19 + 1 filler), medial vowels(21 + 1 filler)
28    and trailing consonants(27 + 1 filler).
29
30    KS C 5601-1992 Annex 3 Table 2
31    0 : Filler, -1: invalid, >= 1 : valid
32
33  */
/* Leading consonant field (bits 10-14 of a two-byte Johab Hangul code)
   to leading consonant number: 0 is the filler, 1..19 are the consonants
   and -1 marks an invalid field value.  All 32 slots are initialised
   explicitly so that no unused slot silently defaults to 0 (filler).  */
static const int init[32] =
{
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
  19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/* Medial vowel field (bits 5-9 of a two-byte Johab Hangul code) to medial
   vowel number: 0 is the filler, 1..21 are the vowels and -1 marks an
   invalid field value.  The index is the raw 5-bit field.  */
static const int mid[32] =
{
  [0] = -1, [1] = -1,
  [2] = 0, [3] = 1, [4] = 2, [5] = 3, [6] = 4, [7] = 5,
  [8] = -1, [9] = -1,
  [10] = 6, [11] = 7, [12] = 8, [13] = 9, [14] = 10, [15] = 11,
  [16] = -1, [17] = -1,
  [18] = 12, [19] = 13, [20] = 14, [21] = 15, [22] = 16, [23] = 17,
  [24] = -1, [25] = -1,
  [26] = 18, [27] = 19, [28] = 20, [29] = 21,
  [30] = -1, [31] = -1
};
/* Trailing consonant field (bits 0-4 of a two-byte Johab Hangul code) to
   trailing consonant number: 0 is the filler, 1..27 are the consonants
   and -1 marks an invalid field value.  The index is the raw 5-bit
   field; note the hole at field value 18.  */
static const int final[32] =
{
  [0] = -1,
  [1] = 0, [2] = 1, [3] = 2, [4] = 3, [5] = 4, [6] = 5,
  [7] = 6, [8] = 7, [9] = 8, [10] = 9, [11] = 10, [12] = 11,
  [13] = 12, [14] = 13, [15] = 14, [16] = 15, [17] = 16,
  [18] = -1,
  [19] = 17, [20] = 18, [21] = 19, [22] = 20, [23] = 21, [24] = 22,
  [25] = 23, [26] = 24, [27] = 25, [28] = 26, [29] = 27,
  [30] = -1, [31] = -1
};
51
/*
   Hangul Jamo in Johab to Unicode 2.0 : Unicode 2.0 defines 51 Hangul
   Compatibility Jamos in the block [0x3131,0x3163]; the consonants
   among them occupy [0x3131,0x314e] and the vowels start at 0x314f.

   It's to be considered later which Jamo block to use, Compatibility
   block [0x3131,0x3163] or Hangul Conjoining Jamo block, [0x1100,0x11ff]

 */
/* UCS4 code point for a leading consonant that stands alone (medial and
   final are both fillers).  Indexed by leading consonant number minus
   one, i.e. init[] value - 1.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138,
  0x3139, 0x3141, 0x3142, 0x3143, 0x3145,
  0x3146, 0x3147, 0x3148, 0x3149, 0x314a,
  0x314b, 0x314c, 0x314d, 0x314e
};
66
/* UCS4 code point for a trailing consonant that stands alone (leading
   and medial are both fillers).  Indexed by trailing consonant number
   minus one, i.e. final[] value - 1.  Only the consonant clusters that
   cannot be written as a leading consonant have an entry; 0 means "no
   mapping" and is rejected by the caller.

   The entries are kept consistent with jamo_from_ucs_table so that the
   two directions round-trip: e.g. U+3144 is encoded as 0x8454, whose
   final field 0x14 gives final[] value 18, hence index 17 here.  */
static const uint32_t final_to_ucs[31] =
{
  /*  0 */ 0, 0, 0x3133, 0, 0x3135, 0x3136, 0, 0,
  /*  8 */ 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
  /* 14 */ 0x3140, 0, 0, 0x3144, 0, 0, 0, 0,
  /* 22 */ 0, 0, 0, 0, 0, 0, 0, 0, 0
};
74
/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd7a3]
   to Jamo bit patterns for Johab encoding

   cf. : KS C 5601-1992, Annex3 Table 2

   Arrays are used to speed up things although it's possible
   to get the same result arithmetically.

 */
/* Leading consonant part of a two-byte Johab Hangul code, indexed by the
   leading consonant index (0..18) of a precomposed syllable.  Each entry
   has bit 15 set and carries the 5-bit field value (index + 2) in
   bits 10-14.  */
#define JOHAB_INIT_FIELD(code) (0x8000 | ((code) << 10))
static const int init_to_bit[19] =
{
  JOHAB_INIT_FIELD (2),  JOHAB_INIT_FIELD (3),  JOHAB_INIT_FIELD (4),
  JOHAB_INIT_FIELD (5),  JOHAB_INIT_FIELD (6),  JOHAB_INIT_FIELD (7),
  JOHAB_INIT_FIELD (8),  JOHAB_INIT_FIELD (9),  JOHAB_INIT_FIELD (10),
  JOHAB_INIT_FIELD (11), JOHAB_INIT_FIELD (12), JOHAB_INIT_FIELD (13),
  JOHAB_INIT_FIELD (14), JOHAB_INIT_FIELD (15), JOHAB_INIT_FIELD (16),
  JOHAB_INIT_FIELD (17), JOHAB_INIT_FIELD (18), JOHAB_INIT_FIELD (19),
  JOHAB_INIT_FIELD (20)
};
#undef JOHAB_INIT_FIELD
92
/* Medial vowel part of a two-byte Johab Hangul code, indexed by the
   medial vowel index (0..20) of a precomposed syllable.  Each entry is
   the 5-bit field value placed in bits 5-9; the field values skip the
   slots that mid[] marks as invalid.  */
#define JOHAB_MID_FIELD(code) ((code) << 5)
static const int mid_to_bit[21] =
{
  JOHAB_MID_FIELD (3),  JOHAB_MID_FIELD (4),  JOHAB_MID_FIELD (5),
  JOHAB_MID_FIELD (6),  JOHAB_MID_FIELD (7),

  JOHAB_MID_FIELD (10), JOHAB_MID_FIELD (11), JOHAB_MID_FIELD (12),
  JOHAB_MID_FIELD (13), JOHAB_MID_FIELD (14), JOHAB_MID_FIELD (15),

  JOHAB_MID_FIELD (18), JOHAB_MID_FIELD (19), JOHAB_MID_FIELD (20),
  JOHAB_MID_FIELD (21), JOHAB_MID_FIELD (22), JOHAB_MID_FIELD (23),

  JOHAB_MID_FIELD (26), JOHAB_MID_FIELD (27), JOHAB_MID_FIELD (28),
  JOHAB_MID_FIELD (29)
};
#undef JOHAB_MID_FIELD
100
/* Trailing consonant part of a two-byte Johab Hangul code, indexed by the
   trailing consonant index (0..27, 0 = none) of a precomposed syllable.
   Each entry is the 5-bit field value placed in bits 0-4; the value 0x12
   is skipped, matching the invalid slot in final[].  */
static const int final_to_bit[28] =
{
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
  0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
  0x11,
  0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
  0x1b, 0x1c, 0x1d
};
106
107 /* The conversion table from
108    UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
109    to Johab
110
111    cf. 1. KS C 5601-1992 Annex 3 Table 2
112    2. Unicode 2.0 manual
113
114  */
/* Johab code for each UCS4 Hangul Compatibility Jamo, indexed by
   code point - 0x3131 (so the table covers 0x3131..0x3163).

   Every entry is a Hangul bit pattern in which the missing parts are
   fillers: 0x8400 is the leading filler, 0x0040 the medial filler and
   0x0001 the trailing filler.  Hence
     - a leading consonant is init_to_bit[n] + 0x41,
     - a consonant cluster that only exists as a trailing consonant is
       0x8440 + its trailing field value,
     - a vowel is 0x8401 + mid_to_bit[n].  */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* 0x3131 */ 0x8841, 0x8c41,
  /* 0x3133 */ 0x8444,
  /* 0x3134 */ 0x9041,
  /* 0x3135 */ 0x8446, 0x8447,
  /* 0x3137 */ 0x9441, 0x9841, 0x9c41,
  /* 0x313a */ 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  /* 0x3141 */ 0xa041, 0xa441, 0xa841,
  /* 0x3144 */ 0x8454,
  /* 0x3145 */ 0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  /* 0x314a */ 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* 0x314f */ 0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  /* 0x3154 */ 0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  /* 0x315a */ 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  /* 0x3160 */ 0x8741, 0x8761, 0x8781, 0x87a1
};
132
133
134 static inline uint32_t
135 johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
136 {
137   if (idx <= 0xdefe)
138     return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139                                            - (c2 > 0x90 ? 0x43 : 0x31)];
140   else
141     return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142                                              - (c2 > 0x90 ? 0x43 : 0x31)];
143 }
/* Definitions used in the body of the `gconv' function.  */

/* Name of the character set handled by this module.  */
#define CHARSET_NAME            "JOHAB//"

/* Names of the two conversion loops defined below; each is used as
   LOOPFCT for one inclusion of <iconv/loop.c>.  */
#define FROM_LOOP               from_johab
#define TO_LOOP                 to_johab

/* A JOHAB character occupies one or two bytes, a UCS4 character
   four bytes.  */
#define MIN_NEEDED_FROM         1
#define MAX_NEEDED_FROM         2
#define MIN_NEEDED_TO           4

/* NOTE(review): presumably these ask <iconv/skeleton.c> to provide the
   default init and fini functions -- confirm against skeleton.c.  */
#define DEFINE_INIT             1
#define DEFINE_FINI             1
153
154
155 /* First define the conversion function from JOHAB to UCS4.  */
156 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
157 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
158 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
159 #define LOOPFCT                 FROM_LOOP
160 #define BODY \
161   {                                                                           \
162     uint32_t ch = *inptr;                                                     \
163                                                                               \
164     /* half-width Korean Currency WON sign                                    \
165        if (ch == 0x5c)                                                        \
166          ch =  0x20a9;                                                        \
167        else if (ch < 0x7f)                                                    \
168          ch = (uint32_t) ch;                                                  \
169     */                                                                        \
170     if (ch < 0x7f)                                                            \
171       /* Plain ASCII.  */                                                     \
172       ++inptr;                                                                \
173     /* Johab : 1. Hangul                                                      \
174        1st byte : 0x84-0xd3                                                   \
175        2nd byte : 0x41-0x7e, 0x81-0xfe                                        \
176        2. Hanja & Symbol  :                                                   \
177        1st byte : 0xd8-0xde, 0xe0-0xf9                                        \
178        2nd byte : 0x31-0x7e, 0x91-0xfe                                        \
179        0xd831-0xd87e and 0xd891-0xd8fe are user-defined area */               \
180     else                                                                      \
181       {                                                                       \
182         if (ch > 0xf9 || ch == 0xdf || (ch > 0x7e && ch < 0x84)               \
183             || (ch > 0xd3 && ch < 0xd9))                                      \
184           {                                                                   \
185             /* These are illegal.  */                                         \
186             result = __GCONV_ILLEGAL_INPUT;                                   \
187             break;                                                            \
188           }                                                                   \
189         else                                                                  \
190           {                                                                   \
191             /* Two-byte character.  First test whether the next               \
192                character is also available.  */                               \
193             uint32_t ch2;                                                     \
194             uint_fast32_t idx;                                                \
195                                                                               \
196             if (NEED_LENGTH_TEST && inptr + 1 >= inend)                       \
197               {                                                               \
198                 /* The second character is not available.  Store the          \
199                    intermediate result.  */                                   \
200                 result = __GCONV_INCOMPLETE_INPUT;                            \
201                 break;                                                        \
202               }                                                               \
203                                                                               \
204             ch2 = inptr[1];                                                   \
205             idx = ch * 256 + ch2;                                             \
206             if (ch <= 0xd3)                                                   \
207               {                                                               \
208                 /* Hangul */                                                  \
209                 uint_fast32_t i, m, f;                                        \
210                                                                               \
211                 i = init[(idx & 0x7c00) >> 10];                               \
212                 m = mid[(idx & 0x03e0) >> 5];                                 \
213                 f = final[idx & 0x001f];                                      \
214                                                                               \
215                 if (i == -1 || m == -1 || f == -1)                            \
216                   {                                                           \
217                     /* This is illegal.  */                                   \
218                     result = __GCONV_ILLEGAL_INPUT;                           \
219                     break;                                                    \
220                   }                                                           \
221                 else if (i > 0 && m > 0)                                      \
222                   ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00;            \
223                 else if (i > 0 && m == 0 && f == 0)                           \
224                   ch = init_to_ucs[i - 1];                                    \
225                 else if (i == 0 && m > 0 && f == 0)                           \
226                   ch = 0x314e + m;      /* 0x314f + m - 1 */                  \
227                 else if (i == 0 && m == 0 && f > 0)                           \
228                   ch = final_to_ucs[f - 1];     /* round trip?? */            \
229                 else                                                          \
230                   {                                                           \
231                     /* This is illegal.  */                                   \
232                     result = __GCONV_ILLEGAL_INPUT;                           \
233                     break;                                                    \
234                   }                                                           \
235               }                                                               \
236             else                                                              \
237               {                                                               \
238                 if (ch2 < 0x31 || (ch2 > 0x7e && ch2 < 0x91) || ch2 == 0xff)  \
239                   {                                                           \
240                     /* This is illegal.  */                                   \
241                     result = __GCONV_ILLEGAL_INPUT;                           \
242                     break;                                                    \
243                   }                                                           \
244                 else if (ch == 0xda && ch2 > 0xa0 && ch2 < 0xd4)              \
245                   {                                                           \
246                     /* This is illegal.  Modern Hangul Jaso is defined        \
247                        elsewhere in Johab */                                  \
248                     result = __GCONV_ILLEGAL_INPUT;                           \
249                     break;                                                    \
250                   }                                                           \
251                 else                                                          \
252                   {                                                           \
253                     ch = johab_sym_hanja_to_ucs (idx, ch, ch2);               \
254                     /* if (idx <= 0xdefe)                                     \
255                          ch = __ksc5601_sym_to_ucs[(ch - 0xd9) * 192          \
256                                                    + ch2 - (ch2 > 0x90        \
257                                                             ? 0x43 : 0x31)];  \
258                        else                                                   \
259                          ch = __ksc5601_hanja_to_ucs[(ch - 0xe0) *192         \
260                                                      + ch2 -  (ch2 > 0x90     \
261                                                                ?0x43 : 0x31)];\
262                     */                                                        \
263                   }                                                           \
264               }                                                               \
265           }                                                                   \
266                                                                               \
267         if (ch == 0)                                                          \
268           {                                                                   \
269             /* This is an illegal character.  */                              \
270             result = __GCONV_ILLEGAL_INPUT;                                   \
271             break;                                                            \
272           }                                                                   \
273                                                                               \
274         inptr += 2;                                                           \
275       }                                                                       \
276                                                                               \
277     put32 (outptr, ch);                                                       \
278     outptr += 4;                                                              \
279   }
280 #include <iconv/loop.c>
281
282
283 /* Next, define the other direction.  */
284 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
285 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
286 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
287 #define LOOPFCT                 TO_LOOP
288 #define BODY \
289   {                                                                           \
290     uint32_t ch = get32 (inptr);                                              \
291     /*                                                                        \
292        if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0])))      \
293          {                                                                    \
294            if (ch >= 0x0391 && ch <= 0x0451)                                  \
295              cp = from_ucs4_greek[ch - 0x391];                                \
296            else if (ch >= 0x2010 && ch <= 0x9fa0)                             \
297              cp = from_ucs4_cjk[ch - 0x02010];                                \
298            else                                                               \
299              break;                                                           \
300          }                                                                    \
301        else                                                                   \
302          cp = from_ucs4_lat1[ch];                                             \
303     */                                                                        \
304                                                                               \
305     if (ch < 0x7f)                                                            \
306       *outptr++ = ch;                                                         \
307     else                                                                      \
308       {                                                                       \
309         if (ch >= 0xac00 && ch <= 0xd7a3)                                     \
310           {                                                                   \
311             if (NEED_LENGTH_TEST && outptr + 2 > outend)                      \
312               {                                                               \
313                 result = __GCONV_FULL_OUTPUT;                                 \
314                 break;                                                        \
315               }                                                               \
316                                                                               \
317             ch -= 0xac00;                                                     \
318                                                                               \
319             ch = (init_to_bit[ch / 588]   /* 21 * 28 = 588 */                 \
320                   + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */     \
321                   + final_to_bit[ch %  28]);  /* (ch % (21 * 28)) % 28 */     \
322                                                                               \
323             *outptr++ = ch / 256;                                             \
324             *outptr++ = ch % 256;                                             \
325           }                                                                   \
326         /* KS C 5601-1992 Annex 3 regards  0xA4DA(Hangul Filler : U3164)      \
327            as symbol */                                                       \
328         else if (ch >= 0x3131 && ch <= 0x3163)                                \
329           {                                                                   \
330             ch = jamo_from_ucs_table[ch - 0x3131];                            \
331                                                                               \
332             if (NEED_LENGTH_TEST && outptr + 2 > outend)                      \
333               {                                                               \
334                 result = __GCONV_FULL_OUTPUT;                                 \
335                 break;                                                        \
336               }                                                               \
337                                                                               \
338             *outptr++ = ch / 256;                                             \
339             *outptr++ = ch % 256;                                             \
340           }                                                                   \
341         else if ((ch >= 0x4e00 && ch <= 0x9fa5)                               \
342                  || (ch >= 0xf900 && ch <= 0xfa0b))                           \
343           {                                                                   \
344             size_t written;                                                   \
345             uint32_t temp;                                                    \
346                                                                               \
347             written = ucs4_to_ksc5601_hanja (ch, outptr,                      \
348                                              (NEED_LENGTH_TEST                \
349                                               ? outend - outptr : 2));        \
350             if (NEED_LENGTH_TEST && written == 0)                             \
351               {                                                               \
352                 result = __GCONV_FULL_OUTPUT;                                 \
353                 break;                                                        \
354               }                                                               \
355             if (written == __UNKNOWN_10646_CHAR)                              \
356               {                                                               \
357                 result = __GCONV_ILLEGAL_INPUT;                               \
358                 break;                                                        \
359               }                                                               \
360                                                                               \
361             outptr[0] -= 0x4a;                                                \
362             outptr[1] -= 0x21;                                                \
363                                                                               \
364             temp = outptr[0] * 94 + outptr[1];                                \
365                                                                               \
366             outptr[0] = 0xe0 + temp / 188;                                    \
367             outptr[1] = temp % 188;                                           \
368             outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31;                       \
369                                                                               \
370             outptr += 2;                                                      \
371           }                                                                   \
372         else                                                                  \
373           {                                                                   \
374             size_t written;                                                   \
375                                                                               \
376             written = ucs4_to_ksc5601_sym (ch, outptr,                        \
377                                            (NEED_LENGTH_TEST                  \
378                                             ? outend - outptr : 2));          \
379             if (NEED_LENGTH_TEST && written == 0)                             \
380               {                                                               \
381                 result = __GCONV_FULL_OUTPUT;                                 \
382                 break;                                                        \
383               }                                                               \
384             if (written == __UNKNOWN_10646_CHAR)                              \
385               {                                                               \
386                 result = __GCONV_ILLEGAL_INPUT;                               \
387                 break;                                                        \
388               }                                                               \
389                                                                               \
390             outptr[0] -= 0x4a;                                                \
391             outptr[1] += 0x80;                                                \
392                                                                               \
393             outptr[1] += (outptr[0] % 2                                       \
394                           ? 0 : (outptr[1] > 0xee ? 0x43 : 0x31));            \
395             outptr[1] -= 0xa1;                                                \
396             outptr[0] /= 2;                                                   \
397             outptr[0] += 0xe0;                                                \
398                                                                               \
399             outptr += 2;                                                      \
400           }                                                                   \
401       }                                                                       \
402                                                                               \
403     inptr += 4;                                                               \
404   }
405 #include <iconv/loop.c>
406
407
408 /* Now define the toplevel functions.  */
409 #include <iconv/skeleton.c>