ec792009f045492053fb6847603b01e869e0aa36
[kopensolaris-gnu/glibc.git] / iconvdata / shift_jisx0213.c
1 /* Conversion from and to Shift_JISX0213.
2    Copyright (C) 2002 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Bruno Haible <bruno@clisp.org>, 2002.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, write to the Free
18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20
21 #include <dlfcn.h>
22 #include <stdint.h>
23 #include <gconv.h>
24
25 /* The structure of Shift_JISX0213 is as follows:
26
27    0x00..0x7F: ISO646-JP, an ASCII variant
28
29    0x{A1..DF}: JISX0201 Katakana.
30
31    0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.
32
33    0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.
34
35    Note that some JISX0213 characters are not contained in Unicode 3.2
36    and are therefore best represented as sequences of Unicode characters.
37 */
38
39 #include "jisx0213.h"
40
41 /* Definitions used by <iconv/skeleton.c> in the body of the `gconv' function.  */
42 #define CHARSET_NAME            "SHIFT_JISX0213//"
43 #define FROM_LOOP               from_shift_jisx0213  /* Shift_JISX0213 -> UCS-4.  */
44 #define TO_LOOP                 to_shift_jisx0213    /* UCS-4 -> Shift_JISX0213.  */
45 #define DEFINE_INIT             1
46 #define DEFINE_FINI             1
47 #define MIN_NEEDED_FROM         1       /* A Shift_JISX0213 character takes one...  */
48 #define MAX_NEEDED_FROM         2       /* ...or two bytes.  */
49 #define MIN_NEEDED_TO           4       /* One UCS-4 character.  */
50 #define PREPARE_LOOP \
51   int saved_state;             /* Backup used by SAVE_RESET_STATE.  */       \
52   int *statep = &data->__statep->__count;  /* COUNT element of the state.  */
53 #define EXTRA_LOOP_ARGS         , statep /* Matches EXTRA_LOOP_DECLS below.  */
54
55
56 /* The input pointer may have to be reset, so the state must be saved
57    (nonzero argument) beforehand and restored (zero argument) afterwards.  */
58 #define SAVE_RESET_STATE(Saving) \
59   if (! (Saving))                                                             \
60     *statep = saved_state;                                                    \
61   else                                                                        \
62     saved_state = *statep
63
64
65 /* During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
66    contains the last two bytes to be output, shifted by 3 bits.  */
67
68 /* Shift_JISX0213 output is stateful: the last character may still be held
69    in the state.  When flushing, any such pending character is written out
70    and the state is returned to its initial value.  */
71 #define EMIT_SHIFT_TO_INIT \
72   if (data->__statep->__count != 0)                                           \
73     {                                                                         \
74       if (! (FROM_DIRECTION)                                                  \
75           && __builtin_expect (outbuf + 2 > outend, 0))                       \
76         /* We don't have enough room in the output buffer.  */                \
77         status = __GCONV_FULL_OUTPUT;                                         \
78       else                                                                    \
79         {                                                                     \
80           if (! (FROM_DIRECTION))                                             \
81             {                                                                 \
82               /* Write out the buffered two-byte character.  */               \
83               uint32_t pending = data->__statep->__count >> 3;                \
84               *outbuf++ = (pending >> 8) & 0xff;                              \
85               *outbuf++ = pending & 0xff;                                     \
86             }                                                                 \
87                                                                               \
88           /* Back to the initial state (unused in the FROM_DIRECTION).  */    \
89           data->__statep->__count = 0;                                        \
90         }                                                                     \
91     }
92
93 /* First define the conversion function from Shift_JISX0213 to UCS-4.  Each
94    iteration of BODY converts one input character of one or two bytes.  */
95 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
96 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
97 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
98 #define LOOPFCT                 FROM_LOOP
99 #define BODY \
100   {                                                                           \
101     uint32_t ch = *inptr;                                                     \
102                                                                               \
103     if (ch < 0x80)                                                            \
104       {                                                                       \
105         /* Plain ISO646-JP character.  */                                     \
106         if (__builtin_expect (ch == 0x5c, 0))                                 \
107           ch = 0xa5;                                                          \
108         else if (__builtin_expect (ch == 0x7e, 0))                            \
109           ch = 0x203e;                                                        \
110         ++inptr;                                                              \
111       }                                                                       \
112     else if (ch >= 0xa1 && ch <= 0xdf)                                        \
113       {                                                                       \
114         /* Half-width katakana.  */                                           \
115         ch += 0xfec0;                                                         \
116         ++inptr;                                                              \
117       }                                                                       \
118     else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc))        \
119       {                                                                       \
120         /* Two byte character.  */                                            \
121         uint32_t ch2;                                                         \
122                                                                               \
123         if (__builtin_expect (inptr + 1 >= inend, 0))                         \
124           {                                                                   \
125             /* The second byte is not available.  */                          \
126             result = __GCONV_INCOMPLETE_INPUT;                                \
127             break;                                                            \
128           }                                                                   \
129                                                                               \
130         ch2 = inptr[1];                                                       \
131                                                                               \
132         /* The second byte must be in the range 0x{40..7E,80..FC}.  */        \
133         if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0))    \
134           {                                                                   \
135             /* This is an illegal character.  */                              \
136             if (! ignore_errors_p ())                                         \
137               {                                                               \
138                 result = __GCONV_ILLEGAL_INPUT;                               \
139                 break;                                                        \
140               }                                                               \
141             /* Errors are ignored: skip the lead byte and go on.  */          \
142             ++inptr;                                                          \
143             ++*irreversible;                                                  \
144             continue;                                                         \
145           }                                                                   \
146                                                                               \
147         /* Convert to row and column.  */                                     \
148         if (ch < 0xe0)                                                        \
149           ch -= 0x81;                                                         \
150         else                                                                  \
151           ch -= 0xc1;                                                         \
152         if (ch2 < 0x80)                                                       \
153           ch2 -= 0x40;                                                        \
154         else                                                                  \
155           ch2 -= 0x41;                                                        \
156         /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb.  */                         \
157         ch = 2 * ch;                                                          \
158         if (ch2 >= 0x5e)                                                      \
159           ch2 -= 0x5e, ch++;                                                  \
160         ch2 += 0x21;                                                          \
161         if (ch >= 0x5e)                                                       \
162           {                                                                   \
163             /* Handling of JISX 0213 plane 2 rows.  */                        \
164             if (ch >= 0x67)                                                   \
165               ch += 230;                                                      \
166             else if (ch >= 0x63 || ch == 0x5f)                                \
167               ch += 168;                                                      \
168             else                                                              \
169               ch += 162;                                                      \
170           }                                                                   \
171                                                                               \
172         ch = jisx0213_to_ucs4 (0x121 + ch, ch2);                              \
173                                                                               \
174         if (ch == 0)                                                          \
175           {                                                                   \
176             /* This is an illegal character.  */                              \
177             if (! ignore_errors_p ())                                         \
178               {                                                               \
179                 result = __GCONV_ILLEGAL_INPUT;                               \
180                 break;                                                        \
181               }                                                               \
182             /* Errors are ignored: skip the lead byte and go on.  */          \
183             ++inptr;                                                          \
184             ++*irreversible;                                                  \
185             continue;                                                         \
186           }                                                                   \
187                                                                               \
188         if (ch < 0x80)                                                        \
189           {                                                                   \
190             /* It's a combining character.  */                                \
191             uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];             \
192             uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];             \
193                                                                               \
194             /* See whether we have room for two characters.  */               \
195             /* The input is consumed only if both of them fit.  */            \
196             if (outptr + 8 > outend)                                          \
197               {                                                               \
198                 result = __GCONV_FULL_OUTPUT;                                 \
199                 break;                                                        \
200               }                                                               \
201                                                                               \
202             inptr += 2;                                                       \
203             put32 (outptr, u1);                                               \
204             outptr += 4;                                                      \
205             put32 (outptr, u2);                                               \
206             outptr += 4;                                                      \
207             continue;                                                         \
208           }                                                                   \
209                                                                               \
210         inptr += 2;                                                           \
211       }                                                                       \
212     else                                                                      \
213       {                                                                       \
214         /* This is illegal.  */                                               \
215         if (! ignore_errors_p ())                                             \
216           {                                                                   \
217             result = __GCONV_ILLEGAL_INPUT;                                   \
218             break;                                                            \
219           }                                                                   \
220                                                                               \
221         ++inptr;                                                              \
222         ++*irreversible;                                                      \
223         continue;                                                             \
224       }                                                                       \
225                                                                               \
226     put32 (outptr, ch);                                                       \
227     outptr += 4;                                                              \
228   }
229 #define LOOP_NEED_FLAGS
230 #define EXTRA_LOOP_DECLS        , int *statep
231 #include <iconv/loop.c>
232
233
234 /* Next, define the other direction, from UCS-4 to Shift_JISX0213.  */
235
236 /* Composition tables for each of the relevant combining characters.  */
237 static const struct
238 {
239   uint16_t base;      /* Two-byte Shift_JISX0213 code of the base character.  */
240   uint16_t composed;  /* Two-byte code output for base + combining character.  */
241 } comp_table_data[] =
242 {
243 #define COMP_TABLE_IDX_02E5 0   /* Index of the first row for U+02E5.  */
244 #define COMP_TABLE_LEN_02E5 1   /* Number of rows for U+02E5.  */
245   { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */
246 #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
247 #define COMP_TABLE_LEN_02E9 1
248   { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */
249 #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
250 #define COMP_TABLE_LEN_0300 5
251   { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */
252   { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */
253   { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */
254   { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */
255   { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */
256 #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
257 #define COMP_TABLE_LEN_0301 4
258   { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */
259   { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */
260   { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */
261   { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */
262 #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
263 #define COMP_TABLE_LEN_309A 14
264   { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */
265   { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */
266   { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */
267   { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */
268   { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */
269   { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */
270   { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */
271   { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */
272   { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */
273   { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */
274   { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */
275   { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */
276   { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */
277   { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */
278 };
279 /* BODY handles one UCS-4 character; *STATEP holds a buffered character.  */
280 #define MIN_NEEDED_INPUT        MIN_NEEDED_TO
281 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
282 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
283 #define LOOPFCT                 TO_LOOP
284 #define BODY \
285   {                                                                           \
286     uint32_t ch = get32 (inptr);                                              \
287     /* A character is still buffered in the state.  */                        \
288     if ((*statep >> 3) != 0)                                                  \
289       {                                                                       \
290         /* Attempt to combine the last character with this one.  */           \
291         uint16_t lasttwo = *statep >> 3;                                      \
292         unsigned int idx;                                                     \
293         unsigned int len;                                                     \
294         /* Select the rows of comp_table_data for this combiner.  */          \
295         if (ch == 0x02e5)                                                     \
296           idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;               \
297         else if (ch == 0x02e9)                                                \
298           idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;               \
299         else if (ch == 0x0300)                                                \
300           idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;               \
301         else if (ch == 0x0301)                                                \
302           idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;               \
303         else if (ch == 0x309a)                                                \
304           idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;               \
305         else                                                                  \
306           goto not_combining;                                                 \
307         /* Look for the buffered character among their bases.  */             \
308         do                                                                    \
309           if (comp_table_data[idx].base == lasttwo)                           \
310             break;                                                            \
311         while (++idx, --len > 0);                                             \
312         /* LEN is nonzero only if the search stopped at a match.  */          \
313         if (len > 0)                                                          \
314           {                                                                   \
315             /* Output the combined character.  */                             \
316             if (__builtin_expect (outptr + 1 >= outend, 0))                   \
317               {                                                               \
318                 result = __GCONV_FULL_OUTPUT;                                 \
319                 break;                                                        \
320               }                                                               \
321             lasttwo = comp_table_data[idx].composed;                          \
322             *outptr++ = (lasttwo >> 8) & 0xff;                                \
323             *outptr++ = lasttwo & 0xff;                                       \
324             *statep = 0;                                                      \
325             inptr += 4;                                                       \
326             continue;                                                         \
327           }                                                                   \
328                                                                               \
329       not_combining:                                                          \
330         /* Output the buffered character.  CH is not consumed here.  */       \
331         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
332           {                                                                   \
333             result = __GCONV_FULL_OUTPUT;                                     \
334             break;                                                            \
335           }                                                                   \
336         *outptr++ = (lasttwo >> 8) & 0xff;                                    \
337         *outptr++ = lasttwo & 0xff;                                           \
338         *statep = 0;                                                          \
339         continue;                                                             \
340       }                                                                       \
341                                                                               \
342     if (ch < 0x80)                                                            \
343       /* Plain ISO646-JP character.  */                                       \
344       *outptr++ = ch;                                                         \
345     else if (ch == 0xa5)                                                      \
346       *outptr++ = 0x5c;                                                       \
347     else if (ch == 0x203e)                                                    \
348       *outptr++ = 0x7e;                                                       \
349     else if (ch >= 0xff61 && ch <= 0xff9f)                                    \
350       /* Half-width katakana.  */                                             \
351       *outptr++ = ch - 0xfec0;                                                \
352     else                                                                      \
353       {                                                                       \
354         unsigned int s1, s2;                                                  \
355         uint32_t jch = ucs4_to_jisx0213 (ch);                                 \
356         if (jch == 0)                                                         \
357           {                                                                   \
358             UNICODE_TAG_HANDLER (ch, 4);                                      \
359                                                                               \
360             /* Illegal character.  */                                         \
361             STANDARD_ERR_HANDLER (4);                                         \
362           }                                                                   \
363                                                                               \
364         /* Convert it to shifted representation.  */                          \
365         s1 = jch >> 8;                                                        \
366         s2 = jch & 0x7f;                                                              \
367         s1 -= 0x21;                                                           \
368         s2 -= 0x21;                                                           \
369         if (s1 >= 0x5e)                                                       \
370           {                                                                   \
371             /* Handling of JISX 0213 plane 2 rows.  */                        \
372             if (s1 >= 0xcd) /* rows 0x26E..0x27E */                           \
373               s1 -= 102;                                                      \
374             else if (s1 >= 0x8b || s1 == 0x87) /* rows 0x228, 0x22C..0x22F */ \
375               s1 -= 40;                                                       \
376             else /* rows 0x221, 0x223..0x225 */                               \
377               s1 -= 34;                                                       \
378             /* Now 0x5e <= s1 <= 0x77.  */                                    \
379           }                                                                   \
380         if (s1 & 1)                                                           \
381           s2 += 0x5e;                                                         \
382         s1 = s1 >> 1;                                                         \
383         if (s1 < 0x1f)                                                        \
384           s1 += 0x81;                                                         \
385         else                                                                  \
386           s1 += 0xc1;                                                         \
387         if (s2 < 0x3f)                                                        \
388           s2 += 0x40;                                                         \
389         else                                                                  \
390           s2 += 0x41;                                                         \
391                                                                               \
392         if (jch & 0x0080)                                                     \
393           {                                                                   \
394             /* A possible match in comp_table_data.  We have to buffer it.  */\
395                                                                               \
396             /* We know it's a JISX 0213 plane 1 character.  */                \
397             assert ((jch & 0x8000) == 0);                                     \
398                                                                               \
399             *statep = ((s1 << 8) | s2) << 3;                                  \
400             inptr += 4;                                                       \
401             continue;                                                         \
402           }                                                                   \
403                                                                               \
404         /* Output the shifted representation.  */                             \
405         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
406           {                                                                   \
407             result = __GCONV_FULL_OUTPUT;                                     \
408             break;                                                            \
409           }                                                                   \
410         *outptr++ = s1;                                                       \
411         *outptr++ = s2;                                                       \
412       }                                                                       \
413     /* The UCS-4 character has been converted; consume it.  */                \
414     inptr += 4;                                                               \
415   }
416 #define LOOP_NEED_FLAGS
417 #define EXTRA_LOOP_DECLS        , int *statep
418 #include <iconv/loop.c>
419
420
421 /* Now define the toplevel functions.  */
422 #include <iconv/skeleton.c>