3a627245e33573491a07225d41c78b70c0035542
[kopensolaris-gnu/glibc.git] / iconvdata / shift_jisx0213.c
1 /* Conversion from and to Shift_JISX0213.
2    Copyright (C) 2002 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Bruno Haible <bruno@clisp.org>, 2002.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, write to the Free
18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20
21 #include <dlfcn.h>
22 #include <stdint.h>
23 #include <gconv.h>
24
25 /* The structure of Shift_JISX0213 is as follows:
26
27    0x00..0x7F: ISO646-JP, an ASCII variant
28
29    0x{A1..DF}: JISX0201 Katakana.
30
31    0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.
32
33    0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.
34
35    Note that some JISX0213 characters are not contained in Unicode 3.2
36    and are therefore best represented as sequences of Unicode characters.
37 */
38
39 #include "jisx0213.h"
40
41 /* Definitions used in the body of the `gconv' function.  */
42 #define CHARSET_NAME            "SHIFT_JISX0213//"
43 #define FROM_LOOP               from_shift_jisx0213
44 #define TO_LOOP                 to_shift_jisx0213
45 #define DEFINE_INIT             1
46 #define DEFINE_FINI             1
/* Byte counts per step of the Shift_JISX0213 -> UCS-4 loop: one or two
   input bytes are read; one UCS-4 character (4 bytes) is written, or two
   (8 bytes) when a JISX0213 character expands to a combining sequence.  */
47 #define FROM_LOOP_MIN_NEEDED_FROM       1
48 #define FROM_LOOP_MAX_NEEDED_FROM       2
49 #define FROM_LOOP_MIN_NEEDED_TO         4
50 #define FROM_LOOP_MAX_NEEDED_TO         8
/* Byte counts per step of the UCS-4 -> Shift_JISX0213 loop: one 4-byte
   UCS-4 character is read and at most two bytes are written.  */
51 #define TO_LOOP_MIN_NEEDED_FROM         4
52 #define TO_LOOP_MAX_NEEDED_FROM         4
53 #define TO_LOOP_MIN_NEEDED_TO           1
54 #define TO_LOOP_MAX_NEEDED_TO           2
/* PREPARE_LOOP declares SAVED_STATE, the backing store used by
   SAVE_RESET_STATE, and STATEP, a pointer to the COUNT element of the
   conversion state.  */
55 #define PREPARE_LOOP \
56   int saved_state;                                                            \
57   int *statep = &data->__statep->__count;
/* STATEP is passed on as an extra argument; it matches the `int *statep'
   parameter added through EXTRA_LOOP_DECLS further down.  */
58 #define EXTRA_LOOP_ARGS         , statep
59
60
61 /* Since we might have to reset the input pointer, we must be able to save
62    and restore the state.  With a nonzero SAVE the current state (*STATEP)
   is copied into SAVED_STATE; with a zero SAVE it is restored from there.
   Both variables come from PREPARE_LOOP.  The expansion ends without a
   semicolon, which the user of the macro has to supply.  */
63 #define SAVE_RESET_STATE(Save) \
64   if (Save)                                                                   \
65     saved_state = *statep;                                                    \
66   else                                                                        \
67     *statep = saved_state
68
69
70 /* During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
71    contains the last two bytes to be output, shifted by 3 bits.  A COUNT of
   zero means that no character is pending.  */
72
73 /* Since this is a stateful encoding we have to provide code which resets
74    the output state to the initial state.  This has to be done during the
75    flushing.  In the to-direction the two pending bytes are written to
   OUTBUF, high byte first, and the state is cleared; if fewer than two
   bytes of room remain, STATUS is set to __GCONV_FULL_OUTPUT and the
   state is left as it is.  In the from-direction the state is merely
   cleared.  */
76 #define EMIT_SHIFT_TO_INIT \
77   if (data->__statep->__count != 0)                                           \
78     {                                                                         \
79       if (FROM_DIRECTION)                                                     \
80         /* We don't use shift states in the FROM_DIRECTION.  */               \
81         data->__statep->__count = 0;                                          \
82       else                                                                    \
83         {                                                                     \
84           if (__builtin_expect (outbuf + 2 <= outend, 1))                     \
85             {                                                                 \
86               /* Write out the last character.  */                            \
87               uint32_t lasttwo = data->__statep->__count >> 3;                \
88               *outbuf++ = (lasttwo >> 8) & 0xff;                              \
89               *outbuf++ = lasttwo & 0xff;                                     \
90               data->__statep->__count = 0;                                    \
91             }                                                                 \
92           else                                                                \
93             /* We don't have enough room in the output buffer.  */            \
94             status = __GCONV_FULL_OUTPUT;                                     \
95         }                                                                     \
96     }
97
98
99 /* First define the conversion function from Shift_JISX0213 to UCS-4.
   Bytes below 0x80 are ISO646-JP (0x5C and 0x7E become U+00A5 and U+203E),
   0xA1..0xDF are half-width katakana, and the lead bytes 0x81..0x9F and
   0xE0..0xFC start a two-byte JISX0213 character.  Some two-byte
   characters expand to a sequence of two UCS-4 characters; the input is
   consumed only when both of them fit into the output buffer, so that
   stopping with __GCONV_FULL_OUTPUT never drops a character.  When errors
   are being ignored, the offending byte is skipped, *IRREVERSIBLE is
   incremented and the loop carries on with the next byte.  */
100 #define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
101 #define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
102 #define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
103 #define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
104 #define LOOPFCT                 FROM_LOOP
105 #define BODY \
106   {                                                                           \
107     uint32_t ch = *inptr;                                                     \
108                                                                               \
109     if (ch < 0x80)                                                            \
110       {                                                                       \
111         /* Plain ISO646-JP character.  */                                     \
112         if (__builtin_expect (ch == 0x5c, 0))                                 \
113           ch = 0xa5;                                                          \
114         else if (__builtin_expect (ch == 0x7e, 0))                            \
115           ch = 0x203e;                                                        \
116         ++inptr;                                                              \
117       }                                                                       \
118     else if (ch >= 0xa1 && ch <= 0xdf)                                        \
119       {                                                                       \
120         /* Half-width katakana.  */                                           \
121         ch += 0xfec0;                                                         \
122         ++inptr;                                                              \
123       }                                                                       \
124     else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc))        \
125       {                                                                       \
126         /* Two byte character.  */                                            \
127         uint32_t ch2;                                                         \
128                                                                               \
129         if (__builtin_expect (inptr + 1 >= inend, 0))                         \
130           {                                                                   \
131             /* The second byte is not available.  */                          \
132             result = __GCONV_INCOMPLETE_INPUT;                                \
133             break;                                                            \
134           }                                                                   \
135                                                                               \
136         ch2 = inptr[1];                                                       \
137                                                                               \
138         /* The second byte must be in the range 0x{40..7E,80..FC}.  */        \
139         if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0))    \
140           {                                                                   \
141             /* This is an illegal character.  */                              \
142             if (! ignore_errors_p ())                                         \
143               {                                                               \
144                 result = __GCONV_ILLEGAL_INPUT;                               \
145                 break;                                                        \
146               }                                                               \
147                                                                               \
148             ++inptr;                                                          \
149             ++*irreversible;                                                  \
150             continue;                                                         \
151           }                                                                   \
152                                                                               \
153         /* Convert to row and column.  */                                     \
154         if (ch < 0xe0)                                                        \
155           ch -= 0x81;                                                         \
156         else                                                                  \
157           ch -= 0xc1;                                                         \
158         if (ch2 < 0x80)                                                       \
159           ch2 -= 0x40;                                                        \
160         else                                                                  \
161           ch2 -= 0x41;                                                        \
162         /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb.  */                         \
163         ch = 2 * ch;                                                          \
164         if (ch2 >= 0x5e)                                                      \
165           ch2 -= 0x5e, ch++;                                                  \
166         ch2 += 0x21;                                                          \
167         if (ch >= 0x5e)                                                       \
168           {                                                                   \
169             /* Handling of JISX 0213 plane 2 rows.  */                        \
170             if (ch >= 0x67)                                                   \
171               ch += 230;                                                      \
172             else if (ch >= 0x63 || ch == 0x5f)                                \
173               ch += 168;                                                      \
174             else                                                              \
175               ch += 162;                                                      \
176           }                                                                   \
177                                                                               \
178         ch = jisx0213_to_ucs4 (0x121 + ch, ch2);                              \
179                                                                               \
180         if (ch == 0)                                                          \
181           {                                                                   \
182             /* This is an illegal character.  */                              \
183             if (! ignore_errors_p ())                                         \
184               {                                                               \
185                 result = __GCONV_ILLEGAL_INPUT;                               \
186                 break;                                                        \
187              }                                                                \
188                                                                               \
189             ++inptr;                                                          \
190             ++*irreversible;                                                  \
191             continue;                                                         \
192           }                                                                   \
193                                                                               \
194         /* Consume the input only once the output is known to fit.  */        \
195                                                                               \
196         if (ch < 0x80)                                                        \
197           {                                                                   \
198             /* It's a combining character.  */                                \
199             uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];             \
200             uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];             \
201                                                                               \
202             /* See whether we have room for two characters.  */               \
203             if (__builtin_expect (outptr + 8 > outend, 0))                    \
204               {                                                               \
205                 result = __GCONV_FULL_OUTPUT;                                 \
206                 break;                                                        \
207               }                                                               \
208                                                                               \
209             inptr += 2;                                                       \
210             put32 (outptr, u1);                                               \
211             outptr += 4;                                                      \
212             put32 (outptr, u2);                                               \
213             outptr += 4;                                                      \
214             continue;                                                         \
215           }                                                                   \
216         inptr += 2;                                                           \
217       }                                                                       \
218     else                                                                      \
219       {                                                                       \
220         /* This is illegal.  */                                               \
221         if (! ignore_errors_p ())                                             \
222           {                                                                   \
223             result = __GCONV_ILLEGAL_INPUT;                                   \
224             break;                                                            \
225           }                                                                   \
226                                                                               \
227         ++inptr;                                                              \
228         ++*irreversible;                                                      \
229         continue;                                                             \
230       }                                                                       \
231                                                                               \
232     put32 (outptr, ch);                                                       \
233     outptr += 4;                                                              \
234   }
235 #define LOOP_NEED_FLAGS
236 #define EXTRA_LOOP_DECLS        , int *statep
237 #include <iconv/loop.c>
238
239
240 /* Next, define the other direction, from UCS-4 to Shift_JISX0213.  */
241
242 /* Composition tables for each of the relevant combining characters.
   Every entry pairs the two-byte Shift_JISX0213 code of a base character
   (BASE) with the two-byte code written instead when that base is
   followed by the combining character (COMPOSED).  For each combining
   character U+xxxx, COMP_TABLE_IDX_xxxx is the index of its first entry
   and COMP_TABLE_LEN_xxxx its number of entries.  The comment after each
   entry appears to state the same relation with JISX0213 codes
   (composed = base, followed by the combining character).  */
243 static const struct
244 {
245   uint16_t base;
246   uint16_t composed;
247 } comp_table_data[] =
248 {
249 #define COMP_TABLE_IDX_02E5 0
250 #define COMP_TABLE_LEN_02E5 1
251   { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */
252 #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
253 #define COMP_TABLE_LEN_02E9 1
254   { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */
255 #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
256 #define COMP_TABLE_LEN_0300 5
257   { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */
258   { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */
259   { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */
260   { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */
261   { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */
262 #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
263 #define COMP_TABLE_LEN_0301 4
264   { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */
265   { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */
266   { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */
267   { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */
268 #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
269 #define COMP_TABLE_LEN_309A 14
270   { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */
271   { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */
272   { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */
273   { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */
274   { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */
275   { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */
276   { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */
277   { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */
278   { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */
279   { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */
280   { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */
281   { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */
282   { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */
283   { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */
284 };
285
/* The UCS-4 to Shift_JISX0213 loop.  A character for which the value
   returned by ucs4_to_jisx0213 has bit 0x0080 set may be the base of an
   entry in comp_table_data; it is not written immediately but kept in
   *STATEP (its two output bytes, shifted left by 3 bits).  On the next
   step the pending bytes are either replaced by the composed form, when
   the new character is one of the combining characters U+02E5, U+02E9,
   U+0300, U+0301 or U+309A and the pair is found in comp_table_data, or
   written out unchanged, in which case the new character is not consumed
   and is handled by the following iteration.  A zero result from
   ucs4_to_jisx0213 is passed to UNICODE_TAG_HANDLER and then to
   STANDARD_ERR_HANDLER.  */
286 #define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
287 #define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
288 #define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
289 #define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
290 #define LOOPFCT                 TO_LOOP
291 #define BODY \
292   {                                                                           \
293     uint32_t ch = get32 (inptr);                                              \
294                                                                               \
295     if ((*statep >> 3) != 0)                                                  \
296       {                                                                       \
297         /* Attempt to combine the last character with this one.  */           \
298         uint16_t lasttwo = *statep >> 3;                                      \
299         unsigned int idx;                                                     \
300         unsigned int len;                                                     \
301                                                                               \
302         if (ch == 0x02e5)                                                     \
303           idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;               \
304         else if (ch == 0x02e9)                                                \
305           idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;               \
306         else if (ch == 0x0300)                                                \
307           idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;               \
308         else if (ch == 0x0301)                                                \
309           idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;               \
310         else if (ch == 0x309a)                                                \
311           idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;               \
312         else                                                                  \
313           goto not_combining;                                                 \
314                                                                               \
315         do                                                                    \
316           if (comp_table_data[idx].base == lasttwo)                           \
317             break;                                                            \
318         while (++idx, --len > 0);                                             \
319                                                                               \
320         if (len > 0)                                                          \
321           {                                                                   \
322             /* Output the combined character.  */                             \
323             if (__builtin_expect (outptr + 1 >= outend, 0))                   \
324               {                                                               \
325                 result = __GCONV_FULL_OUTPUT;                                 \
326                 break;                                                        \
327               }                                                               \
328             lasttwo = comp_table_data[idx].composed;                          \
329             *outptr++ = (lasttwo >> 8) & 0xff;                                \
330             *outptr++ = lasttwo & 0xff;                                       \
331             *statep = 0;                                                      \
332             inptr += 4;                                                       \
333             continue;                                                         \
334           }                                                                   \
335                                                                               \
336       not_combining:                                                          \
337         /* Output the buffered character.  */                                 \
338         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
339           {                                                                   \
340             result = __GCONV_FULL_OUTPUT;                                     \
341             break;                                                            \
342           }                                                                   \
343         *outptr++ = (lasttwo >> 8) & 0xff;                                    \
344         *outptr++ = lasttwo & 0xff;                                           \
345         *statep = 0;                                                          \
346         continue;                                                             \
347       }                                                                       \
348                                                                               \
349     if (ch < 0x80)                                                            \
350       /* Plain ISO646-JP character.  */                                       \
351       *outptr++ = ch;                                                         \
352     else if (ch == 0xa5)                                                      \
353       *outptr++ = 0x5c;                                                       \
354     else if (ch == 0x203e)                                                    \
355       *outptr++ = 0x7e;                                                       \
356     else if (ch >= 0xff61 && ch <= 0xff9f)                                    \
357       /* Half-width katakana.  */                                             \
358       *outptr++ = ch - 0xfec0;                                                \
359     else                                                                      \
360       {                                                                       \
361         unsigned int s1, s2;                                                  \
362         uint32_t jch = ucs4_to_jisx0213 (ch);                                 \
363         if (jch == 0)                                                         \
364           {                                                                   \
365             UNICODE_TAG_HANDLER (ch, 4);                                      \
366                                                                               \
367             /* Illegal character.  */                                         \
368             STANDARD_ERR_HANDLER (4);                                         \
369           }                                                                   \
370                                                                               \
371         /* Convert it to shifted representation.  */                          \
372         s1 = jch >> 8;                                                        \
373         s2 = jch & 0x7f;                                                              \
374         s1 -= 0x21;                                                           \
375         s2 -= 0x21;                                                           \
376         if (s1 >= 0x5e)                                                       \
377           {                                                                   \
378             /* Handling of JISX 0213 plane 2 rows.  */                        \
379             if (s1 >= 0xcd) /* rows 0x26E..0x27E */                           \
380               s1 -= 102;                                                      \
381             else if (s1 >= 0x8b || s1 == 0x87) /* rows 0x228, 0x22C..0x22F */ \
382               s1 -= 40;                                                       \
383             else /* rows 0x221, 0x223..0x225 */                               \
384               s1 -= 34;                                                       \
385             /* Now 0x5e <= s1 <= 0x77.  */                                    \
386           }                                                                   \
387         if (s1 & 1)                                                           \
388           s2 += 0x5e;                                                         \
389         s1 = s1 >> 1;                                                         \
390         if (s1 < 0x1f)                                                        \
391           s1 += 0x81;                                                         \
392         else                                                                  \
393           s1 += 0xc1;                                                         \
394         if (s2 < 0x3f)                                                        \
395           s2 += 0x40;                                                         \
396         else                                                                  \
397           s2 += 0x41;                                                         \
398                                                                               \
399         if (jch & 0x0080)                                                     \
400           {                                                                   \
401             /* A possible match in comp_table_data.  We have to buffer it.  */\
402                                                                               \
403             /* We know it's a JISX 0213 plane 1 character.  */                \
404             assert ((jch & 0x8000) == 0);                                     \
405                                                                               \
406             *statep = ((s1 << 8) | s2) << 3;                                  \
407             inptr += 4;                                                       \
408             continue;                                                         \
409           }                                                                   \
410                                                                               \
411         /* Output the shifted representation.  */                             \
412         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
413           {                                                                   \
414             result = __GCONV_FULL_OUTPUT;                                     \
415             break;                                                            \
416           }                                                                   \
417         *outptr++ = s1;                                                       \
418         *outptr++ = s2;                                                       \
419       }                                                                       \
420                                                                               \
421     inptr += 4;                                                               \
422   }
423 #define LOOP_NEED_FLAGS
424 #define EXTRA_LOOP_DECLS        , int *statep
425 #include <iconv/loop.c>
426
427
428 /* Now define the toplevel functions.  */
429 #include <iconv/skeleton.c>