(BODY for FROM_LOOP): Don't advance inptr when there is not enough room in the
[kopensolaris-gnu/glibc.git] / iconvdata / shift_jisx0213.c
/* Conversion from and to Shift_JISX0213.
   Copyright (C) 2002 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Bruno Haible <bruno@clisp.org>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
20
21 #include <dlfcn.h>
22 #include <stdint.h>
23 #include <gconv.h>
24
/* The structure of Shift_JISX0213 is as follows:

   0x00..0x7F: ISO646-JP, an ASCII variant

   0x{A1..DF}: JISX0201 Katakana.

   0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.

   0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.

   Note that some JISX0213 characters are not contained in Unicode 3.2
   and are therefore best represented as sequences of Unicode characters.
*/
38
39 #include "jisx0213.h"
40
/* Definitions used in the body of the `gconv' function.  They configure
   the generic code included from <iconv/loop.c> and <iconv/skeleton.c>
   further down in this file.  */
#define CHARSET_NAME            "SHIFT_JISX0213//"
#define FROM_LOOP               from_shift_jisx0213
#define TO_LOOP                 to_shift_jisx0213
#define DEFINE_INIT             1
#define DEFINE_FINI             1
/* Shift_JISX0213 -> UCS-4: a character takes 1 or 2 input bytes and
   produces one 4-byte UCS-4 unit, or two of them (8 bytes) when it has to
   be written as a base character followed by a combining character.  */
#define FROM_LOOP_MIN_NEEDED_FROM       1
#define FROM_LOOP_MAX_NEEDED_FROM       2
#define FROM_LOOP_MIN_NEEDED_TO         4
#define FROM_LOOP_MAX_NEEDED_TO         8
/* UCS-4 -> Shift_JISX0213: every input unit is 4 bytes and produces 1 or
   2 output bytes.  */
#define TO_LOOP_MIN_NEEDED_FROM         4
#define TO_LOOP_MAX_NEEDED_FROM         4
#define TO_LOOP_MIN_NEEDED_TO           1
#define TO_LOOP_MAX_NEEDED_TO           2
/* Locals for the conversion function: STATEP points at the COUNT field of
   the conversion state, SAVED_STATE is the slot SAVE_RESET_STATE copies
   that field to and from.  */
#define PREPARE_LOOP \
  int saved_state;                                                            \
  int *statep = &data->__statep->__count;
/* Extra argument handed to the loop functions; it matches the parameter
   declared through EXTRA_LOOP_DECLS.  */
#define EXTRA_LOOP_ARGS         , statep
59
60
/* Since we might have to reset input pointer we must be able to save
   and restore the state.

   SAVE_RESET_STATE (1) copies *STATEP into SAVED_STATE;
   SAVE_RESET_STATE (0) copies SAVED_STATE back into *STATEP.  Both
   variables are the locals declared by PREPARE_LOOP.  The expansion has
   no trailing semicolon, so invoke it as a statement.  */
#define SAVE_RESET_STATE(Save) \
  if (Save)                                                                   \
    saved_state = *statep;                                                    \
  else                                                                        \
    *statep = saved_state
68
69
/* During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
   contains the last two bytes to be output, shifted by 3 bits.  */

/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The expansion uses DATA, OUTBUF, OUTEND and STATUS from the enclosing
   scope.  A pending two-byte character is written lead byte first and the
   state is cleared.  If fewer than two bytes of room are left, nothing is
   written, the state is kept and STATUS is set to __GCONV_FULL_OUTPUT.  */
#define EMIT_SHIFT_TO_INIT \
  if (data->__statep->__count != 0)                                           \
    {                                                                         \
      if (FROM_DIRECTION)                                                     \
        /* We don't use shift states in the FROM_DIRECTION.  */               \
        data->__statep->__count = 0;                                          \
      else                                                                    \
        {                                                                     \
          /* Test the remaining room instead of forming OUTBUF + 2, which     \
             may lie beyond the end of the buffer.  */                        \
          if (__builtin_expect (outend - outbuf >= 2, 1))                     \
            {                                                                 \
              /* Write out the last character.  */                            \
              uint32_t lasttwo = data->__statep->__count >> 3;                \
              *outbuf++ = (lasttwo >> 8) & 0xff;                              \
              *outbuf++ = lasttwo & 0xff;                                     \
              data->__statep->__count = 0;                                    \
            }                                                                 \
          else                                                                \
            /* We don't have enough room in the output buffer.  */            \
            status = __GCONV_FULL_OUTPUT;                                     \
        }                                                                     \
    }
97
98
/* First define the conversion function from Shift_JISX0213 to UCS-4.  */
#define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
/* Convert the Shift_JISX0213 character at INPTR to UCS-4 at OUTPTR.

   The body is expanded inside a loop that supplies INPTR/INEND,
   OUTPTR/OUTEND, RESULT and IRREVERSIBLE.  `break' leaves that loop and is
   only used after RESULT has been set; `continue' goes on with the next
   input character.

   Invalid input: unless ignore_errors_p () is true, RESULT becomes
   __GCONV_ILLEGAL_INPUT and the loop is left with INPTR unchanged.
   Otherwise exactly one byte is skipped, *IRREVERSIBLE is incremented and
   conversion goes on with the following byte.  This holds for an invalid
   lead byte, an invalid trail byte and an unmapped two-byte code alike.

   A two-byte code that maps to a base + combining pair is consumed only
   if both UCS-4 units (8 bytes) fit into the output buffer.  */
#define BODY \
  {                                                                           \
    uint32_t ch = *inptr;                                                     \
                                                                              \
    if (ch < 0x80)                                                            \
      {                                                                       \
        /* Plain ISO646-JP character.  */                                     \
        if (__builtin_expect (ch == 0x5c, 0))                                 \
          ch = 0xa5;                                                          \
        else if (__builtin_expect (ch == 0x7e, 0))                            \
          ch = 0x203e;                                                        \
        ++inptr;                                                              \
      }                                                                       \
    else if (ch >= 0xa1 && ch <= 0xdf)                                        \
      {                                                                       \
        /* Half-width katakana.  */                                           \
        ch += 0xfec0;                                                         \
        ++inptr;                                                              \
      }                                                                       \
    else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc))        \
      {                                                                       \
        /* Two byte character.  */                                            \
        uint32_t ch2;                                                         \
                                                                              \
        if (__builtin_expect (inptr + 1 >= inend, 0))                         \
          {                                                                   \
            /* The second byte is not available.  */                          \
            result = __GCONV_INCOMPLETE_INPUT;                                \
            break;                                                            \
          }                                                                   \
                                                                              \
        ch2 = inptr[1];                                                       \
                                                                              \
        /* The second byte must be in the range 0x{40..7E,80..FC}.  */        \
        if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0))    \
          {                                                                   \
            /* This is an illegal character.  */                              \
            if (! ignore_errors_p ())                                         \
              {                                                               \
                result = __GCONV_ILLEGAL_INPUT;                               \
                break;                                                        \
              }                                                               \
                                                                              \
            /* Skip the lead byte only and go on converting.  */              \
            ++inptr;                                                          \
            ++*irreversible;                                                  \
            continue;                                                         \
          }                                                                   \
                                                                              \
        /* Convert to row and column.  */                                     \
        if (ch < 0xe0)                                                        \
          ch -= 0x81;                                                         \
        else                                                                  \
          ch -= 0xc1;                                                         \
        if (ch2 < 0x80)                                                       \
          ch2 -= 0x40;                                                        \
        else                                                                  \
          ch2 -= 0x41;                                                        \
        /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb.  */                         \
        ch = 2 * ch;                                                          \
        if (ch2 >= 0x5e)                                                      \
          ch2 -= 0x5e, ch++;                                                  \
        ch2 += 0x21;                                                          \
        if (ch >= 0x5e)                                                       \
          {                                                                   \
            /* Handling of JISX 0213 plane 2 rows.  */                        \
            if (ch >= 0x67)                                                   \
              ch += 230;                                                      \
            else if (ch >= 0x63 || ch == 0x5f)                                \
              ch += 168;                                                      \
            else                                                              \
              ch += 162;                                                      \
          }                                                                   \
                                                                              \
        ch = jisx0213_to_ucs4 (0x121 + ch, ch2);                              \
                                                                              \
        if (ch == 0)                                                          \
          {                                                                   \
            /* This is an illegal character.  */                              \
            if (! ignore_errors_p ())                                         \
              {                                                               \
                result = __GCONV_ILLEGAL_INPUT;                               \
                break;                                                        \
              }                                                               \
                                                                              \
            /* Skip the lead byte only and go on converting.  */              \
            ++inptr;                                                          \
            ++*irreversible;                                                  \
            continue;                                                         \
          }                                                                   \
                                                                              \
        if (ch < 0x80)                                                        \
          {                                                                   \
            /* It's a combining character.  */                                \
            uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];             \
            uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];             \
                                                                              \
            /* See whether we have room for two characters.  The room is      \
               measured as a difference so that no pointer beyond OUTEND      \
               is ever formed.  */                                            \
            if (outend - outptr >= 8)                                         \
              {                                                               \
                inptr += 2;                                                   \
                put32 (outptr, u1);                                           \
                outptr += 4;                                                  \
                put32 (outptr, u2);                                           \
                outptr += 4;                                                  \
                continue;                                                     \
              }                                                               \
            else                                                              \
              {                                                               \
                /* INPTR is not advanced: the character is retried once       \
                   there is room.  */                                         \
                result = __GCONV_FULL_OUTPUT;                                 \
                break;                                                        \
              }                                                               \
          }                                                                   \
                                                                              \
        inptr += 2;                                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* This is illegal.  */                                               \
        if (! ignore_errors_p ())                                             \
          {                                                                   \
            result = __GCONV_ILLEGAL_INPUT;                                   \
            break;                                                            \
          }                                                                   \
                                                                              \
        ++inptr;                                                              \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
                                                                              \
    put32 (outptr, ch);                                                       \
    outptr += 4;                                                              \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS        , int *statep
238 #include <iconv/loop.c>
239
240
/* Next, define the other direction, from UCS-4 to Shift_JISX0213.  */

/* Composition tables for each of the relevant combining characters.

   The entries are grouped by combining character; COMP_TABLE_IDX_xxxx is
   the index of the first entry of the group for U+xxxx and
   COMP_TABLE_LEN_xxxx the number of entries in it.  BASE is the two-byte
   Shift_JISX0213 code of a buffered base character (lead byte in the high
   8 bits); COMPOSED is the two-byte code that is output instead when the
   group's combining character follows that base character.  The trailing
   comment of each entry reads "composed = base combining".  */
static const struct
{
  uint16_t base;
  uint16_t composed;
} comp_table_data[] =
{
#define COMP_TABLE_IDX_02E5 0
#define COMP_TABLE_LEN_02E5 1
  { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */
#define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
#define COMP_TABLE_LEN_02E9 1
  { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */
#define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
#define COMP_TABLE_LEN_0300 5
  { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */
  { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */
  { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */
  { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */
  { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */
#define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
#define COMP_TABLE_LEN_0301 4
  { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */
  { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */
  { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */
  { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */
#define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
#define COMP_TABLE_LEN_309A 14
  { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */
  { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */
  { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */
  { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */
  { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */
  { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */
  { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */
  { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */
  { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */
  { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */
  { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */
  { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */
  { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */
  { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */
};
286
#define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT                 TO_LOOP
/* Convert the UCS-4 character at INPTR to Shift_JISX0213 at OUTPTR.

   The body is expanded inside a loop that supplies INPTR, OUTPTR/OUTEND,
   RESULT and STATEP.  `break' leaves that loop after RESULT has been set;
   `continue' goes on with the next iteration.

   A character that may be the base of a composition (bit 0x0080 set in
   the value returned by ucs4_to_jisx0213) is not written at once.  Its
   two-byte code is kept in *STATEP, shifted left by 3 bits.  On the next
   iteration it is either replaced by the composed code from
   comp_table_data, if the current character is the matching combining
   character, or written unchanged.  Every two-byte write first checks
   that two bytes of room are left and reports __GCONV_FULL_OUTPUT
   otherwise, leaving INPTR and the state untouched.  */
#define BODY \
  {                                                                           \
    uint32_t ch = get32 (inptr);                                              \
                                                                              \
    if ((*statep >> 3) != 0)                                                  \
      {                                                                       \
        /* Attempt to combine the last character with this one.  */           \
        uint16_t lasttwo = *statep >> 3;                                      \
        unsigned int idx;                                                     \
        unsigned int len;                                                     \
                                                                              \
        if (ch == 0x02e5)                                                     \
          idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;               \
        else if (ch == 0x02e9)                                                \
          idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;               \
        else if (ch == 0x0300)                                                \
          idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;               \
        else if (ch == 0x0301)                                                \
          idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;               \
        else if (ch == 0x309a)                                                \
          idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;               \
        else                                                                  \
          goto not_combining;                                                 \
                                                                              \
        /* Search the group; LEN stays positive only if an entry for the      \
           buffered base character was found.  */                             \
        do                                                                    \
          if (comp_table_data[idx].base == lasttwo)                           \
            break;                                                            \
        while (++idx, --len > 0);                                             \
                                                                              \
        if (len > 0)                                                          \
          {                                                                   \
            /* Output the combined character.  */                             \
            if (__builtin_expect (outptr + 1 >= outend, 0))                   \
              {                                                               \
                result = __GCONV_FULL_OUTPUT;                                 \
                break;                                                        \
              }                                                               \
            lasttwo = comp_table_data[idx].composed;                          \
            *outptr++ = (lasttwo >> 8) & 0xff;                                \
            *outptr++ = lasttwo & 0xff;                                       \
            *statep = 0;                                                      \
            inptr += 4;                                                       \
            continue;                                                         \
          }                                                                   \
                                                                              \
      not_combining:                                                          \
        /* Output the buffered character.  */                                 \
        if (__builtin_expect (outptr + 1 >= outend, 0))                       \
          {                                                                   \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
        *outptr++ = (lasttwo >> 8) & 0xff;                                    \
        *outptr++ = lasttwo & 0xff;                                           \
        *statep = 0;                                                          \
        /* INPTR stays put: CH is converted on the next iteration.  */        \
        continue;                                                             \
      }                                                                       \
                                                                              \
    if (ch < 0x80)                                                            \
      /* Plain ISO646-JP character.  */                                       \
      *outptr++ = ch;                                                         \
    else if (ch == 0xa5)                                                      \
      *outptr++ = 0x5c;                                                       \
    else if (ch == 0x203e)                                                    \
      *outptr++ = 0x7e;                                                       \
    else if (ch >= 0xff61 && ch <= 0xff9f)                                    \
      /* Half-width katakana.  */                                             \
      *outptr++ = ch - 0xfec0;                                                \
    else                                                                      \
      {                                                                       \
        unsigned int s1, s2;                                                  \
        uint32_t jch = ucs4_to_jisx0213 (ch);                                 \
        if (jch == 0)                                                         \
          {                                                                   \
            UNICODE_TAG_HANDLER (ch, 4);                                      \
                                                                              \
            /* Illegal character.  */                                         \
            STANDARD_ERR_HANDLER (4);                                         \
          }                                                                   \
                                                                              \
        /* Convert it to shifted representation.  */                          \
        s1 = jch >> 8;                                                        \
        s2 = jch & 0x7f;                                                      \
        s1 -= 0x21;                                                           \
        s2 -= 0x21;                                                           \
        if (s1 >= 0x5e)                                                       \
          {                                                                   \
            /* Handling of JISX 0213 plane 2 rows.  */                        \
            if (s1 >= 0xcd) /* rows 0x26E..0x27E */                           \
              s1 -= 102;                                                      \
            else if (s1 >= 0x8b || s1 == 0x87) /* rows 0x228, 0x22C..0x22F */ \
              s1 -= 40;                                                       \
            else /* rows 0x221, 0x223..0x225 */                               \
              s1 -= 34;                                                       \
            /* Now 0x5e <= s1 <= 0x77.  */                                    \
          }                                                                   \
        if (s1 & 1)                                                           \
          s2 += 0x5e;                                                         \
        s1 = s1 >> 1;                                                         \
        if (s1 < 0x1f)                                                        \
          s1 += 0x81;                                                         \
        else                                                                  \
          s1 += 0xc1;                                                         \
        if (s2 < 0x3f)                                                        \
          s2 += 0x40;                                                         \
        else                                                                  \
          s2 += 0x41;                                                         \
                                                                              \
        if (jch & 0x0080)                                                     \
          {                                                                   \
            /* A possible match in comp_table_data.  We have to buffer it.  */\
                                                                              \
            /* We know it's a JISX 0213 plane 1 character.  */                \
            assert ((jch & 0x8000) == 0);                                     \
                                                                              \
            *statep = ((s1 << 8) | s2) << 3;                                  \
            inptr += 4;                                                       \
            continue;                                                         \
          }                                                                   \
                                                                              \
        /* Output the shifted representation.  */                             \
        if (__builtin_expect (outptr + 1 >= outend, 0))                       \
          {                                                                   \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
        *outptr++ = s1;                                                       \
        *outptr++ = s2;                                                       \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS        , int *statep
426 #include <iconv/loop.c>
427
428
429 /* Now define the toplevel functions.  */
430 #include <iconv/skeleton.c>