(BODY for FROM_LOOP): Don't advance inptr when there is not enough room in the output buffer.
[kopensolaris-gnu/glibc.git] / iconvdata / euc-jisx0213.c
1 /* Conversion from and to EUC-JISX0213.
2    Copyright (C) 2002 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Bruno Haible <bruno@clisp.org>, 2002.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, write to the Free
18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20
21 #include <dlfcn.h>
22 #include <stdint.h>
23 #include <gconv.h>
24
25 /* The structure of EUC-JISX0213 is as follows:
26
27    0x00..0x7F: ASCII
28
29    0x8E{A1..DF}: JISX0201 Katakana, with prefix 0x8E, offset by +0x80.
30
31    0x8F{A1..FE}{A1..FE}: JISX0213 plane 2, with prefix 0x8F, offset by +0x8080.
32
33    0x{A1..FE}{A1..FE}: JISX0213 plane 1, offset by +0x8080.
34
35    Note that some JISX0213 characters are not contained in Unicode 3.2
36    and are therefore best represented as sequences of Unicode characters.
37 */
38
39 #include "jisx0213.h"
40
41 /* Definitions used in the body of the `gconv' function.  */
42 #define CHARSET_NAME            "EUC-JISX0213//"
43 #define FROM_LOOP               from_euc_jisx0213 /* EUC-JISX0213 to UCS-4 loop.  */
44 #define TO_LOOP                 to_euc_jisx0213 /* UCS-4 to EUC-JISX0213 loop.  */
45 #define DEFINE_INIT             1 /* NOTE(review): read by skeleton.c; confirm meaning there.  */
46 #define DEFINE_FINI             1 /* NOTE(review): read by skeleton.c; confirm meaning there.  */
47 #define FROM_LOOP_MIN_NEEDED_FROM       1 /* One ASCII byte.  */
48 #define FROM_LOOP_MAX_NEEDED_FROM       3 /* 0x8F prefix plus two bytes.  */
49 #define FROM_LOOP_MIN_NEEDED_TO         4 /* One UCS-4 character.  */
50 #define FROM_LOOP_MAX_NEEDED_TO         8 /* Two UCS-4 characters (combining pair).  */
51 #define TO_LOOP_MIN_NEEDED_FROM         4 /* One UCS-4 character.  */
52 #define TO_LOOP_MAX_NEEDED_FROM         4 /* One UCS-4 character.  */
53 #define TO_LOOP_MIN_NEEDED_TO           1 /* One ASCII byte.  */
54 #define TO_LOOP_MAX_NEEDED_TO           3 /* 0x8F prefix plus two bytes.  */
55 #define PREPARE_LOOP \
56   int saved_state; /* Used by SAVE_RESET_STATE.  */                           \
57   int *statep = &data->__statep->__count; /* The COUNT word of the state.  */
58 #define EXTRA_LOOP_ARGS         , statep /* Matches EXTRA_LOOP_DECLS of both loops.  */
59
60
61 /* Since we might have to reset input pointer we must be able to save
62    and restore the state.  */
63 #define SAVE_RESET_STATE(Save) \
64   if (Save) /* Nonzero: remember the current state.  */                       \
65     saved_state = *statep;                                                    \
66   else /* Zero: restore the remembered state.  */                             \
67     *statep = saved_state
68
69
70 /* During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
71    contains the last two bytes to be output, shifted by 3 bits.  */
72
73 /* Since this is a stateful encoding we have to provide code which resets
74    the output state to the initial state.  This has to be done during the
75    flushing.  */
76 #define EMIT_SHIFT_TO_INIT \
77   if (data->__statep->__count != 0)                                           \
78     { /* COUNT is nonzero: the state is not initial.  */                      \
79       if (FROM_DIRECTION)                                                     \
80         /* We don't use shift states in the FROM_DIRECTION.  */               \
81         data->__statep->__count = 0;                                          \
82       else                                                                    \
83         { /* Two bytes are needed for the buffered character.  */             \
84           if (__builtin_expect (outbuf + 2 <= outend, 1))                     \
85             {                                                                 \
86               /* Write out the last character.  */                            \
87               uint32_t lasttwo = data->__statep->__count >> 3;                \
88               *outbuf++ = (lasttwo >> 8) & 0xff;                              \
89               *outbuf++ = lasttwo & 0xff;                                     \
90               data->__statep->__count = 0;                                    \
91             }                                                                 \
92           else                                                                \
93             /* We don't have enough room in the output buffer.  */            \
94             status = __GCONV_FULL_OUTPUT;                                     \
95         }                                                                     \
96     }
97
98
99 /* First define the conversion function from EUC-JISX0213 to UCS-4.  */
100 #define MIN_NEEDED_INPUT        FROM_LOOP_MIN_NEEDED_FROM
101 #define MAX_NEEDED_INPUT        FROM_LOOP_MAX_NEEDED_FROM
102 #define MIN_NEEDED_OUTPUT       FROM_LOOP_MIN_NEEDED_TO
103 #define MAX_NEEDED_OUTPUT       FROM_LOOP_MAX_NEEDED_TO
104 #define LOOPFCT                 FROM_LOOP
105 #define BODY \
106   { /* Decode one EUC-JISX0213 character starting at INPTR.  */               \
107     uint32_t ch = *inptr;                                                     \
108                                                                               \
109     if (ch < 0x80)                                                            \
110       /* Plain ASCII character.  */                                           \
111       ++inptr;                                                                \
112     else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f)          \
113       {                                                                       \
114         /* Two or three byte character.  */                                   \
115         uint32_t ch2;                                                         \
116                                                                               \
117         if (__builtin_expect (inptr + 1 >= inend, 0))                         \
118           {                                                                   \
119             /* The second byte is not available.  */                          \
120             result = __GCONV_INCOMPLETE_INPUT;                                \
121             break;                                                            \
122           }                                                                   \
123                                                                               \
124         ch2 = inptr[1];                                                       \
125                                                                               \
126         /* The second byte must be >= 0xa1 and <= 0xfe.  */                   \
127         if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0))                   \
128           {                                                                   \
129             /* This is an illegal character.  */                              \
130             if (! ignore_errors_p ())                                         \
131               {                                                               \
132                 result = __GCONV_ILLEGAL_INPUT;                               \
133                 break;                                                        \
134               }                                                               \
135             /* Errors are ignored: skip one byte and keep converting.  */     \
136             ++inptr;                                                          \
137             ++*irreversible;                                                  \
138             continue;                                                         \
139           }                                                                   \
140                                                                               \
141         if (ch == 0x8e)                                                       \
142           {                                                                   \
143             /* Half-width katakana.  */                                       \
144             if (__builtin_expect (ch2 > 0xdf, 0))                             \
145               {                                                               \
146                 /* This is an illegal character.  */                          \
147                 if (! ignore_errors_p ())                                     \
148                   {                                                           \
149                     result = __GCONV_ILLEGAL_INPUT;                           \
150                     break;                                                    \
151                   }                                                           \
152                 /* Errors are ignored: skip one byte and keep converting.  */ \
153                 ++inptr;                                                      \
154                 ++*irreversible;                                              \
155                 continue;                                                     \
156               }                                                               \
157             /* Bytes 0xA1..0xDF become U+FF61..U+FF9F.  */                    \
158             ch = ch2 + 0xfec0;                                                \
159             inptr += 2;                                                       \
160           }                                                                   \
161         else                                                                  \
162           {                                                                   \
163             const unsigned char *endp;                                        \
164                                                                               \
165             if (ch == 0x8f)                                                   \
166               {                                                               \
167                 /* JISX 0213 plane 2.  */                                     \
168                 uint32_t ch3;                                                 \
169                                                                               \
170                 if (__builtin_expect (inptr + 2 >= inend, 0))                 \
171                   {                                                           \
172                     /* The third byte is not available.  */                   \
173                     result = __GCONV_INCOMPLETE_INPUT;                        \
174                     break;                                                    \
175                   }                                                           \
176                                                                               \
177                 ch3 = inptr[2];                                               \
178                 endp = inptr + 3;                                             \
179                                                                               \
180                 ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80);       \
181               }                                                               \
182             else                                                              \
183               {                                                               \
184                 /* JISX 0213 plane 1.  */                                     \
185                 endp = inptr + 2;                                             \
186                                                                               \
187                 ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80);        \
188               }                                                               \
189             /* 0: no mapping; 1..0x7F: index of a combining pair.  */         \
190             if (ch == 0)                                                      \
191               {                                                               \
192                 /* This is an illegal character.  */                          \
193                 if (! ignore_errors_p ())                                     \
194                   {                                                           \
195                     result = __GCONV_ILLEGAL_INPUT;                           \
196                     break;                                                    \
197                   }                                                           \
198                 /* Errors are ignored: skip one byte and keep converting.  */ \
199                 ++inptr;                                                      \
200                 ++*irreversible;                                              \
201                 continue;                                                     \
202               }                                                               \
203                                                                               \
204             if (ch < 0x80)                                                    \
205               {                                                               \
206                 /* It's a combining character.  */                            \
207                 uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];         \
208                 uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];         \
209                                                                               \
210                 /* See whether we have room for two characters.  */           \
211                 if (outptr + 8 <= outend)                                     \
212                   {                                                           \
213                     inptr = endp;                                             \
214                     put32 (outptr, u1);                                       \
215                     outptr += 4;                                              \
216                     put32 (outptr, u2);                                       \
217                     outptr += 4;                                              \
218                     continue;                                                 \
219                   }                                                           \
220                 else                                                          \
221                   { /* Nothing consumed: INPTR is not advanced.  */           \
222                     result = __GCONV_FULL_OUTPUT;                             \
223                     break;                                                    \
224                   }                                                           \
225               }                                                               \
226                                                                               \
227             inptr = endp;                                                     \
228           }                                                                   \
229       }                                                                       \
230     else                                                                      \
231       {                                                                       \
232         /* This is illegal.  */                                               \
233         if (! ignore_errors_p ())                                             \
234           {                                                                   \
235             result = __GCONV_ILLEGAL_INPUT;                                   \
236             break;                                                            \
237           }                                                                   \
238                                                                               \
239         ++inptr;                                                              \
240         ++*irreversible;                                                      \
241         continue;                                                             \
242       }                                                                       \
243     /* Store the single resulting UCS-4 character.  */                        \
244     put32 (outptr, ch);                                                       \
245     outptr += 4;                                                              \
246   }
247 #define LOOP_NEED_FLAGS
248 #define EXTRA_LOOP_DECLS        , int *statep
249 #include <iconv/loop.c>
250
251
252 /* Next, define the other direction, from UCS-4 to EUC-JISX0213.  */
253
254 /* Composition tables for each of the relevant combining characters.  */
255 static const struct
256 {
257   uint16_t base;      /* EUC code of the base character.  */
258   uint16_t composed;  /* EUC code of base plus combining character.  */
259 } comp_table_data[] =
260 {
261 #define COMP_TABLE_IDX_02E5 0 /* Index of the first entry for U+02E5.  */
262 #define COMP_TABLE_LEN_02E5 1 /* Number of entries for U+02E5.  */
263   { 0xabe4, 0xabe5 }, /* 0x12B65 = 0x12B64 U+02E5 */
264 #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
265 #define COMP_TABLE_LEN_02E9 1
266   { 0xabe0, 0xabe6 }, /* 0x12B66 = 0x12B60 U+02E9 */
267 #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
268 #define COMP_TABLE_LEN_0300 5
269   { 0xa9dc, 0xabc4 }, /* 0x12B44 = 0x1295C U+0300 */
270   { 0xabb8, 0xabc8 }, /* 0x12B48 = 0x12B38 U+0300 */
271   { 0xabb7, 0xabca }, /* 0x12B4A = 0x12B37 U+0300 */
272   { 0xabb0, 0xabcc }, /* 0x12B4C = 0x12B30 U+0300 */
273   { 0xabc3, 0xabce }, /* 0x12B4E = 0x12B43 U+0300 */
274 #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
275 #define COMP_TABLE_LEN_0301 4
276   { 0xabb8, 0xabc9 }, /* 0x12B49 = 0x12B38 U+0301 */
277   { 0xabb7, 0xabcb }, /* 0x12B4B = 0x12B37 U+0301 */
278   { 0xabb0, 0xabcd }, /* 0x12B4D = 0x12B30 U+0301 */
279   { 0xabc3, 0xabcf }, /* 0x12B4F = 0x12B43 U+0301 */
280 #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
281 #define COMP_TABLE_LEN_309A 14
282   { 0xa4ab, 0xa4f7 }, /* 0x12477 = 0x1242B U+309A */
283   { 0xa4ad, 0xa4f8 }, /* 0x12478 = 0x1242D U+309A */
284   { 0xa4af, 0xa4f9 }, /* 0x12479 = 0x1242F U+309A */
285   { 0xa4b1, 0xa4fa }, /* 0x1247A = 0x12431 U+309A */
286   { 0xa4b3, 0xa4fb }, /* 0x1247B = 0x12433 U+309A */
287   { 0xa5ab, 0xa5f7 }, /* 0x12577 = 0x1252B U+309A */
288   { 0xa5ad, 0xa5f8 }, /* 0x12578 = 0x1252D U+309A */
289   { 0xa5af, 0xa5f9 }, /* 0x12579 = 0x1252F U+309A */
290   { 0xa5b1, 0xa5fa }, /* 0x1257A = 0x12531 U+309A */
291   { 0xa5b3, 0xa5fb }, /* 0x1257B = 0x12533 U+309A */
292   { 0xa5bb, 0xa5fc }, /* 0x1257C = 0x1253B U+309A */
293   { 0xa5c4, 0xa5fd }, /* 0x1257D = 0x12544 U+309A */
294   { 0xa5c8, 0xa5fe }, /* 0x1257E = 0x12548 U+309A */
295   { 0xa6f5, 0xa6f8 }, /* 0x12678 = 0x12675 U+309A */
296 };
297
298 #define MIN_NEEDED_INPUT        TO_LOOP_MIN_NEEDED_FROM
299 #define MAX_NEEDED_INPUT        TO_LOOP_MAX_NEEDED_FROM
300 #define MIN_NEEDED_OUTPUT       TO_LOOP_MIN_NEEDED_TO
301 #define MAX_NEEDED_OUTPUT       TO_LOOP_MAX_NEEDED_TO
302 #define LOOPFCT                 TO_LOOP
303 #define BODY \
304   { /* Encode one UCS-4 character, or flush a buffered one.  */               \
305     uint32_t ch = get32 (inptr);                                              \
306     /* A nonzero state holds a buffered two-byte character.  */               \
307     if ((*statep >> 3) != 0)                                                  \
308       {                                                                       \
309         /* Attempt to combine the last character with this one.  */           \
310         uint16_t lasttwo = *statep >> 3;                                      \
311         unsigned int idx;                                                     \
312         unsigned int len;                                                     \
313         /* Pick the table slice for this combining mark.  */                  \
314         if (ch == 0x02e5)                                                     \
315           idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;               \
316         else if (ch == 0x02e9)                                                \
317           idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;               \
318         else if (ch == 0x0300)                                                \
319           idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;               \
320         else if (ch == 0x0301)                                                \
321           idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;               \
322         else if (ch == 0x309a)                                                \
323           idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;               \
324         else                                                                  \
325           goto not_combining;                                                 \
326         /* Search the slice; LEN stays > 0 only on a match.  */               \
327         do                                                                    \
328           if (comp_table_data[idx].base == lasttwo)                           \
329             break;                                                            \
330         while (++idx, --len > 0);                                             \
331                                                                               \
332         if (len > 0)                                                          \
333           {                                                                   \
334             /* Output the combined character.  */                             \
335             if (__builtin_expect (outptr + 1 >= outend, 0))                   \
336               {                                                               \
337                 result = __GCONV_FULL_OUTPUT;                                 \
338                 break;                                                        \
339               }                                                               \
340             lasttwo = comp_table_data[idx].composed;                          \
341             *outptr++ = (lasttwo >> 8) & 0xff;                                \
342             *outptr++ = lasttwo & 0xff;                                       \
343             *statep = 0;                                                      \
344             inptr += 4;                                                       \
345             continue;                                                         \
346           }                                                                   \
347         /* Below: emit the buffer without consuming the input.  */            \
348       not_combining:                                                          \
349         /* Output the buffered character.  */                                 \
350         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
351           {                                                                   \
352             result = __GCONV_FULL_OUTPUT;                                     \
353             break;                                                            \
354           }                                                                   \
355         *outptr++ = (lasttwo >> 8) & 0xff;                                    \
356         *outptr++ = lasttwo & 0xff;                                           \
357         *statep = 0;                                                          \
358         continue;                                                             \
359       }                                                                       \
360                                                                               \
361     if (ch < 0x80)                                                            \
362       /* Plain ASCII character.  */                                           \
363       *outptr++ = ch;                                                         \
364     else if (ch >= 0xff61 && ch <= 0xff9f)                                    \
365       {                                                                       \
366         /* Half-width katakana.  */                                           \
367         if (__builtin_expect (outptr + 1 >= outend, 0))                       \
368           {                                                                   \
369             result = __GCONV_FULL_OUTPUT;                                     \
370             break;                                                            \
371           }                                                                   \
372         *outptr++ = 0x8e;                                                     \
373         *outptr++ = ch - 0xfec0;                                              \
374       }                                                                       \
375     else                                                                      \
376       {                                                                       \
377         uint32_t jch = ucs4_to_jisx0213 (ch);                                 \
378         if (jch == 0)                                                         \
379           {                                                                   \
380             UNICODE_TAG_HANDLER (ch, 4);                                      \
381                                                                               \
382             /* Illegal character.  */                                         \
383             STANDARD_ERR_HANDLER (4);                                         \
384           }                                                                   \
385                                                                               \
386         if (jch & 0x0080)                                                     \
387           {                                                                   \
388             /* A possible match in comp_table_data.  We have to buffer it.  */\
389                                                                               \
390             /* We know it's a JISX 0213 plane 1 character.  */                \
391             assert ((jch & 0x8000) == 0);                                     \
392             /* Store the EUC bytes in the state, above the low 3 bits.  */    \
393             *statep = (jch | 0x8080) << 3;                                    \
394             inptr += 4;                                                       \
395             continue;                                                         \
396           }                                                                   \
397         /* Bit 0x8000 marks plane 2, which needs the 0x8F prefix.  */         \
398         if (jch & 0x8000)                                                     \
399           {                                                                   \
400             /* JISX 0213 plane 2.  */                                         \
401             if (__builtin_expect (outptr + 2 >= outend, 0))                   \
402               {                                                               \
403                 result = __GCONV_FULL_OUTPUT;                                 \
404                 break;                                                        \
405               }                                                               \
406             *outptr++ = 0x8f;                                                 \
407           }                                                                   \
408         else                                                                  \
409           {                                                                   \
410             /* JISX 0213 plane 1.  */                                         \
411             if (__builtin_expect (outptr + 1 >= outend, 0))                   \
412               {                                                               \
413                 result = __GCONV_FULL_OUTPUT;                                 \
414                 break;                                                        \
415               }                                                               \
416           }                                                                   \
417         *outptr++ = (jch >> 8) | 0x80;                                        \
418         *outptr++ = (jch & 0xff) | 0x80;                                      \
419       }                                                                       \
420     /* The character was written directly: consume it.  */                    \
421     inptr += 4;                                                               \
422   }
423 #define LOOP_NEED_FLAGS
424 #define EXTRA_LOOP_DECLS        , int *statep
425 #include <iconv/loop.c>
426
427
428 /* Now define the toplevel functions.  */
429 #include <iconv/skeleton.c>