caf24e27e0fc3f98aecf27bf66193b574808ca2a
[kopensolaris-gnu/glibc.git] / iconvdata / eucjp.c
1 /* Mapping tables for EUC-JP handling.
2    Copyright (C) 1998 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <gconv.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <wchar.h>
26 #include <jis0201.h>
27 #include <jis0208.h>
28 #include <jis0212.h>
29
/* Which way a conversion step translates.  gconv_init picks the value
   from the step's charset names; gconv dispatches on it.  */
enum direction
{
  /* Neither the source nor the target name mentions EUC-JP.  */
  illegal = 0,
  /* The target charset is EUC-JP.  */
  to_eucjp,
  /* The source charset is EUC-JP.  */
  from_eucjp
};
37
38 struct eucjp_data
39 {
40   enum direction dir;
41 };
42
43
44 int
45 gconv_init (struct gconv_step *step)
46 {
47   /* Determine which direction.  */
48   struct eucjp_data *new_data;
49   enum direction dir;
50   int result;
51
52   if (strcasestr (step->from_name, "EUC-JP") != NULL)
53     dir = from_eucjp;
54   else if (strcasestr (step->to_name, "EUC-JP") != NULL)
55     dir = to_eucjp;
56   else
57     dir = illegal;
58
59   result = GCONV_NOCONV;
60   if (dir != illegal
61       && ((new_data
62            = (struct eucjp_data *) malloc (sizeof (struct eucjp_data)))
63           != NULL))
64     {
65       new_data->dir = dir;
66       step->data = new_data;
67       result = GCONV_OK;
68     }
69
70   return result;
71 }
72
73
74 void
75 gconv_end (struct gconv_step *data)
76 {
77   free (data->data);
78 }
79
80
81 int
82 gconv (struct gconv_step *step, struct gconv_step_data *data,
83        const char *inbuf, size_t *inbufsize, size_t *written, int do_flush)
84 {
85   struct gconv_step *next_step = step + 1;
86   struct gconv_step_data *next_data = data + 1;
87   gconv_fct fct = next_step->fct;
88   size_t do_write;
89   int result;
90
91   /* If the function is called with no input this means we have to reset
92      to the initial state.  The possibly partly converted input is
93      dropped.  */
94   if (do_flush)
95     {
96       do_write = 0;
97
98       /* Call the steps down the chain if there are any.  */
99       if (data->is_last)
100         result = GCONV_OK;
101       else
102         {
103           struct gconv_step *next_step = step + 1;
104           struct gconv_step_data *next_data = data + 1;
105
106           result = (*fct) (next_step, next_data, NULL, 0, written, 1);
107
108           /* Clear output buffer.  */
109           data->outbufavail = 0;
110         }
111     }
112   else
113     {
114       enum direction dir = ((struct eucjp_data *) step->data)->dir;
115
116       do_write = 0;
117
118       do
119         {
120           result = GCONV_OK;
121
122           if (dir == from_eucjp)
123             {
124               size_t inchars = *inbufsize;
125               size_t outwchars = data->outbufavail;
126               char *outbuf = data->outbuf;
127               size_t cnt = 0;
128
129               while (cnt < inchars
130                      && (outwchars + sizeof (wchar_t) <= data->outbufsize))
131                 {
132                   int inchar = (unsigned char) inbuf[cnt];
133                   wchar_t ch;
134
135                   if (inchar <= 0x7f)
136                     ch = (wchar_t) inchar;
137                   else if ((inchar <= 0xa0 || inchar > 0xfe)
138                            && inchar != 0x8e && inchar != 0x8f)
139                       /* This is illegal.  */
140                       ch = L'\0';
141                   else
142                     {
143                       /* Two or more byte character.  First test whether the
144                          next character is also available.  */
145                       int inchar2;
146
147                       if (cnt + 1 >= inchars)
148                         {
149                           /* The second character is not available.  Store
150                              the intermediate result.  */
151                           result = GCONV_INCOMPLETE_INPUT;
152                           break;
153                         }
154
155                       inchar2 = (unsigned char) inbuf[++cnt];
156
157                       /* All second bytes of a multibyte character must be
158                          >= 0xa1. */
159                       if (inchar2 < 0xa1)
160                         {
161                           /* This is an illegal character.  */
162                           --cnt;
163                           result = GCONV_ILLEGAL_INPUT;
164                           break;
165                         }
166
167                       if (inchar == '\x8e')
168                         /* This is code set 2: half-width katakana.  */
169                         ch = jisx0201_to_ucs4 (inchar2);
170                       else if (inchar == '\x8f')
171                         {
172                           /* This is code set 3: JIS X 0212-1990.  */
173                           const char *endp = &inbuf[cnt];
174
175                           ch = jisx0212_to_ucs4 (&endp, 1 + inchars - cnt,
176                                                  0x80);
177                           cnt = endp - inbuf;
178                         }
179                       else
180                         {
181                           /* This is code set 1: JIS X 0208.  */
182                           const char *endp = &inbuf[cnt - 1];
183
184                           ch = jisx0208_to_ucs4 (&endp, 2 + inchars - cnt,
185                                                  0x80);
186                           if (ch != L'\0')
187                             ++cnt;
188                         }
189
190                       if (ch == UNKNOWN_10646_CHAR)
191                          ch = L'\0';
192
193                       if (ch == L'\0')
194                         --cnt;
195                     }
196
197                   if (ch == L'\0' && inbuf[cnt] != '\0')
198                     {
199                       /* This is an illegal character.  */
200                       result = GCONV_ILLEGAL_INPUT;
201                       break;
202                     }
203
204                   *((wchar_t *) (outbuf + outwchars)) = ch;
205                   ++do_write;
206                   outwchars += sizeof (wchar_t);
207                   ++cnt;
208                 }
209               *inbufsize -= cnt;
210               data->outbufavail = outwchars;
211             }
212           else
213             {
214               size_t inwchars = *inbufsize;
215               size_t outchars = data->outbufavail;
216               char *outbuf = data->outbuf;
217               size_t cnt = 0;
218               int extra = 0;
219
220               while (inwchars >= cnt + sizeof (wchar_t)
221                      && outchars < data->outbufsize)
222                 {
223                   wchar_t ch = *((wchar_t *) (inbuf + cnt));
224
225                   if (ch <= L'\x7f')
226                     /* It's plain ASCII.  */
227                     outbuf[outchars] = ch;
228                   else
229                     {
230                       /* Try the JIS character sets.  */
231                       size_t found;
232
233                       found = ucs4_to_jisx0201 (ch, &outbuf[outchars]);
234
235                       if (found == UNKNOWN_10646_CHAR)
236                         {
237                           /* No JIS 0201 character.  */
238                           found = ucs4_to_jisx0208 (ch, &outbuf[outchars],
239                                                     (data->outbufsize
240                                                      - outchars));
241                           if (found == 0)
242                             {
243                               /* We ran out of space.  */
244                               extra = 2;
245                               break;
246                             }
247                           else if (found != UNKNOWN_10646_CHAR)
248                             {
249                               /* It's a JIS 0208 character, adjust it for
250                                  EUC-JP.  */
251                               outbuf[outchars++] += 0x80;
252                               outbuf[outchars] += 0x80;
253                             }
254                           else
255                             {
256                               /* No JIS 0208 character.  */
257                               found = ucs4_to_jisx0212 (ch, &outbuf[outchars],
258                                                         (data->outbufsize
259                                                          - outchars));
260
261                               if (found == 0)
262                                 {
263                                   /* We ran out of space.  */
264                                   extra = 2;
265                                   break;
266                                 }
267                               else if (found != UNKNOWN_10646_CHAR)
268                                 {
269                                   /* It's a JIS 0212 character, adjust it for
270                                      EUC-JP.  */
271                                   outbuf[outchars++] += 0x80;
272                                   outbuf[outchars] += 0x80;
273                                 }
274                               else
275                                 /* Illegal character.  */
276                                 break;
277                             }
278                         }
279                     }
280
281                   ++do_write;
282                   ++outchars;
283                   cnt += sizeof (wchar_t);
284                 }
285               *inbufsize -= cnt;
286               data->outbufavail = outchars;
287
288               if (outchars + extra < data->outbufsize)
289                 {
290                   /* If there is still room in the output buffer something
291                      is wrong with the input.  */
292                   if (inwchars >= cnt + sizeof (wchar_t))
293                     {
294                       /* An error occurred.  */
295                       result = GCONV_ILLEGAL_INPUT;
296                       break;
297                     }
298                   if (inwchars != cnt)
299                     {
300                       /* There are some unprocessed bytes at the end of the
301                          input buffer.  */
302                       result = GCONV_INCOMPLETE_INPUT;
303                       break;
304                     }
305                 }
306             }
307
308           if (result != GCONV_OK)
309             break;
310
311           if (data->is_last)
312             {
313               /* This is the last step.  */
314               result = (*inbufsize > (dir == from_eucjp
315                                       ? 0 : sizeof (wchar_t) - 1)
316                         ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
317               break;
318             }
319
320           /* Status so far.  */
321           result = GCONV_EMPTY_INPUT;
322
323           if (data->outbufavail > 0)
324             {
325               /* Call the functions below in the chain.  */
326               size_t newavail = data->outbufavail;
327
328               result = (*fct) (next_step, next_data, data->outbuf, &newavail,
329                                written, 0);
330
331               /* Correct the output buffer.  */
332               if (newavail != data->outbufavail && newavail > 0)
333                 {
334                   memmove (data->outbuf,
335                            &data->outbuf[data->outbufavail - newavail],
336                            newavail);
337                   data->outbufavail = newavail;
338                 }
339             }
340         }
341       while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT);
342     }
343
344   if (written != NULL && data->is_last)
345     *written = do_write;
346
347   return result;
348 }