Simplify step data handling.
[kopensolaris-gnu/glibc.git] / iconvdata / eucjp.c
1 /* Mapping tables for EUC-JP handling.
2    Copyright (C) 1998 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <gconv.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <wchar.h>
25 #include <jis0201.h>
26 #include <jis0208.h>
27 #include <jis0212.h>
28
/* Markers for the direction of the transformation.  Only the addresses
   of these objects are used: gconv_init stores one of them in the step
   and gconv compares against it.  */
static int from_eucjp_object;
static int to_eucjp_object;
32
33
34 int
35 gconv_init (struct gconv_step *step)
36 {
37   /* Determine which direction.  */
38   if (strcasestr (step->from_name, "EUC-JP") != NULL)
39     step->data = &from_eucjp_object;
40   else if (strcasestr (step->to_name, "EUC-JP") != NULL)
41     step->data = &to_eucjp_object;
42   else
43     return GCONV_NOCONV;
44
45   return GCONV_OK;
46 }
47
48
/* Counterpart of gconv_init.  gconv_init only stores the address of a
   static object in the step, so there is nothing to release here.  */
void
gconv_end (struct gconv_step *data)
{
  /* Intentionally empty.  */
  (void) data;
}
54
55
56 int
57 gconv (struct gconv_step *step, struct gconv_step_data *data,
58        const char *inbuf, size_t *inbufsize, size_t *written, int do_flush)
59 {
60   struct gconv_step *next_step = step + 1;
61   struct gconv_step_data *next_data = data + 1;
62   gconv_fct fct = next_step->fct;
63   size_t do_write;
64   int result;
65
66   /* If the function is called with no input this means we have to reset
67      to the initial state.  The possibly partly converted input is
68      dropped.  */
69   if (do_flush)
70     {
71       do_write = 0;
72
73       /* Call the steps down the chain if there are any.  */
74       if (data->is_last)
75         result = GCONV_OK;
76       else
77         {
78           struct gconv_step *next_step = step + 1;
79           struct gconv_step_data *next_data = data + 1;
80
81           result = (*fct) (next_step, next_data, NULL, 0, written, 1);
82
83           /* Clear output buffer.  */
84           data->outbufavail = 0;
85         }
86     }
87   else
88     {
89       do_write = 0;
90
91       do
92         {
93           result = GCONV_OK;
94
95           if (step->data == &from_eucjp_object)
96             {
97               size_t inchars = *inbufsize;
98               size_t outwchars = data->outbufavail;
99               char *outbuf = data->outbuf;
100               size_t cnt = 0;
101
102               while (cnt < inchars
103                      && (outwchars + sizeof (wchar_t) <= data->outbufsize))
104                 {
105                   int inchar = (unsigned char) inbuf[cnt];
106                   wchar_t ch;
107
108                   if (inchar <= 0x7f)
109                     ch = (wchar_t) inchar;
110                   else if ((inchar <= 0xa0 || inchar > 0xfe)
111                            && inchar != 0x8e && inchar != 0x8f)
112                       /* This is illegal.  */
113                       ch = L'\0';
114                   else
115                     {
116                       /* Two or more byte character.  First test whether the
117                          next character is also available.  */
118                       int inchar2;
119
120                       if (cnt + 1 >= inchars)
121                         {
122                           /* The second character is not available.  Store
123                              the intermediate result.  */
124                           result = GCONV_INCOMPLETE_INPUT;
125                           break;
126                         }
127
128                       inchar2 = (unsigned char) inbuf[++cnt];
129
130                       /* All second bytes of a multibyte character must be
131                          >= 0xa1. */
132                       if (inchar2 < 0xa1)
133                         {
134                           /* This is an illegal character.  */
135                           --cnt;
136                           result = GCONV_ILLEGAL_INPUT;
137                           break;
138                         }
139
140                       if (inchar == '\x8e')
141                         /* This is code set 2: half-width katakana.  */
142                         ch = jisx0201_to_ucs4 (inchar2);
143                       else if (inchar == '\x8f')
144                         {
145                           /* This is code set 3: JIS X 0212-1990.  */
146                           const char *endp = &inbuf[cnt];
147
148                           ch = jisx0212_to_ucs4 (&endp, 1 + inchars - cnt,
149                                                  0x80);
150                           cnt = endp - inbuf;
151                         }
152                       else
153                         {
154                           /* This is code set 1: JIS X 0208.  */
155                           const char *endp = &inbuf[cnt - 1];
156
157                           ch = jisx0208_to_ucs4 (&endp, 2 + inchars - cnt,
158                                                  0x80);
159                           if (ch != L'\0')
160                             ++cnt;
161                         }
162
163                       if (ch == UNKNOWN_10646_CHAR)
164                          ch = L'\0';
165
166                       if (ch == L'\0')
167                         --cnt;
168                     }
169
170                   if (ch == L'\0' && inbuf[cnt] != '\0')
171                     {
172                       /* This is an illegal character.  */
173                       result = GCONV_ILLEGAL_INPUT;
174                       break;
175                     }
176
177                   *((wchar_t *) (outbuf + outwchars)) = ch;
178                   ++do_write;
179                   outwchars += sizeof (wchar_t);
180                   ++cnt;
181                 }
182               *inbufsize -= cnt;
183               data->outbufavail = outwchars;
184             }
185           else
186             {
187               size_t inwchars = *inbufsize;
188               size_t outchars = data->outbufavail;
189               char *outbuf = data->outbuf;
190               size_t cnt = 0;
191               int extra = 0;
192
193               while (inwchars >= cnt + sizeof (wchar_t)
194                      && outchars < data->outbufsize)
195                 {
196                   wchar_t ch = *((wchar_t *) (inbuf + cnt));
197
198                   if (ch <= L'\x7f')
199                     /* It's plain ASCII.  */
200                     outbuf[outchars] = ch;
201                   else
202                     {
203                       /* Try the JIS character sets.  */
204                       size_t found;
205
206                       found = ucs4_to_jisx0201 (ch, &outbuf[outchars]);
207
208                       if (found == UNKNOWN_10646_CHAR)
209                         {
210                           /* No JIS 0201 character.  */
211                           found = ucs4_to_jisx0208 (ch, &outbuf[outchars],
212                                                     (data->outbufsize
213                                                      - outchars));
214                           if (found == 0)
215                             {
216                               /* We ran out of space.  */
217                               extra = 2;
218                               break;
219                             }
220                           else if (found != UNKNOWN_10646_CHAR)
221                             {
222                               /* It's a JIS 0208 character, adjust it for
223                                  EUC-JP.  */
224                               outbuf[outchars++] += 0x80;
225                               outbuf[outchars] += 0x80;
226                             }
227                           else
228                             {
229                               /* No JIS 0208 character.  */
230                               found = ucs4_to_jisx0212 (ch, &outbuf[outchars],
231                                                         (data->outbufsize
232                                                          - outchars));
233
234                               if (found == 0)
235                                 {
236                                   /* We ran out of space.  */
237                                   extra = 2;
238                                   break;
239                                 }
240                               else if (found != UNKNOWN_10646_CHAR)
241                                 {
242                                   /* It's a JIS 0212 character, adjust it for
243                                      EUC-JP.  */
244                                   outbuf[outchars++] += 0x80;
245                                   outbuf[outchars] += 0x80;
246                                 }
247                               else
248                                 /* Illegal character.  */
249                                 break;
250                             }
251                         }
252                     }
253
254                   ++do_write;
255                   ++outchars;
256                   cnt += sizeof (wchar_t);
257                 }
258               *inbufsize -= cnt;
259               data->outbufavail = outchars;
260
261               if (outchars + extra < data->outbufsize)
262                 {
263                   /* If there is still room in the output buffer something
264                      is wrong with the input.  */
265                   if (inwchars >= cnt + sizeof (wchar_t))
266                     {
267                       /* An error occurred.  */
268                       result = GCONV_ILLEGAL_INPUT;
269                       break;
270                     }
271                   if (inwchars != cnt)
272                     {
273                       /* There are some unprocessed bytes at the end of the
274                          input buffer.  */
275                       result = GCONV_INCOMPLETE_INPUT;
276                       break;
277                     }
278                 }
279             }
280
281           if (result != GCONV_OK)
282             break;
283
284           if (data->is_last)
285             {
286               /* This is the last step.  */
287               result = (*inbufsize > (step->data == &from_eucjp_object
288                                       ? 0 : sizeof (wchar_t) - 1)
289                         ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
290               break;
291             }
292
293           /* Status so far.  */
294           result = GCONV_EMPTY_INPUT;
295
296           if (data->outbufavail > 0)
297             {
298               /* Call the functions below in the chain.  */
299               size_t newavail = data->outbufavail;
300
301               result = (*fct) (next_step, next_data, data->outbuf, &newavail,
302                                written, 0);
303
304               /* Correct the output buffer.  */
305               if (newavail != data->outbufavail && newavail > 0)
306                 {
307                   memmove (data->outbuf,
308                            &data->outbuf[data->outbufavail - newavail],
309                            newavail);
310                   data->outbufavail = newavail;
311                 }
312             }
313         }
314       while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT);
315     }
316
317   if (written != NULL && data->is_last)
318     *written = do_write;
319
320   return result;
321 }