Adjust input buffer pointer for output buffer overflow.
[kopensolaris-gnu/glibc.git] / iconvdata / eucjp.c
1 /* Mapping tables for EUC-JP handling.
2    Copyright (C) 1998 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <gconv.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <wchar.h>
25 #include <jis0201.h>
26 #include <jis0208.h>
27 #include <jis0212.h>
28
/* Direction markers.  gconv_init stores the address of one of these
   objects in the step's DATA field and gconv compares that pointer to
   decide which way to convert.  Within this file only the addresses are
   used; the integer values themselves are never read.  */
static int from_eucjp_object;
static int to_eucjp_object;
32
33
34 int
35 gconv_init (struct gconv_step *step)
36 {
37   /* Determine which direction.  */
38   if (strcasestr (step->from_name, "EUC-JP") != NULL)
39     step->data = &from_eucjp_object;
40   else if (strcasestr (step->to_name, "EUC-JP") != NULL)
41     step->data = &to_eucjp_object;
42   else
43     return GCONV_NOCONV;
44
45   return GCONV_OK;
46 }
47
48
/* Tear-down hook for a conversion step.  gconv_init only stores a
   pointer to a static object in the step, so there is nothing to
   release here.  */
void
gconv_end (struct gconv_step *data)
{
  /* Intentionally empty; DATA is not inspected.  */
  (void) data;
}
54
55
56 int
57 gconv (struct gconv_step *step, struct gconv_step_data *data,
58        const char *inbuf, size_t *inbufsize, size_t *written, int do_flush)
59 {
60   struct gconv_step *next_step = step + 1;
61   struct gconv_step_data *next_data = data + 1;
62   gconv_fct fct = next_step->fct;
63   size_t do_write;
64   int result;
65
66   /* If the function is called with no input this means we have to reset
67      to the initial state.  The possibly partly converted input is
68      dropped.  */
69   if (do_flush)
70     {
71       do_write = 0;
72
73       /* Call the steps down the chain if there are any.  */
74       if (data->is_last)
75         result = GCONV_OK;
76       else
77         {
78           struct gconv_step *next_step = step + 1;
79           struct gconv_step_data *next_data = data + 1;
80
81           result = (*fct) (next_step, next_data, NULL, 0, written, 1);
82
83           /* Clear output buffer.  */
84           data->outbufavail = 0;
85         }
86     }
87   else
88     {
89       do_write = 0;
90
91       do
92         {
93           result = GCONV_OK;
94
95           if (step->data == &from_eucjp_object)
96             {
97               size_t inchars = *inbufsize;
98               size_t outwchars = data->outbufavail;
99               char *outbuf = data->outbuf;
100               size_t cnt = 0;
101
102               while (cnt < inchars
103                      && (outwchars + sizeof (wchar_t) <= data->outbufsize))
104                 {
105                   int inchar = (unsigned char) inbuf[cnt];
106                   wchar_t ch;
107
108                   if (inchar <= 0x7f)
109                     ch = (wchar_t) inchar;
110                   else if ((inchar <= 0xa0 || inchar > 0xfe)
111                            && inchar != 0x8e && inchar != 0x8f)
112                       /* This is illegal.  */
113                       ch = L'\0';
114                   else
115                     {
116                       /* Two or more byte character.  First test whether the
117                          next character is also available.  */
118                       int inchar2;
119
120                       if (cnt + 1 >= inchars)
121                         {
122                           /* The second character is not available.  Store
123                              the intermediate result.  */
124                           result = GCONV_INCOMPLETE_INPUT;
125                           break;
126                         }
127
128                       inchar2 = (unsigned char) inbuf[++cnt];
129
130                       /* All second bytes of a multibyte character must be
131                          >= 0xa1. */
132                       if (inchar2 < 0xa1)
133                         {
134                           /* This is an illegal character.  */
135                           --cnt;
136                           result = GCONV_ILLEGAL_INPUT;
137                           break;
138                         }
139
140                       if (inchar == '\x8e')
141                         /* This is code set 2: half-width katakana.  */
142                         ch = jisx0201_to_ucs4 (inchar2);
143                       else if (inchar == '\x8f')
144                         {
145                           /* This is code set 3: JIS X 0212-1990.  */
146                           const char *endp = &inbuf[cnt];
147
148                           ch = jisx0212_to_ucs4 (&endp, 1 + inchars - cnt,
149                                                  0x80);
150                           cnt = endp - inbuf;
151                         }
152                       else
153                         {
154                           /* This is code set 1: JIS X 0208.  */
155                           const char *endp = &inbuf[cnt - 1];
156
157                           ch = jisx0208_to_ucs4 (&endp, 2 + inchars - cnt,
158                                                  0x80);
159                           if (ch != L'\0')
160                             ++cnt;
161                         }
162
163                       if (ch == UNKNOWN_10646_CHAR)
164                          ch = L'\0';
165
166                       if (ch == L'\0')
167                         --cnt;
168                     }
169
170                   if (ch == L'\0' && inbuf[cnt] != '\0')
171                     {
172                       /* This is an illegal character.  */
173                       result = GCONV_ILLEGAL_INPUT;
174                       break;
175                     }
176
177                   *((wchar_t *) (outbuf + outwchars)) = ch;
178                   ++do_write;
179                   outwchars += sizeof (wchar_t);
180                   ++cnt;
181                 }
182               *inbufsize -= cnt;
183               inbuf += cnt;
184               data->outbufavail = outwchars;
185             }
186           else
187             {
188               size_t inwchars = *inbufsize;
189               size_t outchars = data->outbufavail;
190               char *outbuf = data->outbuf;
191               size_t cnt = 0;
192               int extra = 0;
193
194               while (inwchars >= cnt + sizeof (wchar_t)
195                      && outchars < data->outbufsize)
196                 {
197                   wchar_t ch = *((wchar_t *) (inbuf + cnt));
198
199                   if (ch <= L'\x7f')
200                     /* It's plain ASCII.  */
201                     outbuf[outchars] = ch;
202                   else
203                     {
204                       /* Try the JIS character sets.  */
205                       size_t found;
206
207                       found = ucs4_to_jisx0201 (ch, &outbuf[outchars]);
208
209                       if (found == UNKNOWN_10646_CHAR)
210                         {
211                           /* No JIS 0201 character.  */
212                           found = ucs4_to_jisx0208 (ch, &outbuf[outchars],
213                                                     (data->outbufsize
214                                                      - outchars));
215                           if (found == 0)
216                             {
217                               /* We ran out of space.  */
218                               extra = 2;
219                               break;
220                             }
221                           else if (found != UNKNOWN_10646_CHAR)
222                             {
223                               /* It's a JIS 0208 character, adjust it for
224                                  EUC-JP.  */
225                               outbuf[outchars++] += 0x80;
226                               outbuf[outchars] += 0x80;
227                             }
228                           else
229                             {
230                               /* No JIS 0208 character.  */
231                               found = ucs4_to_jisx0212 (ch, &outbuf[outchars],
232                                                         (data->outbufsize
233                                                          - outchars));
234
235                               if (found == 0)
236                                 {
237                                   /* We ran out of space.  */
238                                   extra = 2;
239                                   break;
240                                 }
241                               else if (found != UNKNOWN_10646_CHAR)
242                                 {
243                                   /* It's a JIS 0212 character, adjust it for
244                                      EUC-JP.  */
245                                   outbuf[outchars++] += 0x80;
246                                   outbuf[outchars] += 0x80;
247                                 }
248                               else
249                                 /* Illegal character.  */
250                                 break;
251                             }
252                         }
253                     }
254
255                   ++do_write;
256                   ++outchars;
257                   cnt += sizeof (wchar_t);
258                 }
259               *inbufsize -= cnt;
260               inbuf += cnt;
261               data->outbufavail = outchars;
262
263               if (outchars + extra < data->outbufsize)
264                 {
265                   /* If there is still room in the output buffer something
266                      is wrong with the input.  */
267                   if (inwchars >= cnt + sizeof (wchar_t))
268                     {
269                       /* An error occurred.  */
270                       result = GCONV_ILLEGAL_INPUT;
271                       break;
272                     }
273                   if (inwchars != cnt)
274                     {
275                       /* There are some unprocessed bytes at the end of the
276                          input buffer.  */
277                       result = GCONV_INCOMPLETE_INPUT;
278                       break;
279                     }
280                 }
281             }
282
283           if (result != GCONV_OK)
284             break;
285
286           if (data->is_last)
287             {
288               /* This is the last step.  */
289               result = (*inbufsize > (step->data == &from_eucjp_object
290                                       ? 0 : sizeof (wchar_t) - 1)
291                         ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
292               break;
293             }
294
295           /* Status so far.  */
296           result = GCONV_EMPTY_INPUT;
297
298           if (data->outbufavail > 0)
299             {
300               /* Call the functions below in the chain.  */
301               size_t newavail = data->outbufavail;
302
303               result = (*fct) (next_step, next_data, data->outbuf, &newavail,
304                                written, 0);
305
306               /* Correct the output buffer.  */
307               if (newavail != data->outbufavail && newavail > 0)
308                 {
309                   memmove (data->outbuf,
310                            &data->outbuf[data->outbufavail - newavail],
311                            newavail);
312                   data->outbufavail = newavail;
313                 }
314             }
315         }
316       while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT);
317     }
318
319   if (written != NULL && data->is_last)
320     *written = do_write;
321
322   return result;
323 }