Simplify step data handling.
[kopensolaris-gnu/glibc.git] / iconvdata / euctw.c
1 /* Mapping tables for EUC-TW handling.
2    Copyright (C) 1998 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public License as
8    published by the Free Software Foundation; either version 2 of the
9    License, or (at your option) any later version.
10
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19    Boston, MA 02111-1307, USA.  */
20
21 #include <gconv.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <wchar.h>
25 #include <cns11643l1.h>
26 #include <cns11643.h>
27
28 /* Direction of the transformation.  Only the addresses of these two
      objects are used in the code shown here: gconv_init stores one of
      them in step->data, and gconv compares step->data against
      &from_euctw_object to select the conversion branch.  */
29 static int to_euctw_object;
30 static int from_euctw_object;
31
32
33 int
34 gconv_init (struct gconv_step *step)
35 {
36   /* Determine which direction.  */
37   if (strcasestr (step->from_name, "EUC-TW") != NULL)
38     step->data = &from_euctw_object;
39   else if (strcasestr (step->to_name, "EUC-TW") != NULL)
40     step->data = &to_euctw_object;
41   else
42     return GCONV_NOCONV;
43
44   return GCONV_OK;
45 }
46
47
/* Tear-down hook for a conversion step.  gconv_init only stores the
   address of a static object in the step, so nothing has to be
   released here.  */
void
gconv_end (struct gconv_step *data)
{
  /* Intentionally empty.  */
  (void) data;
}
53
54
55 int
56 gconv (struct gconv_step *step, struct gconv_step_data *data,
57        const char *inbuf, size_t *inbufsize, size_t *written, int do_flush)
58 {
59   struct gconv_step *next_step = step + 1;
60   struct gconv_step_data *next_data = data + 1;
61   gconv_fct fct = next_step->fct;
62   size_t do_write;
63   int result;
64
65   /* If the function is called with no input this means we have to reset
66      to the initial state.  The possibly partly converted input is
67      dropped.  */
68   if (do_flush)
69     {
70       do_write = 0;
71
72       /* Call the steps down the chain if there are any.  */
73       if (data->is_last)
74         result = GCONV_OK;
75       else
76         {
77           struct gconv_step *next_step = step + 1;
78           struct gconv_step_data *next_data = data + 1;
79
80           result = (*fct) (next_step, next_data, NULL, 0, written, 1);
81
82           /* Clear output buffer.  */
83           data->outbufavail = 0;
84         }
85     }
86   else
87     {
88       do_write = 0;
89
90       do
91         {
92           result = GCONV_OK;
93
94           if (step->data == &from_euctw_object)
95             {
96               size_t inchars = *inbufsize;
97               size_t outwchars = data->outbufavail;
98               char *outbuf = data->outbuf;
99               size_t cnt = 0;
100
101               while (cnt < inchars
102                      && (outwchars + sizeof (wchar_t) <= data->outbufsize))
103                 {
104                   int inchar = (unsigned char) inbuf[cnt];
105                   wchar_t ch;
106
107                   if (inchar <= 0x7f)
108                     ch = (wchar_t) inchar;
109                   else if ((inchar <= 0xa0 || inchar > 0xfe)
110                            && inchar != 0x8e)
111                       /* This is illegal.  */
112                       ch = L'\0';
113                   else
114                     {
115                       /* Two or more byte character.  First test whether the
116                          next character is also available.  */
117                       int inchar2;
118
119                       if (cnt + 1 + (inchar == 0x8e ? 2 : 0) >= inchars)
120                         {
121                           /* The second character is not available.  Store
122                              the intermediate result.  */
123                           result = GCONV_INCOMPLETE_INPUT;
124                           break;
125                         }
126
127                       inchar2 = (unsigned char) inbuf[++cnt];
128
129                       /* All second bytes of a multibyte character must be
130                          >= 0xa1. */
131                       if (inchar2 < 0xa1 && inchar2 == 0xff)
132                         {
133                           /* This is an illegal character.  */
134                           --cnt;
135                           result = GCONV_ILLEGAL_INPUT;
136                           break;
137                         }
138
139                       if (inchar == '\x8e')
140                         {
141                           /* This is code set 2: CNS 11643, planes 1 to 16.  */
142                           const char *endp = &inbuf[cnt];
143
144                           ch = cns11643_to_ucs4 (&endp, 2 + inchars - cnt,
145                                                  0x80);
146
147                           if (ch == UNKNOWN_10646_CHAR)
148                             ch = L'\0';
149                           if (ch != L'\0')
150                             cnt += 2;
151                         }
152                       else
153                         {
154                           /* This is code set 1: CNS 11643, plane 1.  */
155                           const char *endp = &inbuf[cnt - 1];
156
157                           ch = cns11643l1_to_ucs4 (&endp, 2 + inchars - cnt,
158                                                    0x80);
159
160                           if (ch == UNKNOWN_10646_CHAR)
161                             ch = L'\0';
162                           if (ch != L'\0')
163                             ++cnt;
164                         }
165
166                       if (ch == L'\0')
167                         --cnt;
168                     }
169
170                   if (ch == L'\0' && inbuf[cnt] != '\0')
171                     {
172                       /* This is an illegal character.  */
173                       result = GCONV_ILLEGAL_INPUT;
174                       break;
175                     }
176
177                   *((wchar_t *) (outbuf + outwchars)) = ch;
178                   ++do_write;
179                   outwchars += sizeof (wchar_t);
180                   ++cnt;
181                 }
182               *inbufsize -= cnt;
183               data->outbufavail = outwchars;
184             }
185           else
186             {
187               size_t inwchars = *inbufsize;
188               size_t outchars = data->outbufavail;
189               char *outbuf = data->outbuf;
190               size_t cnt = 0;
191               int extra = 0;
192
193               while (inwchars >= cnt + sizeof (wchar_t)
194                      && outchars < data->outbufsize)
195                 {
196                   wchar_t ch = *((wchar_t *) (inbuf + cnt));
197
198                   if (ch <= L'\x7f')
199                     /* It's plain ASCII.  */
200                     outbuf[outchars] = ch;
201                   else
202                     {
203                       /* Try the JIS character sets.  */
204                       size_t found;
205
206                       found = ucs4_to_cns11643l1 (ch, &outbuf[outchars],
207                                                   (data->outbufsize
208                                                      - outchars));
209                       if (found == 0)
210                         {
211                           /* We ran out of space.  */
212                           extra = 2;
213                           break;
214                         }
215                       else if (found != UNKNOWN_10646_CHAR)
216                         {
217                           /* It's a CNS 11643, plane 1 character, adjust it
218                              for EUC-TW.  */
219                           outbuf[outchars++] += 0x80;
220                           outbuf[outchars] += 0x80;
221                         }
222                       else
223                         {
224                           /* No CNS 11643, plane 1 character.  */
225                           outbuf[outchars] = '\x8e';
226
227                           found = ucs4_to_cns11643 (ch, &outbuf[outchars + 1],
228                                                     (data->outbufsize
229                                                      - outchars - 1));
230                           if (found > 0)
231                             {
232                               /* It's a CNS 11643 character, adjust it for
233                                  EUC-TW.  */
234                               outbuf[++outchars] += 0xa0;
235                               outbuf[++outchars] += 0x80;
236                               outbuf[outchars] += 0x80;
237                             }
238                           else if (found == 0)
239                             {
240                               /* We ran out of space.  */
241                               extra = 4;
242                               break;
243                             }
244                           else
245                             /* Illegal character.  */
246                             break;
247                         }
248                     }
249
250                   ++do_write;
251                   ++outchars;
252                   cnt += sizeof (wchar_t);
253                 }
254               *inbufsize -= cnt;
255               data->outbufavail = outchars;
256
257               if (outchars + extra < data->outbufsize)
258                 {
259                   /* If there is still room in the output buffer something
260                      is wrong with the input.  */
261                   if (inwchars >= cnt + sizeof (wchar_t))
262                     {
263                       /* An error occurred.  */
264                       result = GCONV_ILLEGAL_INPUT;
265                       break;
266                     }
267                   if (inwchars != cnt)
268                     {
269                       /* There are some unprocessed bytes at the end of the
270                          input buffer.  */
271                       result = GCONV_INCOMPLETE_INPUT;
272                       break;
273                     }
274                 }
275             }
276
277           if (result != GCONV_OK)
278             break;
279
280           if (data->is_last)
281             {
282               /* This is the last step.  */
283               result = (*inbufsize > (step->data == &from_euctw_object
284                                       ? 0 : sizeof (wchar_t) - 1)
285                         ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
286               break;
287             }
288
289           /* Status so far.  */
290           result = GCONV_EMPTY_INPUT;
291
292           if (data->outbufavail > 0)
293             {
294               /* Call the functions below in the chain.  */
295               size_t newavail = data->outbufavail;
296
297               result = (*fct) (next_step, next_data, data->outbuf, &newavail,
298                                written, 0);
299
300               /* Correct the output buffer.  */
301               if (newavail != data->outbufavail && newavail > 0)
302                 {
303                   memmove (data->outbuf,
304                            &data->outbuf[data->outbufavail - newavail],
305                            newavail);
306                   data->outbufavail = newavail;
307                 }
308             }
309         }
310       while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT);
311     }
312
313   if (written != NULL && data->is_last)
314     *written = do_write;
315
316   return result;
317 }