Handle Yen and overscore conversion from ISO 10646 correctly.
[kopensolaris-gnu/glibc.git] / iconvdata / eucjp.c
index 708640d..4936e40 100644 (file)
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-#include <gconv.h>
 #include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <wchar.h>
+#include <gconv.h>
 #include <jis0201.h>
 #include <jis0208.h>
 #include <jis0212.h>
 
-/* Direction of the transformation.  */
-enum direction
-{
-  illegal,
-  to_eucjp,
-  from_eucjp
-};
-
-struct eucjp_data
-{
-  enum direction dir;
-};
-
-
-int
-gconv_init (struct gconv_step *step)
-{
-  /* Determine which direction.  */
-  struct eucjp_data *new_data;
-  enum direction dir;
-  int result;
-
-  if (strcasestr (step->from_name, "EUC-JP") != NULL)
-    dir = from_eucjp;
-  else if (strcasestr (step->to_name, "EUC-JP") != NULL)
-    dir = to_eucjp;
-  else
-    dir = illegal;
-
-  result = GCONV_NOCONV;
-  if (dir != illegal
-      && ((new_data
-          = (struct eucjp_data *) malloc (sizeof (struct eucjp_data)))
-         != NULL))
-    {
-      new_data->dir = dir;
-      step->data = new_data;
-      result = GCONV_OK;
-    }
-
-  return result;
-}
-
-
-void
-gconv_end (struct gconv_step *data)
-{
-  free (data->data);
-}
-
-
-int
-gconv (struct gconv_step *step, struct gconv_step_data *data,
-       const char *inbuf, size_t *inbufsize, size_t *written, int do_flush)
-{
-  struct gconv_step *next_step = step + 1;
-  struct gconv_step_data *next_data = data + 1;
-  gconv_fct fct = next_step->fct;
-  size_t do_write;
-  int result;
-
-  /* If the function is called with no input this means we have to reset
-     to the initial state.  The possibly partly converted input is
-     dropped.  */
-  if (do_flush)
-    {
-      do_write = 0;
-
-      /* Call the steps down the chain if there are any.  */
-      if (data->is_last)
-       result = GCONV_OK;
-      else
-       {
-         struct gconv_step *next_step = step + 1;
-         struct gconv_step_data *next_data = data + 1;
-
-         result = (*fct) (next_step, next_data, NULL, 0, written, 1);
-
-         /* Clear output buffer.  */
-         data->outbufavail = 0;
-       }
-    }
-  else
-    {
-      enum direction dir = ((struct eucjp_data *) step->data)->dir;
-
-      do_write = 0;
-
-      do
-       {
-         result = GCONV_OK;
-
-         if (dir == from_eucjp)
-           {
-             size_t inchars = *inbufsize;
-             size_t outwchars = data->outbufavail;
-             char *outbuf = data->outbuf;
-             size_t cnt = 0;
-
-             while (cnt < inchars
-                    && (outwchars + sizeof (wchar_t) <= data->outbufsize))
-               {
-                 int inchar = (unsigned char) inbuf[cnt];
-                 wchar_t ch;
-
-                 if (inchar <= 0x7f)
-                   ch = (wchar_t) inchar;
-                  else if ((inchar <= 0xa0 || inchar > 0xfe)
-                          && inchar != 0x8e && inchar != 0x8f)
-                      /* This is illegal.  */
-                      ch = L'\0';
-                 else
-                   {
-                     /* Two or more byte character.  First test whether the
-                        next character is also available.  */
-                     int inchar2;
-
-                     if (cnt + 1 >= inchars)
-                       {
-                         /* The second character is not available.  Store
-                            the intermediate result.  */
-                         result = GCONV_INCOMPLETE_INPUT;
-                         break;
-                       }
-
-                     inchar2 = (unsigned char) inbuf[++cnt];
-
-                     /* All second bytes of a multibyte character must be
-                        >= 0xa1. */
-                     if (inchar2 < 0xa1)
-                       {
-                         /* This is an illegal character.  */
-                         --cnt;
-                         result = GCONV_ILLEGAL_INPUT;
-                         break;
-                       }
-
-                     if (inchar == '\x8e')
-                       /* This is code set 2: half-width katakana.  */
-                       ch = jisx0201_to_ucs4 (inchar2);
-                     else if (inchar == '\x8f')
-                       {
-                         /* This is code set 3: JIS X 0212-1990.  */
-                         const char *endp = &inbuf[cnt];
-
-                         ch = jisx0212_to_ucs4 (&endp, 1 + inchars - cnt,
-                                                0x80);
-                         cnt = endp - inbuf;
-                       }
-                     else
-                       {
-                         /* This is code set 1: JIS X 0208.  */
-                         const char *endp = &inbuf[cnt - 1];
-
-                         ch = jisx0208_to_ucs4 (&endp, 2 + inchars - cnt,
-                                                0x80);
-                         if (ch != L'\0')
-                           ++cnt;
-                       }
-
-                     if (ch == UNKNOWN_10646_CHAR)
-                         ch = L'\0';
-
-                     if (ch == L'\0')
-                       --cnt;
-                   }
-
-                 if (ch == L'\0' && inbuf[cnt] != '\0')
-                   {
-                     /* This is an illegal character.  */
-                     result = GCONV_ILLEGAL_INPUT;
-                     break;
-                   }
-
-                 *((wchar_t *) (outbuf + outwchars)) = ch;
-                 ++do_write;
-                 outwchars += sizeof (wchar_t);
-                 ++cnt;
-               }
-             *inbufsize -= cnt;
-             data->outbufavail = outwchars;
-           }
-         else
-           {
-             size_t inwchars = *inbufsize;
-             size_t outchars = data->outbufavail;
-             char *outbuf = data->outbuf;
-             size_t cnt = 0;
-             int extra = 0;
-
-             while (inwchars >= cnt + sizeof (wchar_t)
-                    && outchars < data->outbufsize)
-               {
-                 wchar_t ch = *((wchar_t *) (inbuf + cnt));
-
-                 if (ch <= L'\x7f')
-                   /* It's plain ASCII.  */
-                   outbuf[outchars] = ch;
-                 else
-                   {
-                     /* Try the JIS character sets.  */
-                     size_t found;
-
-                     found = ucs4_to_jisx0201 (ch, &outbuf[outchars], 1);
-
-                     if (found == 0)
-                       {
-                         /* No JIS 0201 character.  */
-                         found = ucs4_to_jisx0208 (ch, &outbuf[outchars],
-                                                   (data->outbufsize
-                                                    - outchars));
-                         if (found > 0)
-                           {
-                             /* It's a JIS 0208 character, adjust it for
-                                EUC-JP.  */
-                             outbuf[outchars++] += 0x80;
-                             outbuf[outchars] += 0x80;
-                           }
-                         else if (found == 0)
-                           {
-                             /* We ran out of space.  */
-                             extra = 2;
-                             break;
-                           }
-                         else
-                           {
-                             /* No JIS 0208 character.  */
-                             found = ucs4_to_jisx0212 (ch, &outbuf[outchars],
-                                                       (data->outbufsize
-                                                        - outchars));
-
-                             if (found > 0)
-                               {
-                                 /* It's a JIS 0212 character, adjust it for
-                                    EUC-JP.  */
-                                 outbuf[outchars++] += 0x80;
-                                 outbuf[outchars] += 0x80;
-                               }
-                             else if (found == 0)
-                               {
-                                 /* We ran out of space.  */
-                                 extra = 2;
-                                 break;
-                               }
-                             else
-                               /* Illegal character.  */
-                               break;
-                           }
-                       }
-                   }
-
-                 ++do_write;
-                 ++outchars;
-                 cnt += sizeof (wchar_t);
-               }
-             *inbufsize -= cnt;
-             data->outbufavail = outchars;
-
-             if (outchars + extra < data->outbufsize)
-               {
-                 /* If there is still room in the output buffer something
-                    is wrong with the input.  */
-                 if (inwchars >= cnt + sizeof (wchar_t))
-                   {
-                     /* An error occurred.  */
-                     result = GCONV_ILLEGAL_INPUT;
-                     break;
-                   }
-                 if (inwchars != cnt)
-                   {
-                     /* There are some unprocessed bytes at the end of the
-                        input buffer.  */
-                     result = GCONV_INCOMPLETE_INPUT;
-                     break;
-                   }
-               }
-           }
-
-         if (result != GCONV_OK)
-           break;
-
-         if (data->is_last)
-           {
-             /* This is the last step.  */
-             result = (*inbufsize > (dir == from_eucjp
-                                     ? 0 : sizeof (wchar_t) - 1)
-                       ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
-             break;
-           }
-
-         /* Status so far.  */
-         result = GCONV_EMPTY_INPUT;
-
-         if (data->outbufavail > 0)
-           {
-             /* Call the functions below in the chain.  */
-             size_t newavail = data->outbufavail;
-
-             result = (*fct) (next_step, next_data, data->outbuf, &newavail,
-                              written, 0);
-
-             /* Correct the output buffer.  */
-             if (newavail != data->outbufavail && newavail > 0)
-               {
-                 memmove (data->outbuf,
-                          &data->outbuf[data->outbufavail - newavail],
-                          newavail);
-                 data->outbufavail = newavail;
-               }
-           }
-       }
-      while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT);
-    }
-
-  if (written != NULL && data->is_last)
-    *written = do_write;
-
-  return result;
-}
+/* Definitions used in the body of the `gconv' function.
+
+   CHARSET_NAME names the character set this module handles; FROM_LOOP
+   and TO_LOOP name the two conversion loops defined further down in
+   this file.  The *_NEEDED_* values are byte counts per character: the
+   code below consumes between one and three bytes for an EUC-JP
+   character and exactly four for its 32-bit counterpart.
+   NOTE(review): DEFINE_INIT and DEFINE_FINI presumably ask the included
+   skeleton to generate the init and end functions -- confirm against
+   <iconv/skeleton.c>.  */
+#define CHARSET_NAME           "EUC-JP//"
+#define FROM_LOOP              from_euc_jp
+#define TO_LOOP                        to_euc_jp
+#define DEFINE_INIT            1
+#define DEFINE_FINI            1
+#define MIN_NEEDED_FROM                1
+#define MAX_NEEDED_FROM                3
+#define MIN_NEEDED_TO          4
+
+
+/* First define the conversion function from EUC-JP to UCS4.
+
+   Each pass of BODY decodes one EUC-JP character at INPTR and stores it
+   as one 32-bit value at OUTPTR:
+     - 0x00..0x7f         one byte, copied unchanged;
+     - 0x8e + one byte    code set 2 (JIS X 0201 half-width katakana);
+     - 0x8f + two bytes   code set 3 (JIS X 0212-1990);
+     - 0xa1..0xfe + one   code set 1 (JIS X 0208).
+   On any error RESULT is set and the loop is left before INPTR or
+   OUTPTR have been moved, so INPTR still addresses the first byte of
+   the offending character and nothing has been written for it.  */
+#define MIN_NEEDED_INPUT       MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT       MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT      MIN_NEEDED_TO
+#define LOOPFCT                FROM_LOOP
+#define BODY \
+  {                                                                          \
+    uint32_t ch = *inptr;                                                    \
+                                                                             \
+    if (ch <= 0x7f)                                                          \
+      /* Plain ASCII: one byte, passed through unchanged.  */                \
+      ++inptr;                                                               \
+    else if ((ch <= 0xa0 || ch > 0xfe) && ch != 0x8e && ch != 0x8f)          \
+      {                                                                      \
+        /* Not a valid lead byte for any of the code sets.  */               \
+        result = GCONV_ILLEGAL_INPUT;                                        \
+        break;                                                               \
+      }                                                                      \
+    else                                                                     \
+      {                                                                      \
+        /* Two or more byte character.  First test whether the next          \
+           character is also available.  */                                  \
+        int ch2;                                                             \
+                                                                             \
+        if (NEED_LENGTH_TEST && inptr + 1 >= inend)                          \
+          {                                                                  \
+            /* The second character is not available.  Store the             \
+               intermediate result.  */                                      \
+            result = GCONV_INCOMPLETE_INPUT;                                 \
+            break;                                                           \
+          }                                                                  \
+                                                                             \
+        ch2 = inptr[1];                                                      \
+                                                                             \
+        /* All second bytes of a multibyte character must be >= 0xa1. */     \
+        if (ch2 < 0xa1)                                                      \
+          {                                                                  \
+            /* This is an illegal character.  */                             \
+            result = GCONV_ILLEGAL_INPUT;                                    \
+            break;                                                           \
+          }                                                                  \
+                                                                             \
+        if (ch == 0x8e)                                                      \
+          {                                                                  \
+            /* This is code set 2: half-width katakana.  */                  \
+            ch = jisx0201_to_ucs4 (ch2);                                     \
+            if (ch == UNKNOWN_10646_CHAR)                                    \
+              {                                                              \
+                /* No mapping for this byte: illegal character.  */          \
+                result = GCONV_ILLEGAL_INPUT;                                \
+                break;                                                       \
+              }                                                              \
+            inptr += 2;                                                      \
+          }                                                                  \
+        else                                                                 \
+          {                                                                  \
+            const unsigned char *endp;                                       \
+                                                                             \
+            if (ch == 0x8f)                                                  \
+              {                                                              \
+                /* This is code set 3: JIS X 0212-1990.  */                  \
+                endp = inptr + 1;                                            \
+                                                                             \
+                ch = jisx0212_to_ucs4 (&endp,                                \
+                                       NEED_LENGTH_TEST ? inend - endp : 2,  \
+                                       0x80);                                \
+              }                                                              \
+            else                                                             \
+              {                                                              \
+                /* This is code set 1: JIS X 0208.  */                       \
+                endp = inptr;                                                \
+                                                                             \
+                ch = jisx0208_to_ucs4 (&endp,                                \
+                                       NEED_LENGTH_TEST ? inend - inptr : 2, \
+                                       0x80);                                \
+              }                                                              \
+                                                                             \
+            if (NEED_LENGTH_TEST && ch == 0)                                 \
+              {                                                              \
+                /* Not enough input available.  */                           \
+                result = GCONV_INCOMPLETE_INPUT;                             \
+                break;                                                       \
+              }                                                              \
+            if (ch == UNKNOWN_10646_CHAR)                                    \
+              {                                                              \
+                /* Illegal character.  */                                    \
+                result = GCONV_ILLEGAL_INPUT;                                \
+                break;                                                       \
+              }                                                              \
+            inptr = endp;                                                    \
+          }                                                                  \
+      }                                                                      \
+                                                                             \
+    /* Store the character as one 32-bit unit and step past it.  */          \
+    *((uint32_t *) outptr) = ch;                                             \
+    outptr += sizeof (uint32_t);                                             \
+  }
+#include <iconv/loop.c>
+
+
+/* Second direction: UCS4 to EUC-JP.
+
+   BODY reads one 32-bit character from INPTR and emits one, two or three
+   bytes at OUTPTR.  Targets are tried in this order: a single byte
+   (ASCII plus two special cases), JIS X 0201 behind an 0x8e prefix,
+   JIS X 0208, and finally JIS X 0212 behind an 0x8f prefix.  INPTR and
+   OUTPTR only advance once the character has been converted; on failure
+   RESULT is set and the loop is left.  */
+#define MIN_NEEDED_INPUT       MIN_NEEDED_TO
+#define MIN_NEEDED_OUTPUT      MIN_NEEDED_FROM
+#define MAX_NEEDED_OUTPUT      MAX_NEEDED_FROM
+#define LOOPFCT                TO_LOOP
+#define BODY \
+  {                                                                          \
+    const uint32_t ucs = *((uint32_t *) inptr);                              \
+                                                                             \
+    if (ucs <= 0x7f || ucs == 0xa5 || ucs == 0x203e)                         \
+      {                                                                      \
+        /* One output byte: ASCII as is, YEN SIGN in the backslash slot      \
+           (0x5c), OVERLINE in the tilde slot (0x7e).  */                    \
+        *outptr++ = (ucs == 0xa5 ? 0x5c : ucs == 0x203e ? 0x7e : ucs);       \
+      }                                                                      \
+    else                                                                     \
+      {                                                                      \
+        size_t status;                                                       \
+                                                                             \
+        /* Every remaining encoding takes at least two bytes.  */            \
+        if (NEED_LENGTH_TEST && outend <= outptr + 1)                        \
+          {                                                                  \
+            result = GCONV_FULL_OUTPUT;                                      \
+            break;                                                           \
+          }                                                                  \
+                                                                             \
+        /* Try JIS X 0201 first; its byte lands behind the slot kept free    \
+           for the 0x8e prefix.  */                                          \
+        status = ucs4_to_jisx0201 (ucs, outptr + 1);                         \
+        if (status == UNKNOWN_10646_CHAR)                                    \
+          {                                                                  \
+            /* Not JIS X 0201.  Room for two bytes was verified above,       \
+               which is why a constant length is passed here.  */            \
+            status = ucs4_to_jisx0208 (ucs, outptr, 2);                      \
+            if (status == UNKNOWN_10646_CHAR)                                \
+              {                                                              \
+                /* Last candidate is JIS X 0212, which also needs a          \
+                   leading 0x8f byte.  */                                    \
+                status = ucs4_to_jisx0212 (ucs, outptr + 1,                  \
+                                           (NEED_LENGTH_TEST                 \
+                                            ? outend - outptr - 1 : 2));     \
+                if (status == 0)                                             \
+                  {                                                          \
+                    /* We ran out of space.  */                              \
+                    result = GCONV_FULL_OUTPUT;                              \
+                    break;                                                   \
+                  }                                                          \
+                if (status == UNKNOWN_10646_CHAR)                            \
+                  {                                                          \
+                    /* Illegal character.  */                                \
+                    result = GCONV_ILLEGAL_INPUT;                            \
+                    break;                                                   \
+                  }                                                          \
+                                                                             \
+                /* Store the prefix, add 0x80 to both code bytes.  */        \
+                outptr[0] = 0x8f;                                            \
+                outptr[1] += 0x80;                                           \
+                outptr[2] += 0x80;                                           \
+                outptr += 3;                                                 \
+              }                                                              \
+            else                                                             \
+              {                                                              \
+                /* JIS X 0208: add 0x80 to both bytes for EUC-JP.  */        \
+                outptr[0] += 0x80;                                           \
+                outptr[1] += 0x80;                                           \
+                outptr += 2;                                                 \
+              }                                                              \
+          }                                                                  \
+        else                                                                 \
+          {                                                                  \
+            /* It is JIS X 0201: fill in the prefix byte.  */                \
+            outptr[0] = 0x8e;                                                \
+            outptr += 2;                                                     \
+          }                                                                  \
+      }                                                                      \
+                                                                             \
+    /* One UCS4 unit consumed.  */                                           \
+    inptr += 4;                                                              \
+  }
+#include <iconv/loop.c>
+
+
+/* Now define the toplevel functions.  */
+#include <iconv/skeleton.c>