From 787c3d1abe09b2d7bf65ca8f69278d7d905c27c4 Mon Sep 17 00:00:00 2001
From: echristo <echristo@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Fri, 16 Jan 2004 22:37:49 +0000
Subject: [PATCH] 2004-01-16  Eric Christopher  <echristo@redhat.com> 	   
 Chandrakala Chavva <cchavva@redhat.com>

	* cppcharset.c (one_iso88591_to_utf8): New function.
	(convert_iso88591_utf8): Ditto.  Use one_iso88591_to_utf8.
	(conversion_tab): Use convert_iso88591_utf8.
	(_cpp_input_to_utf8): New function.
	(_cpp_init_iconv_buffer): Ditto.
	(_cpp_close_iconv_buffer): Ditto.
	* cpphash.h: Prototype new functions.
	(cpp_buffer): Add input_cset_desc.
	* cppinit.c: Add input_charset default.
	* cpplib.c (cpp_push_buffer): Initialize the input iconv descriptor.
	(_cpp_pop_buffer): Close it.
	* cpplib.h (cpp_options): Add input_charset.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@76000 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog    | 24 ++++++++++---
 gcc/cppcharset.c | 91 ++++++++++++++++++++++++++++++++++++++++++++----
 gcc/cpphash.h    |  9 ++++-
 gcc/cppinit.c    |  3 ++
 gcc/cpplib.c     | 13 ++++---
 gcc/cpplib.h     |  7 ++--
 6 files changed, 129 insertions(+), 18 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d7afc1937c2d..8a3d1fdfaca3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2004-01-16  Eric Christopher  <echristo@redhat.com>
+	    Chandrakala Chavva <cchavva@redhat.com>
+
+	* cppcharset.c (one_iso88591_to_utf8): New function.
+	(convert_iso88591_utf8): Ditto.  Use one_iso88591_to_utf8.
+	(conversion_tab): Use convert_iso88591_utf8.
+	(_cpp_input_to_utf8): New function.
+	(_cpp_init_iconv_buffer): Ditto.
+	(_cpp_close_iconv_buffer): Ditto.
+	* cpphash.h: Prototype new functions.
+	(cpp_buffer): Add input_cset_desc.
+	* cppinit.c: Add input_charset default.
+	* cpplib.c (cpp_push_buffer): Initialize the input iconv descriptor.
+	(_cpp_pop_buffer): Close it.
+	* cpplib.h (cpp_options): Add input_charset.
+
 2004-01-16  Kazu Hirata  <kazu@cs.umass.edu>
 
 	* system.h (ASM_OUTPUT_SECTION_NAME): Poison.
@@ -14,23 +30,23 @@
 	* fixinc/tests/base/sys/stat.h: Adapt for new hackname.
 
 	* fixinc/inclhack.def (alpha___extern_prefix,
-	alpha___extern_prefix_standards): New hacks to obey 
+	alpha___extern_prefix_standards): New hacks to obey
  	__PRAGMA_EXTERN_PREFIX.
 	* fixinc/tests/base/testing.h [ALPHA___EXTERN_PREFIX_CHECK]: New
 	test.
 	* fixinc/tests/base/standards.h: Likewise.
-	
+
 	* fixincl/inclhack.def (alpha_pthread): Tweak to match more
 	variations.
 	New testcase.
 	* fixinc/tests/base/pthread.h: Handle it.
-	
+
 	* fixincl/inclhack.def (bad_lval): Sort file list.
 	Add many missing files up to Tru64 UNIX V5.1B.
 	* gcc/fixinc/tests/base/libgen.h: Renamed to ...
 	* gcc/fixinc/tests/base/dirent.h: ... this to match new file list
 	order.
-	
+
 	* fixinc/fixincl.x: Regenerate.
 
 2004-01-16  Mark Mitchell  <mark@codesourcery.com>
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c
index 1b2d0b2a091e..5070366e3a8c 100644
--- a/gcc/cppcharset.c
+++ b/gcc/cppcharset.c
@@ -170,7 +170,7 @@ one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
 {
   static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
   static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-  
+
   cppchar_t c;
   const uchar *inbuf = *inbufp;
   size_t nbytes, i;
@@ -274,7 +274,7 @@ one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
    The return value is either 0 for success, or an errno value for
    failure, which may be E2BIG (need more space), EILSEQ (ill-formed
    input sequence), ir EINVAL (incomplete input sequence).  */
-   
+
 static inline int
 one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
 		   uchar **outbufp, size_t *outbytesleftp)
@@ -446,6 +446,31 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
   return 0;
 }
 
+/* The first 256 code points of ISO 8859.1 have the same numeric
+   values as the first 256 code points of Unicode, therefore the
+   incoming ISO 8859.1 character can be passed directly to
+   one_cppchar_to_utf8 (which expects a Unicode value).  Returns 0 on
+   success, EINVAL if no input is left, else one_cppchar_to_utf8's error.  */
+
+static int
+one_iso88591_to_utf8 (iconv_t bigend ATTRIBUTE_UNUSED, const uchar **inbufp,
+		      size_t *inbytesleftp, uchar **outbufp, size_t *outbytesleftp)
+{
+  int rval;
+
+  /* Each call consumes exactly one byte, so one must be available.  */
+  if (*inbytesleftp < 1)
+    return EINVAL;
+
+  rval = one_cppchar_to_utf8 ((cppchar_t) **inbufp, outbufp, outbytesleftp);
+  if (rval)
+    return rval;
+
+  *inbufp += 1;
+  *inbytesleftp -= 1;
+  return 0;
+}
+
 /* Helper routine for the next few functions.  The 'const' on
    one_conversion means that we promise not to modify what function is
    pointed to, which lets the inliner see through it.  */
@@ -489,7 +514,7 @@ conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
       outbuf = to->text + to->asize - outbytesleft;
     }
 }
-		 
+
 
 /* These functions convert entire strings between character sets.
    They all have the signature
@@ -529,6 +554,14 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
   return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
 }
 
+/* Convert FLEN bytes of ISO 8859.1 text at FROM, storing UTF-8 in TO.  */
+static bool
+convert_iso88591_utf8 (iconv_t cd, const uchar *from, size_t flen,
+                       struct _cpp_strbuf *to)
+{
+  return conversion_loop (one_iso88591_to_utf8, cd, from, flen, to);
+}
+
 /* Identity conversion, used when we have no alternative.  */
 static bool
 convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -606,6 +639,7 @@ static const struct conversion conversion_tab[] = {
   { "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
   { "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
   { "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
+  { "ISO-8859-1/UTF-8", convert_iso88591_utf8, (iconv_t)0 },
 };
 
 /* Subroutine of cpp_init_iconv: initialize and return a
@@ -619,7 +653,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
   struct cset_converter ret;
   char *pair;
   size_t i;
-  
+
   if (!strcasecmp (to, from))
     {
       ret.func = convert_no_conversion;
@@ -649,7 +683,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
       if (ret.cd == (iconv_t) -1)
 	{
 	  if (errno == EINVAL)
-	    cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+	    cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
 		       "conversion from %s to %s not supported by iconv",
 		       from, to);
 	  else
@@ -660,7 +694,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
     }
   else
     {
-      cpp_error (pfile, CPP_DL_ERROR, /* XXX should be DL_SORRY */
+      cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
 		 "no iconv implementation, cannot convert from %s to %s",
 		 from, to);
       ret.func = convert_no_conversion;
@@ -1270,7 +1304,7 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
   *unsignedp = unsigned_p;
   return result;
 }
-			 
+
 /* Subroutine of cpp_interpret_charconst which performs the conversion
    to a number, for wide strings.  STR is the string structure returned
    by cpp_interpret_string.  PCHARS_SEEN and UNSIGNEDP are as for
@@ -1352,3 +1386,46 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
 
   return result;
 }
+
+/* Convert LENGTH bytes at INPUT with the current buffer's converter.  Returns
+   an xmalloc'd buffer ending in '\n', or NULL (error issued) on failure.  */
+uchar *
+_cpp_input_to_utf8 (cpp_reader *pfile, const uchar *input, cppchar_t length)
+{
+  struct _cpp_strbuf tbuf;
+  struct cset_converter cvt = pfile->buffer->input_cset_desc;
+  size_t end;
+
+  tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, length);
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+  if (!APPLY_CONVERSION (cvt, input, length, &tbuf))
+    {
+      free (tbuf.text);  /* Otherwise the partial output would leak.  */
+      cpp_error (pfile, CPP_DL_ERROR, "converting input to source character set.");
+      return NULL;
+    }
+  end = length ? tbuf.len : 0;
+  tbuf.text = xrealloc (tbuf.text, end + 1);  /* Guarantee room for '\n'.  */
+  tbuf.text[end] = '\n';
+  return tbuf.text;
+}
+
+/* Initialize the current buffer's input converter so that it maps the
+   character set named FROM to SOURCE_CHARSET.  The only caller in
+   this patch, cpp_push_buffer, passes the input_charset option, which
+   defaults to ISO-8859-1.  */
+void
+_cpp_init_iconv_buffer (cpp_reader *pfile, const char *from)
+{
+  cpp_buffer *buf = pfile->buffer;
+
+  buf->input_cset_desc = init_iconv_desc (pfile, SOURCE_CHARSET, from);
+}
+
+/* Close the iconv descriptor, if any, held by the current buffer.  */
+void
+_cpp_close_iconv_buffer (cpp_reader *pfile)
+{
+  if (HAVE_ICONV && pfile->buffer->input_cset_desc.func == convert_using_iconv)
+    iconv_close (pfile->buffer->input_cset_desc.cd);
+}
diff --git a/gcc/cpphash.h b/gcc/cpphash.h
index 80cb04c5f529..6c13ea1c0b17 100644
--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -270,7 +270,7 @@ struct cpp_buffer
   const uchar *cur;		/* Current location.  */
   const uchar *line_base;	/* Start of current physical line.  */
   const uchar *next_line;	/* Start of to-be-cleaned logical line.  */
-  
+
   const uchar *buf;		/* Entire character buffer.  */
   const uchar *rlimit;		/* Writable byte at end of file.  */
 
@@ -313,6 +313,10 @@ struct cpp_buffer
 
   /* Used for buffer overlays by cpptrad.c.  */
   const uchar *saved_cur, *saved_rlimit;
+
+  /* Descriptor for converting from the input character set to the
+     source character set.  */
+  struct cset_converter input_cset_desc;
 };
 
 /* A cpp_reader encapsulates the "state" of a pre-processor run.
@@ -557,6 +561,9 @@ extern void _cpp_init_internal_pragmas (cpp_reader *);
 extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
 				 unsigned int, unsigned int);
 extern void _cpp_pop_buffer (cpp_reader *);
+extern uchar *_cpp_input_to_utf8 (cpp_reader *, const unsigned char *, cppchar_t);
+extern void _cpp_init_iconv_buffer (cpp_reader *, const char *);
+extern void _cpp_close_iconv_buffer (cpp_reader *);
 
 /* In cpptrad.c.  */
 extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *);
diff --git a/gcc/cppinit.c b/gcc/cppinit.c
index 13326886778a..629da2734a7d 100644
--- a/gcc/cppinit.c
+++ b/gcc/cppinit.c
@@ -161,6 +161,9 @@ cpp_create_reader (enum c_lang lang, hash_table *table)
   CPP_OPTION (pfile, narrow_charset) = 0;
   CPP_OPTION (pfile, wide_charset) = 0;
 
+  /* Default the input character set to iso-8859-1 for now. */
+  CPP_OPTION (pfile, input_charset) = "ISO-8859-1";
+
   /* A fake empty "directory" used as the starting point for files
      looked up without a search path.  Name cannot be '/' because we
      don't want to prepend anything at all to filenames using it.  All
diff --git a/gcc/cpplib.c b/gcc/cpplib.c
index 2b213cb461a8..feb8717745b8 100644
--- a/gcc/cpplib.c
+++ b/gcc/cpplib.c
@@ -549,14 +549,14 @@ do_undef (cpp_reader *pfile)
 /* Undefine a single macro/assertion/whatever.  */
 
 static int
-undefine_macros (cpp_reader *pfile, cpp_hashnode *h, 
+undefine_macros (cpp_reader *pfile, cpp_hashnode *h,
 		 void *data_p ATTRIBUTE_UNUSED)
 {
   switch (h->type)
     {
     case NT_VOID:
       break;
-      
+
     case NT_MACRO:
       if (pfile->cb.undef)
         (*pfile->cb.undef) (pfile, pfile->directive_line, h);
@@ -855,7 +855,7 @@ do_linemarker (cpp_reader *pfile)
       cpp_string s = { 0, 0 };
       if (_cpp_interpret_string_notranslate (pfile, &token->val.str, &s))
 	new_file = (const char *)s.text;
-      
+
       new_sysp = 0;
       flag = read_flag (pfile, 0);
       if (flag == 1)
@@ -1159,7 +1159,7 @@ do_pragma (cpp_reader *pfile)
       (*p->u.handler) (pfile);
       if (pfile->cb.line_change)
 	(*pfile->cb.line_change) (pfile, pfile->cur_token, false);
-      
+
     }
   else if (pfile->cb.def_pragma)
     {
@@ -1925,6 +1925,7 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
 		 int from_stage3)
 {
   cpp_buffer *new = xobnew (&pfile->buffer_ob, cpp_buffer);
+  const char *input = CPP_OPTION (pfile, input_charset);
 
   /* Clears, amongst other things, if_stack and mi_cmacro.  */
   memset (new, 0, sizeof (cpp_buffer));
@@ -1936,6 +1937,8 @@ cpp_push_buffer (cpp_reader *pfile, const uchar *buffer, size_t len,
   new->need_line = true;
 
   pfile->buffer = new;
+  _cpp_init_iconv_buffer (pfile, input);
+
   return new;
 }
 
@@ -1957,6 +1960,8 @@ _cpp_pop_buffer (cpp_reader *pfile)
   /* In case of a missing #endif.  */
   pfile->state.skipping = 0;
 
+  _cpp_close_iconv_buffer (pfile);
+
   /* _cpp_do_file_change expects pfile->buffer to be the new one.  */
   pfile->buffer = buffer->prev;
 
diff --git a/gcc/cpplib.h b/gcc/cpplib.h
index 5f189245eb58..f7e12d200b7c 100644
--- a/gcc/cpplib.h
+++ b/gcc/cpplib.h
@@ -332,6 +332,9 @@ struct cpp_options
   /* Holds the name of the target wide character set.  */
   const char *wide_charset;
 
+  /* Holds the name of the input character set.  */
+  const char *input_charset;
+
   /* True to warn about precompiled header files we couldn't use.  */
   bool warn_invalid_pch;
 
@@ -417,7 +420,7 @@ struct cpp_dir
   /* Mapping of file names for this directory for MS-DOS and related
      platforms.  A NULL-terminated array of (from, to) pairs.  */
   const char **name_map;
-    
+
   /* The C front end uses these to recognize duplicated
      directories in the search path.  */
   ino_t ino;
@@ -481,7 +484,7 @@ struct cpp_hashnode GTY(())
 {
   struct ht_identifier ident;
   unsigned int is_directive : 1;
-  unsigned int directive_index : 7;	/* If is_directive, 
+  unsigned int directive_index : 7;	/* If is_directive,
 					   then index into directive table.
 					   Otherwise, a NODE_OPERATOR.  */
   unsigned char rid_code;		/* Rid code - for front ends.  */
-- 
GitLab