[Aptitude-svn-commit] r3453 - in branches/aptitude-0.3/aptitude: . src/vscreen

Daniel Burrows dburrows@costa.debian.org
Sun, 26 Jun 2005 16:37:33 +0000


Author: dburrows
Date: Sun Jun 26 16:37:30 2005
New Revision: 3453

Modified:
   branches/aptitude-0.3/aptitude/ChangeLog
   branches/aptitude-0.3/aptitude/src/vscreen/transcode.cc
   branches/aptitude-0.3/aptitude/src/vscreen/transcode.h
Log:
  Add an inverse transcoding function (goes from the system wide character
format to the locale's multibyte encoding).


Modified: branches/aptitude-0.3/aptitude/ChangeLog
==============================================================================
--- branches/aptitude-0.3/aptitude/ChangeLog	(original)
+++ branches/aptitude-0.3/aptitude/ChangeLog	Sun Jun 26 16:37:30 2005
@@ -1,5 +1,10 @@
 2005-06-26  Daniel Burrows  <dburrows@debian.org>
 
+	* src/vscreen/transcode.cc, src/vscreen/transcode.h:
+
+	  Add a transcode variant that changes strings from wide character
+	  codings to multibyte codings.
+
 	* src/pkg_view.cc:
 
 	  When altering the column format of a view, transcode the input

Modified: branches/aptitude-0.3/aptitude/src/vscreen/transcode.cc
==============================================================================
--- branches/aptitude-0.3/aptitude/src/vscreen/transcode.cc	(original)
+++ branches/aptitude-0.3/aptitude/src/vscreen/transcode.cc	Sun Jun 26 16:37:30 2005
@@ -25,35 +25,53 @@
 
 using namespace std;
 
-bool transcode(const char *s,
-	       wstring &out,
-	       const char *encoding)
+/** Does the dirty iconv work, given that an iconv session has been
+ *  opened and we want to fully decode the "inbuf".  If the outbuf
+ *  isn't large enough, its size will be repeatedly doubled.
+ *
+ *  \param state the iconv state to be used
+ *  \param outbuf the buffer to which the string should be decoded.
+ *         If \b null, a new buffer will be allocated.
+ *  \param outbufsize the initial size of "outbuf", updated if
+ *      outbuf is increased.  If this value is 0, an arbitrary small
+ *      starting value will be used.
+ *  \param inbuf the string to be decoded.
+ *  \param inbufsize the size of inbuf.
+ *  \param decoded location to write the number of bytes in the decoded string.
+ */
+static bool transcode_buffer(iconv_t &state,
+			     char *&outbuf,
+			     size_t &outbufsize,
+			     const char *inbuf,
+			     size_t inbufsize,
+			     size_t &decoded)
 {
-  if(encoding == NULL)
-    encoding=nl_langinfo(CODESET);
-
-  iconv_t converter=iconv_open("WCHAR_T", encoding);
-
-  if(converter==((iconv_t)-1))
-    return false;
-
-  // arbitrary initial starting size; expected to be large enough for
-  // most "small" strings.
-  size_t outbufsize=1024, outbufremaining=1024;
-  size_t inremaining=strlen(s);
-
-  char *outbufhead=(char *) malloc(outbufsize), *outbufcur=outbufhead;
-  if(!outbufhead)
+  if(outbufsize == 0 || outbuf == NULL)
     {
-      errno=ENOMEM;
-      return false;
+      free(outbuf);
+      // arbitrary initial starting size; expected to be large enough
+      // for most "small" strings.
+      if(outbufsize == 0)
+	outbufsize = 1024;
+      outbuf = (char *) malloc(outbufsize);
+      if(outbuf == NULL)
+	{
+	  errno = ENOMEM;
+	  decoded=0;
+	  return false;
+	}
     }
 
+  char *outbufcur = outbuf;
+
+  size_t outremaining = outbufsize;
+  size_t inremaining  = inbufsize;
+
   while(inremaining>0)
     {
-      if(iconv(converter,
-	       const_cast<char **>(&s), &inremaining,
-	       &outbufcur, &outbufremaining) == ((size_t)-1))
+      if(iconv(state,
+	       const_cast<char **>(&inbuf), &inremaining,
+	       &outbufcur, &outremaining) == ((size_t)-1))
 	{
 	  // Some error conditions can be corrected.  There are three
 	  // reasons iconv can terminate abnormally:
@@ -71,31 +89,90 @@
 
 	  if(errno != E2BIG)
 	    {
-	      if(outbufremaining<outbufsize)
-		out=wstring((wchar_t *) outbufhead,
-			    outbufsize-outbufremaining);
-
-	      free(outbufhead);
+	      decoded=outbufsize-outremaining;
 	      return false;
 	    }
 	  else
 	    {
-	      size_t idx=outbufcur-outbufhead;
-	      outbufremaining+=outbufsize;
-	      outbufsize*=2;
-	      outbufhead=(char *) realloc(outbufhead, outbufsize);
-	      outbufcur=outbufhead+idx;
+	      size_t idx = outbufcur-outbuf;
+	      outremaining += outbufsize;
+	      outbufsize *= 2;
+	      outbuf = (char *) realloc(outbuf, outbufsize);
+	      outbufcur = outbuf + idx;
 	    }
 	}
       else
 	// if this fails, my understanding of iconv is wrong: the
 	// iconv docs say that if it doesn't fail, then the whole
 	// input sequence was converted.
-	assert(inremaining==0);
+	assert(inremaining == 0);
     }
 
-  out=wstring((wchar_t *) outbufhead, (outbufsize-outbufremaining)/sizeof(wchar_t));
-  free(outbufhead);
+  decoded=outbufsize-outremaining;
+
   return true;
 }
 
+bool transcode(const char *s,
+	       wstring &out,
+	       const char *encoding)
+{
+  if(encoding == NULL)
+    encoding = nl_langinfo(CODESET);
+
+  iconv_t converter=iconv_open("WCHAR_T", encoding);
+
+  if(converter == ((iconv_t)-1))
+    return false;
+
+  char *outbuf = NULL;
+  size_t outbufsize = 0;
+  size_t result_size = 0;
+
+  bool rval = transcode_buffer(converter, outbuf, outbufsize,
+			       s, strlen(s), result_size);
+
+  if(outbuf != NULL)
+    {
+      out = wstring((wchar_t *) outbuf, result_size/sizeof(wchar_t));
+      free(outbuf);
+    }
+
+  if(iconv_close(converter) == -1)
+    rval = false;
+
+  return rval;
+}
+
+bool transcode(const wchar_t *s,
+	       string &out,
+	       const char *encoding)
+{
+  if(encoding == NULL)
+    encoding = nl_langinfo(CODESET);
+
+  iconv_t converter = iconv_open(encoding, "WCHAR_T");
+
+  if(converter == ((iconv_t)-1))
+    return false;
+
+  char *outbuf = NULL;
+  size_t outbufsize = 0;
+  size_t result_size = 0;
+
+  bool rval = transcode_buffer(converter, outbuf, outbufsize,
+			       (char *) s,
+			       wcslen(s)*sizeof(wchar_t),
+			       result_size);
+
+  if(outbuf != NULL)
+    {
+      out = string(outbuf, result_size);
+      free(outbuf);
+    }
+
+  if(iconv_close(converter) == -1)
+    rval = false;
+
+  return rval;
+}

Modified: branches/aptitude-0.3/aptitude/src/vscreen/transcode.h
==============================================================================
--- branches/aptitude-0.3/aptitude/src/vscreen/transcode.h	(original)
+++ branches/aptitude-0.3/aptitude/src/vscreen/transcode.h	Sun Jun 26 16:37:30 2005
@@ -39,4 +39,21 @@
 
 // Note: would it be saner to express errors via exceptions?
 
+/** Convenience function to convert the native wide character encoding
+ *  to a multibyte encoding.  This is a wrapper around iconv.
+ *
+ *  \param s the wide string to encode
+ *  \param out the location to write the multibyte string
+ *  \param encoding the encoding of out; if \b null or unspecified,
+ *         the codeset of the current LC_CTYPE locale is used.
+ *
+ *  \return \b true if the entire string was successfully transcoded;
+ *  if transcoding failed, returns \b false and sets errno.
+ */
+#if 0
+bool transcode(const wchar_t *s,
+	       std::string &out,
+	       const char *encoding=NULL);
+#endif
+
 #endif // TRANSCODE_H