pandas-dev · wesm · Nov 27, 2012 · Nov 11, 2012 · Nov 11, 2012 · Nov 11, 2012
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -36,7 +36,7 @@
         string representation of NAN to use, default 'NaN'
     formatters : list or dict of one-parameter functions, optional
         formatter functions to apply to columns' elements by position or name,
-        default None
+        default None. If the result is a string, it must be a unicode string.
     float_format : one-parameter function, optional
         formatter function to apply to columns' elements if they are floats
         default None
@@ -62,7 +62,7 @@ class SeriesFormatter(object):
     def __init__(self, series, buf=None, header=True, length=True,
                  na_rep='NaN', name=False, float_format=None):
         self.series = series
-        self.buf = buf if buf is not None else StringIO()
+        self.buf = buf if buf is not None else StringIO(u"")
         self.name = name
         self.na_rep = na_rep
         self.length = length
@@ -112,7 +112,7 @@ def to_string(self):
         series = self.series
 
         if len(series) == 0:
-            return ''
+            return u''
 
         fmt_index, have_header = self._get_formatted_index()
         fmt_values = self._get_formatted_values()
@@ -135,9 +135,7 @@ def to_string(self):
         if footer:
             result.append(footer)
 
-        if py3compat.PY3:
-            return unicode(u'\n'.join(result))
-        return com.console_encode(u'\n'.join(result))
+        return unicode(u'\n'.join(result))
 
 if py3compat.PY3:  # pragma: no cover
     _encode_diff = lambda x: 0
@@ -200,10 +198,15 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
         else:
             self.columns = frame.columns
 
-    def _to_str_columns(self, force_unicode=False):
+    def _to_str_columns(self, force_unicode=None):
         """
         Render a DataFrame to a list of columns (as lists of strings).
         """
+        import warnings
+        if force_unicode is not None:  # pragma: no cover
+            warnings.warn("force_unicode is deprecated, it will have no effect",
+                          FutureWarning)
+
         # may include levels names also
         str_index = self._get_formatted_index()
         str_columns = self._get_formatted_column_labels()
@@ -237,32 +240,17 @@ def _to_str_columns(self, force_unicode=False):
         if self.index:
             strcols.insert(0, str_index)
 
-        if not py3compat.PY3:
-            if force_unicode:
-                def make_unicode(x):
-                    if isinstance(x, unicode):
-                        return x
-                    return x.decode('utf-8')
-                strcols = map(lambda col: map(make_unicode, col), strcols)
-            else:
-                # Generally everything is plain strings, which has ascii
-                # encoding.  Problem is when there is a char with value over
-                # 127. Everything then gets converted to unicode.
-                try:
-                    map(lambda col: map(str, col), strcols)
-                except UnicodeError:
-                    def make_unicode(x):
-                        if isinstance(x, unicode):
-                            return x
-                        return x.decode('utf-8')
-                    strcols = map(lambda col: map(make_unicode, col), strcols)
-
         return strcols
 
-    def to_string(self, force_unicode=False):
+    def to_string(self, force_unicode=None):
         """
         Render a DataFrame to a console-friendly tabular output.
         """
+        import warnings
+        if force_unicode is not None:  # pragma: no cover
+            warnings.warn("force_unicode is deprecated, it will have no effect",
+                          FutureWarning)
+
         frame = self.frame
 
         if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -272,15 +260,20 @@ def to_string(self, force_unicode=False):
                             com.pprint_thing(frame.index)))
             text = info_line
         else:
-            strcols = self._to_str_columns(force_unicode)
+            strcols = self._to_str_columns()
             text = adjoin(1, *strcols)
 
         self.buf.writelines(text)
 
-    def to_latex(self, force_unicode=False, column_format=None):
+    def to_latex(self, force_unicode=None, column_format=None):
         """
         Render a DataFrame to a LaTeX tabular environment output.
         """
+        import warnings
+        if force_unicode is not None:  # pragma: no cover
+            warnings.warn("force_unicode is deprecated, it will have no effect",
+                          FutureWarning)
+
         frame = self.frame
 
         if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -289,7 +282,7 @@ def to_latex(self, force_unicode=False, column_format=None):
                             frame.columns, frame.index))
             strcols = [[info_line]]
         else:
-            strcols = self._to_str_columns(force_unicode)
+            strcols = self._to_str_columns()
 
         if column_format is None:
             column_format = '|l|%s|' % '|'.join('c' for _ in strcols)
@@ -726,18 +719,10 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
         self.justify = justify
 
     def get_result(self):
-        if self._have_unicode():
-            fmt_values = self._format_strings(use_unicode=True)
-        else:
-            fmt_values = self._format_strings(use_unicode=False)
-
+        fmt_values = self._format_strings()
         return _make_fixed_width(fmt_values, self.justify)
 
-    def _have_unicode(self):
-        mask = lib.map_infer(self.values, lambda x: isinstance(x, unicode))
-        return mask.any()
-
-    def _format_strings(self, use_unicode=False):
+    def _format_strings(self):
         if self.float_format is None:
             float_format = print_config.float_format
             if float_format is None:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -612,20 +612,51 @@ def _need_info_repr_(self):
                 else:
                     return False
 
-    def __repr__(self):
+    def __str__(self):
+        """
+        Return a string representation for a particular DataFrame
+
+        Invoked by str(df) in both py2/py3.
+        Yields Bytestring in Py2, Unicode String in py3.
+        """
+
+        if py3compat.PY3:
+            return self.__unicode__()
+        return self.__bytes__()
+
+    def __bytes__(self):
         """
         Return a string representation for a particular DataFrame
+
+        Invoked by bytes(df) in py3 only.
+        Yields a bytestring in both py2/py3.
+        """
+        return com.console_encode(self.__unicode__())
+
+    def __unicode__(self):
+        """
+        Return a string representation for a particular DataFrame
+
+        Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3.
         """
-        buf = StringIO()
+        buf = StringIO(u"")
         if self._need_info_repr_():
             self.info(buf=buf, verbose=self._verbose_info)
         else:
             self.to_string(buf=buf)
+
         value = buf.getvalue()
+        assert type(value) == unicode
 
-        if py3compat.PY3:
-            return unicode(value)
-        return com.console_encode(value)
+        return value
+
+    def __repr__(self):
+        """
+        Return a string representation for a particular DataFrame
+
+        Yields Bytestring in Py2, Unicode String in py3.
+        """
+        return str(self)
 
     def _repr_html_(self):
         """
@@ -1379,19 +1410,21 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
     def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
                   header=True, index=True, na_rep='NaN', formatters=None,
                   float_format=None, sparsify=None, nanRep=None,
-                  index_names=True, justify=None, force_unicode=False):
+                  index_names=True, justify=None, force_unicode=None):
         """
         Render a DataFrame to a console-friendly tabular output.
         """
+        import warnings
+        if force_unicode is not None:  # pragma: no cover
+            warnings.warn("force_unicode is deprecated, it will have no effect",
+                          FutureWarning)
 
         if nanRep is not None:  # pragma: no cover
-            import warnings
             warnings.warn("nanRep is deprecated, use na_rep",
                           FutureWarning)
             na_rep = nanRep
 
         if colSpace is not None:  # pragma: no cover
-            import warnings
             warnings.warn("colSpace is deprecated, use col_space",
                           FutureWarning)
             col_space = colSpace
@@ -1404,15 +1437,10 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
                                            justify=justify,
                                            index_names=index_names,
                                            header=header, index=index)
-        formatter.to_string(force_unicode=force_unicode)
+        formatter.to_string()
 
         if buf is None:
             result = formatter.buf.getvalue()
-            if not force_unicode:
-                try:
-                    result = str(result)
-                except ValueError:
-                    pass
             return result
 
     @Appender(fmt.docstring_to_string, indents=1)

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -132,12 +132,48 @@ def __array_finalize__(self, obj):
     def _shallow_copy(self):
         return self.view()
 
-    def __repr__(self):
+    def __str__(self):
+        """
+        Return a string representation for a particular Index
+
+        Invoked by str(index) in both py2/py3.
+        Yields Bytestring in Py2, Unicode String in py3.
+        """
+
         if py3compat.PY3:
-            prepr = com.pprint_thing(self)
+            return self.__unicode__()
+        return self.__bytes__()
+
+    def __bytes__(self):
+        """
+        Return a string representation for a particular Index
+
+        Invoked by bytes(index) in py3 only.
+        Yields a bytestring in both py2/py3.
+        """
+        return com.console_encode(self.__unicode__())
+
+    def __unicode__(self):
+        """
+        Return a string representation for a particular Index
+
+        Invoked by unicode(index) in py2 only. Yields a Unicode String in both py2/py3.
+        """
+        if len(self) > 6 and len(self) > np.get_printoptions()['threshold']:
+            data = self[:3].tolist() + ["..."] + self[-3:].tolist()
         else:
-            prepr = com.pprint_thing_encoded(self)
-        return 'Index(%s, dtype=%s)' % (prepr, self.dtype)
+            data = self
+
+        prepr = com.pprint_thing(data)
+        return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype)
+
+    def __repr__(self):
+        """
+        Return a string representation for a particular Index
+
+        Yields Bytestring in Py2, Unicode String in py3.
+        """
+        return str(self)
 
     def astype(self, dtype):
         return Index(self.values.astype(dtype), name=self.name,
@@ -207,15 +243,6 @@ def summary(self, name=None):
             name = type(self).__name__
         return '%s: %s entries%s' % (name, len(self), index_summary)
 
-    def __str__(self):
-        try:
-            return np.array_repr(self.values)
-        except UnicodeError:
-            converted = u','.join(com.pprint_thing(x) for x in self.values)
-            result = u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted,
-                                              str(self.values.dtype))
-            return com.console_encode(result)
-
     def _mpl_repr(self):
         # how to represent ourselves to matplotlib
         return self.values
@@ -394,8 +421,8 @@ def format(self, name=False):
             result = []
             for dt in self:
                 if dt.time() != zero_time or dt.tzinfo is not None:
-                    return header + ['%s' % x for x in self]
-                result.append('%d-%.2d-%.2d' % (dt.year, dt.month, dt.day))
+                    return header + [u'%s' % x for x in self]
+                result.append(u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day))
             return header + result
 
         values = self.values
@@ -1319,7 +1346,33 @@ def _array_values(self):
     def dtype(self):
         return np.dtype('O')
 
-    def __repr__(self):
+    def __str__(self):
+        """
+        Return a string representation for a particular Index
+
+        Invoked by str(index) in both py2/py3.
+        Yields Bytestring in Py2, Unicode String in py3.
+        """
+
+        if py3compat.PY3:
+            return self.__unicode__()
+        return self.__bytes__()
+
+    def __bytes__(self):
+        """
+        Return a string representation for a particular Index
+
+        Invoked by bytes(index) in py3 only.
+        Yields a bytestring in both py2/py3.
+        """
+        return com.console_encode(self.__unicode__())
+
+    def __unicode__(self):
+        """
+        Return a string representation for a particular Index
+
+        Invoked by unicode(index) in py2 only. Yields a Unicode String in both py2/py3.
+        """
         output = 'MultiIndex\n%s'
 
         options = np.get_printoptions()
@@ -1335,10 +1388,15 @@ def __repr__(self):
 
         np.set_printoptions(threshold=options['threshold'])
 
-        if py3compat.PY3:
-            return output % summary
-        else:
-            return com.console_encode(output % summary)
+        return output % summary
+
+    def __repr__(self):
+        """
+        Return a string representation for a particular Index
+
+        Yields Bytestring in Py2, Unicode String in py3.
+        """
+        return str(self)
 
     def __len__(self):
         return len(self.labels[0])
@@ -1496,7 +1554,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
                 formatted = lev.take(lab).format()
             else:
                 # weird all NA case
-                formatted = [str(x) for x in com.take_1d(lev.values, lab)]
+                formatted = [com.pprint_thing(x) for x in com.take_1d(lev.values, lab)]
             stringified_levels.append(formatted)
 
         result_levels = []