rapidsai · galipremsagar · Jun 14, 2023 · Jun 2, 2023 · Jun 9, 2023 · Jun 9, 2023
diff --git a/docs/cudf/source/user_guide/pandas-comparison.md b/docs/cudf/source/user_guide/pandas-comparison.md
@@ -139,6 +139,27 @@ module, which allow you to compare values up to a desired precision.
 Unlike Pandas, cuDF does not support duplicate column names.
 It is best to use unique strings for column names.
 
+## Writing dataframe to parquet with mixed column names
+
+When a dataframe has column names of mixed types (for example, integers and
+strings), pandas casts each column name to a string before writing to a
+parquet file. By default, `cudf` raises an error if this is attempted. To get
+behavior similar to pandas, enable the `mode.pandas_compatible` option, which
+makes `cudf` cast the column names to strings as well, just like pandas.
+
+```python
+>>> import cudf
+>>> df = cudf.DataFrame({1: [1, 2, 3], "a": ["a", "b", "c"]})
+>>> df.to_parquet("df.parquet")
+
+Traceback (most recent call last):
+ValueError: parquet must have string column names
+>>> cudf.set_option("mode.pandas_compatible", True)
+>>> df.to_parquet("df.parquet")
+
+UserWarning: The DataFrame has column names of mixed type. They will be converted to strings and not roundtrip correctly.
+```
+
 ## No true `"object"` data type
 
 In Pandas and NumPy, the `"object"` data type is used for

@@ -361,9 +361,12 @@ def write_parquet(
 
     for i, name in enumerate(table._column_names, num_index_cols_meta):
         if not isinstance(name, str):
-            raise ValueError("parquet must have string column names")
-
-        tbl_meta.get().column_metadata[i].set_name(name.encode())
+            if cudf.get_option("mode.pandas_compatible"):
+                tbl_meta.get().column_metadata[i].set_name(str(name).encode())
+            else:
+                raise ValueError("parquet must have string column names")
+        else:
+            tbl_meta.get().column_metadata[i].set_name(name.encode())
         _set_col_metadata(
             table[name]._column,
             tbl_meta.get().column_metadata[i],

@@ -174,7 +174,7 @@ cpdef generate_pandas_metadata(table, index):
             for col in table._columns
         ],
         df=table,
-        column_names=col_names,
+        column_names=map(str, col_names),
         index_levels=index_levels,
         index_descriptors=index_descriptors,
         preserve_index=index,

@@ -30,7 +30,6 @@
 from cudf.testing._utils import (
     TIMEDELTA_TYPES,
     assert_eq,
-    assert_exceptions_equal,
     expect_warning_if,
     set_random_null_mask_inplace,
 )
@@ -2528,15 +2527,29 @@ def test_parquet_writer_decimal(decimal_type, data):
 
 
 def test_parquet_writer_column_validation():
-    df = cudf.DataFrame({1: [1, 2, 3], "1": ["a", "b", "c"]})
+    df = cudf.DataFrame({1: [1, 2, 3], "a": ["a", "b", "c"]})
     pdf = df.to_pandas()
 
-    assert_exceptions_equal(
-        lfunc=df.to_parquet,
-        rfunc=pdf.to_parquet,
-        lfunc_args_and_kwargs=(["cudf.parquet"],),
-        rfunc_args_and_kwargs=(["pandas.parquet"],),
-    )
+    with cudf.option_context("mode.pandas_compatible", True):
+        with pytest.warns(UserWarning):
+            df.to_parquet("cudf.parquet")
+
+    if PANDAS_GE_200:
+        with pytest.warns(UserWarning):
+            pdf.to_parquet("pandas.parquet")
+
+        assert_eq(
+            pd.read_parquet("cudf.parquet"),
+            cudf.read_parquet("pandas.parquet"),
+        )
+        assert_eq(
+            cudf.read_parquet("cudf.parquet"),
+            pd.read_parquet("pandas.parquet"),
+        )
+
+    with cudf.option_context("mode.pandas_compatible", False):
+        with pytest.raises(ValueError):
+            df.to_parquet("cudf.parquet")
 
 
 def test_parquet_writer_nulls_pandas_read(tmpdir, pdf):