fix

snowflakedb · Oct 23, 2024 · 473650b · 473650b
1 parent 9dd7a23
commit 473650b
Show file tree

Hide file tree

Showing 24 changed files with 165 additions and 215 deletions.
diff --git a/.github/workflows/daily_modin_precommit.yml b/.github/workflows/daily_modin_precommit.yml
@@ -242,8 +242,8 @@ jobs:
             .tox/.coverage
             .tox/coverage.xml
 
-  test-enable-cte-optimization:
-    name: Test Enable CTE Optimization modin-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
+  test-disable-cte-optimization:
+    name: Test Disable CTE Optimization modin-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
     needs: build
     runs-on: ${{ matrix.os.image_name }}
     strategy:
@@ -287,15 +287,15 @@ jobs:
         env:
           PYTHON_VERSION: ${{ matrix.python-version }}
           cloud_provider: ${{ matrix.cloud-provider }}
-          PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization
+          PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization
           TOX_PARALLEL_NO_SPINNER: 1
         shell: bash
       - name: Run Snowpark pandas API tests (excluding doctests)
         run: python -m tox -e "py${PYTHON_VERSION/\./}-snowparkpandasdailynotdoctest-modin-ci"
         env:
           PYTHON_VERSION: ${{ matrix.python-version }}
           cloud_provider: ${{ matrix.cloud-provider }}
-          PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization --skip_sql_count_check --ignore=tests/integ/modin/test_sql_counter.py
+          PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization --skip_sql_count_check --ignore=tests/integ/modin/test_sql_counter.py
           TOX_PARALLEL_NO_SPINNER: 1
         shell: bash
       - name: Combine coverages
@@ -306,7 +306,7 @@ jobs:
       - uses: actions/upload-artifact@v4
         with:
           include-hidden-files: true
-          name: coverage_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-enable-cte-optimization
+          name: coverage_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-disable-cte-optimization
           path: |
             .tox/.coverage
             .tox/coverage.xml

diff --git a/.github/workflows/daily_precommit.yml b/.github/workflows/daily_precommit.yml
@@ -383,8 +383,8 @@ jobs:
             .tox/.coverage
             .tox/coverage.xml
 
-  test-enable-cte-optimization:
-    name: Test Enable CTE Optimization py-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
+  test-disable-cte-optimization:
+    name: Test Disable CTE Optimization py-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
     needs: build
     runs-on: ${{ matrix.os.image_name }}
     strategy:
@@ -428,15 +428,15 @@ jobs:
         env:
           PYTHON_VERSION: ${{ matrix.python-version }}
           cloud_provider: ${{ matrix.cloud-provider }}
-          PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization
+          PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization
           TOX_PARALLEL_NO_SPINNER: 1
         shell: bash
       - name: Run tests (excluding doctests)
         run: python -m tox -e "py${PYTHON_VERSION/\./}-dailynotdoctest-ci"
         env:
           PYTHON_VERSION: ${{ matrix.python-version }}
           cloud_provider: ${{ matrix.cloud-provider }}
-          PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization
+          PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization
           TOX_PARALLEL_NO_SPINNER: 1
         shell: bash
       - name: Combine coverages
@@ -447,7 +447,7 @@ jobs:
       - uses: actions/upload-artifact@v4
         with:
           include-hidden-files: true
-          name: coverage_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-enable-cte-optimization
+          name: coverage_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-disable-cte-optimization
           path: |
             .tox/.coverage
             .tox/coverage.xml

diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml
@@ -246,64 +246,6 @@ jobs:
             .tox/.coverage
             .tox/coverage.xml
 
-  test-enable-cte-optimization:
-    name: Test Enable CTE Optimization py-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
-    needs: build
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest-64-cores]
-        python-version: ["3.9"]
-        cloud-provider: [aws]
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Display Python version
-        run: python -c "import sys; print(sys.version)"
-      - name: Decrypt parameters.py
-        shell: bash
-        run: .github/scripts/decrypt_parameters.sh
-        env:
-          PARAMETER_PASSWORD: ${{ secrets.PARAMETER_PASSWORD }}
-          CLOUD_PROVIDER: ${{ matrix.cloud-provider }}
-      - name: Download wheel(s)
-        uses: actions/download-artifact@v4
-        with:
-          name: wheel
-          path: dist
-      - name: Show wheels downloaded
-        run: ls -lh dist
-        shell: bash
-      - name: Upgrade setuptools, pip and wheel
-        run: python -m pip install -U setuptools pip wheel
-      - name: Install tox
-        run: python -m pip install tox
-      - name: Run tests (excluding doctests)
-        run: python -m tox -e "py${PYTHON_VERSION/\./}-notdoctest-ci"
-        env:
-          PYTHON_VERSION: ${{ matrix.python-version }}
-          cloud_provider: ${{ matrix.cloud-provider }}
-          PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization
-          TOX_PARALLEL_NO_SPINNER: 1
-        shell: bash
-      - name: Combine coverages
-        run: python -m tox -e coverage --skip-missing-interpreters false
-        shell: bash
-        env:
-          SNOWFLAKE_IS_PYTHON_RUNTIME_TEST: 1
-      - uses: actions/upload-artifact@v4
-        with:
-          include-hidden-files: true
-          name: coverage_${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-enable-cte-optimization
-          path: |
-            .tox/.coverage
-            .tox/coverage.xml
-
   test-snowpark-pandas:
     name: Test modin-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
     needs: build
@@ -398,7 +340,8 @@ jobs:
         env:
           PYTHON_VERSION: ${{ matrix.python-version }}
           cloud_provider: ${{ matrix.cloud-provider }}
-          PYTEST_ADDOPTS: --color=yes --tb=short
+          # TODO SNOW-1733956: Remove --disable_cte_optimization when sql counts are fixed
+          PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization
           TOX_PARALLEL_NO_SPINNER: 1
         shell: bash
       - name: Combine coverages
@@ -604,7 +547,6 @@ jobs:
     needs:
       - test
       - test-local-testing
-      - test-enable-cte-optimization
       - test-snowpark-pandas
       - test-modin-extra-without-pandas-extra
       - test-snowpark-multithreading-mode

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -28,7 +28,7 @@ def is_excluded_frontend_file(path):
 def pytest_addoption(parser):
     parser.addoption("--disable_sql_simplifier", action="store_true", default=False)
     parser.addoption("--local_testing_mode", action="store_true", default=False)
-    parser.addoption("--enable_cte_optimization", action="store_true", default=False)
+    parser.addoption("--disable_cte_optimization", action="store_true", default=False)
     parser.addoption("--multithreading_mode", action="store_true", default=False)
     parser.addoption("--skip_sql_count_check", action="store_true", default=False)
 
@@ -84,7 +84,7 @@ def local_testing_telemetry_setup():
 
 @pytest.fixture(scope="session")
 def cte_optimization_enabled(pytestconfig):
-    return pytestconfig.getoption("enable_cte_optimization")
+    return not pytestconfig.getoption("disable_cte_optimization")
 
 
 MULTITHREADING_TEST_MODE_ENABLED = False

diff --git a/tests/integ/modin/crosstab/test_crosstab.py b/tests/integ/modin/crosstab/test_crosstab.py
@@ -53,7 +53,7 @@ def test_basic_crosstab_with_numpy_arrays_different_lengths(self, dropna, a, b,
     def test_basic_crosstab_with_series_objs_full_overlap(self, dropna, a, b, c):
         # In this case, all indexes are identical - hence "full" overlap.
         query_count = 2
-        join_count = 5 if dropna else 10
+        join_count = 4 if dropna else 5
 
         def eval_func(lib):
             if lib is pd:
@@ -80,7 +80,7 @@ def test_basic_crosstab_with_series_objs_some_overlap(self, dropna, a, b, c):
         # of the Series objects. This test case passes because we pass in arrays that
         # are the length of the intersection rather than the length of each of the Series.
         query_count = 2
-        join_count = 5 if dropna else 10
+        join_count = 4 if dropna else 5
         b = native_pd.Series(
             b,
             index=list(range(len(a))),
@@ -206,7 +206,7 @@ def test_basic_crosstab_with_df_and_series_objs_pandas_errors_columns(
         self, dropna, a, b, c
     ):
         query_count = 4
-        join_count = 1 if dropna else 3
+        join_count = 1 if dropna else 2
         a = native_pd.Series(
             a,
             dtype=object,
@@ -252,7 +252,7 @@ def test_basic_crosstab_with_df_and_series_objs_pandas_errors_index(
         self, dropna, a, b, c
     ):
         query_count = 6
-        join_count = 5 if dropna else 17
+        join_count = 5 if dropna else 11
         a = native_pd.Series(
             a,
             dtype=object,
@@ -319,7 +319,7 @@ def test_margins(self, dropna, a, b, c):
     @pytest.mark.parametrize("normalize", [0, 1, True, "all", "index", "columns"])
     def test_normalize(self, dropna, normalize, a, b, c):
         query_count = 1 if normalize in (0, "index") else 2
-        join_count = 3 if normalize in (0, "index") else 2
+        join_count = 3 if normalize in (0, "index") and dropna else 2
         if dropna:
             join_count -= 2
 
@@ -340,9 +340,9 @@ def test_normalize(self, dropna, normalize, a, b, c):
     @pytest.mark.parametrize("normalize", [0, 1, True, "all", "index", "columns"])
     def test_normalize_and_margins(self, dropna, normalize, a, b, c):
         counts = {
-            "columns": [3, 5 if dropna else 9, 4],
-            "index": [1, 5 if dropna else 8, 3],
-            "all": [3, 12 if dropna else 19, 7],
+            "columns": [3, 4 if dropna else 7, 3],
+            "index": [1, 3 if dropna else 4, 1],
+            "all": [3, 7 if dropna else 10, 3],
         }
         counts[0] = counts["index"]
         counts[1] = counts["columns"]
@@ -374,8 +374,8 @@ def test_normalize_and_margins(self, dropna, normalize, a, b, c):
     @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"])
     def test_normalize_margins_and_values(self, dropna, normalize, aggfunc, a, b, c):
         counts = {
-            "columns": [3, 29 if dropna else 41, 4],
-            "index": [1, 23 if dropna else 32, 3],
+            "columns": [3, 10 if dropna else 13, 3],
+            "index": [1, 5 if dropna else 6, 1],
             "all": [3, 54 if dropna else 75, 7],
         }
         counts[0] = counts["index"]
@@ -438,7 +438,7 @@ def eval_func(lib):
 
         with SqlCounter(
             query_count=1,
-            join_count=7 if dropna else 10,
+            join_count=3 if dropna else 4,
             union_count=1,
         ):
             eval_snowpark_pandas_result(
@@ -451,9 +451,9 @@ def eval_func(lib):
     @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"])
     def test_normalize_and_values(self, dropna, normalize, aggfunc, a, b, c):
         counts = {
-            "columns": [2, 4 if dropna else 10],
-            "index": [1, 5 if dropna else 11],
-            "all": [2, 4 if dropna else 10],
+            "columns": [2, 4 if dropna else 6],
+            "index": [1, 3 if dropna else 4],
+            "all": [2, 4 if dropna else 6],
         }
         counts[0] = counts["index"]
         counts[1] = counts["columns"]
@@ -520,7 +520,7 @@ def test_normalize_margins_and_values_not_supported(
     @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"])
     def test_values(self, dropna, aggfunc, basic_crosstab_dfs):
         query_count = 1
-        join_count = 2 if dropna else 5
+        join_count = 2 if dropna else 3
         native_df = basic_crosstab_dfs[0]
 
         with SqlCounter(query_count=query_count, join_count=join_count):
@@ -539,7 +539,7 @@ def test_values(self, dropna, aggfunc, basic_crosstab_dfs):
     @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"])
     def test_values_series_like(self, dropna, aggfunc, basic_crosstab_dfs):
         query_count = 5
-        join_count = 2 if dropna else 5
+        join_count = 2 if dropna else 3
         native_df, snow_df = basic_crosstab_dfs
 
         def eval_func(df):

diff --git a/tests/integ/modin/frame/test_assign.py b/tests/integ/modin/frame/test_assign.py
@@ -60,7 +60,7 @@ def assign_func(df):
 
 @pytest.mark.parametrize("new_col_value", [2, [10, 11, 12], "x"])
 def test_assign_basic_non_pandas_object(new_col_value):
-    join_count = 4 if isinstance(new_col_value, list) else 1
+    join_count = 3 if isinstance(new_col_value, list) else 1
     with SqlCounter(query_count=1, join_count=join_count):
         snow_df, native_df = create_test_dfs(
             [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -74,7 +74,7 @@ def test_assign_basic_non_pandas_object(new_col_value):
         )
 
 
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=3)
 def test_assign_invalid_long_column_length_negative():
     # pandas errors out in this test, since we are attempting to assign a column of length 5 to a DataFrame with length 3.
     # Snowpark pandas on the other hand, just truncates the last element of the new column so that it is the correct length. If we wanted
@@ -98,7 +98,7 @@ def test_assign_invalid_long_column_length_negative():
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_df, native_df)
 
 
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=3)
 def test_assign_invalid_short_column_length_negative():
     # pandas errors out in this test, since we are attempting to assign a column of length 2 to a DataFrame with length 3.
     # Snowpark pandas on the other hand, just broadcasts the last element of the new column so that it is filled. If we wanted
@@ -226,7 +226,7 @@ def test_assign_self_columns():
     )
 
 
-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=3)
 def test_overwrite_columns_via_assign():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],

diff --git a/tests/integ/modin/frame/test_cache_result.py b/tests/integ/modin/frame/test_cache_result.py
@@ -130,7 +130,7 @@ def test_cache_result_simple(self, inplace):
         native_df = perform_chained_operations(
             native_pd.DataFrame(np.arange(15).reshape((3, 5))), native_pd
         )
-        with SqlCounter(query_count=1, union_count=29):
+        with SqlCounter(query_count=1, union_count=11):
             snow_df = perform_chained_operations(snow_df, pd)
             assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
                 snow_df, native_df
@@ -160,7 +160,7 @@ def test_cache_result_post_pivot(self, inplace, simple_test_data):
         native_df = perform_chained_operations(
             native_df.pivot_table(**pivot_kwargs), native_pd
         )
-        with SqlCounter(query_count=1, join_count=10, union_count=9):
+        with SqlCounter(query_count=1, join_count=1, union_count=9):
             snow_df = perform_chained_operations(snow_df, pd)
             assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
                 snow_df, native_df
@@ -213,7 +213,7 @@ def test_cache_result_post_applymap(self, inplace, simple_test_data):
         with SqlCounter(
             query_count=11,
             union_count=9,
-            udf_count=2,
+            udf_count=1,
             high_count_expected=True,
             high_count_reason="applymap requires additional queries to setup the UDF.",
         ):