spark-rapids icon indicating copy to clipboard operation
spark-rapids copied to clipboard

Fix test failures in hash_aggregate_test.py

Open razajafri opened this issue 1 year ago • 1 comments

FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_agg_count
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_agg_nested_array
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_agg_nested_map
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_agg_nested_struct
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_arithmetic_reductions
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_computation_in_grpby_columns
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_count
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_count_distinct_with_nan_floats
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_decimal128_count_group_by
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_decimal128_count_reduction
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_distinct_count_reductions
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_distinct_float_count_reductions
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_exceptAll
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_generic_reductions
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_agg_force_pre_sort
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_agg_with_nan_keys
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_agg_with_struct_keys
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_aggregate_complete_with_grouping_expressions
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_avg_nulls_partial_only
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_count_with_filter
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_approx_percentile_byte_scalar
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_approx_percentile_decimal128_single
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_approx_percentile_decimal32_single
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_approx_percentile_decimal64_single
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_approx_percentile_long_single
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_collect_partial_replace_with_distinct_fallback
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_collect_set
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_collect_with_multi_distinct
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_collect_with_single_distinct
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_single_distinct_collect
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_grpby_avg
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_grpby_avg_nulls
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_grpby_pivot
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_grpby_sum
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_grpby_sum_count_action
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_grpby_sum_full_decimal
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_multiple_filters
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_multiple_grpby_pivot
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_multiple_mode_query
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_multiple_mode_query_avg_distincts
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_pivot_groupby_duplicates_fallback
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_query_max_with_multiple_distincts
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_query_multiple_distincts_with_non_distinct
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_reduction_avg_nulls
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_reduction_collect_set
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_reduction_decimal_overflow_sum
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_reduction_pivot
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_reduction_sum
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_reduction_sum_count_action
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_reduction_sum_full_decimal
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_intersectAll
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_reduction_nested_array
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_reduction_nested_map
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_reduction_nested_struct
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_struct_cast_groupby_count
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_struct_count_distinct_cast
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_subquery_in_agg

razajafri avatar Jun 08 '24 05:06 razajafri

Trying to tackle the biggish ones first. It looks like the majority of the problems here occur with spark.sql.ansi.enabled=true. The tests pass with ANSI mode disabled:

=============== 1661 passed, 435 warnings in 1137.02s (0:18:57) ================

mythrocks avatar Jun 11 '24 21:06 mythrocks

New failures in this test file with the Spark 4.0 release jar:

FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_reduction_sum[false-{'spark.rapids.sql.variableFloatAgg.enabled': 'true', 'spark.rapids.sql.castStringToFloat.enabled': 'true', 'spark.rapids.sql.batchSizeBytes': '250', 'spark.rapids.sql.agg.skipAggPassReductionRatio': '0'}-Long][DATAGEN_SEED_OVERRIDE=0, TZ=UTC, IGNORE_ORDER, INCOMPAT, APPROXIMATE_FLOAT] - pyspark.errors.exceptions.captured.ArithmeticException: [ARITHMETIC_OVERFLOW] long overflow. Use 'try_add' to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22003
FAILED ../../../../integration_tests/src/main/python/hash_aggregate_test.py::test_hash_groupby_approx_percentile_partial_fallback_to_cpu[true-true][DATAGEN_SEED=1749849332, TZ=UTC, INJECT_OOM, IGNORE_ORDER({'local': True}), INCOMPAT, ALLOW_NON_GPU(TakeOrderedAndProjectExec,Alias,Cast,ObjectHashAggregateExec,AggregateExpression,ApproximatePercentile,Literal,ShuffleExchangeExec,HashPartitioning,CollectLimitExec)] - py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.sql.rapids.ExecutionPlanCaptureCallback.assertDidFallBack.
: java.lang.AssertionError: assertion failed: Could not find ApproximatePercentile in the GPU plans:
ResultQueryStage 1
+- ObjectHashAggregate(keys=[k#86480], functions=[approx_percentile(v#86481, [0.1,0.2], 10000, 0, 0)], output=[k#86480, approx_percentile(v, array(0.1, 0.2), 10000)#86483])
   +- AQEShuffleRead coalesced
      +- ShuffleQueryStage 0
         +- Exchange hashpartitioning(k#86480, 4), ENSURE_REQUIREMENTS, [plan_id=145464]
            +- ObjectHashAggregate(keys=[k#86480], functions=[partial_approx_percentile(v#86481, [0.1,0.2], 10000, 0, 0)], output=[k#86480, buf#86485])
               +- *(1) Scan ExistingRDD[k#86480,v#86481]

	at scala.Predef$.assert(Predef.scala:279)
	at org.apache.spark.sql.rapids.ShimmedExecutionPlanCaptureCallbackImpl.assertDidFallBack(ShimmedExecutionPlanCaptureCallbackImpl.scala:153)
	at org.apache.spark.sql.rapids.ShimmedExecutionPlanCaptureCallbackImpl.assertDidFallBack(ShimmedExecutionPlanCaptureCallbackImpl.scala:165)
	at org.apache.spark.sql.rapids.ExecutionPlanCaptureCallback$.assertDidFallBack(ExecutionPlanCaptureCallback.scala:91)
	at org.apache.spark.sql.rapids.ExecutionPlanCaptureCallback.assertDidFallBack(ExecutionPlanCaptureCallback.scala)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:568)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:184)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:108)
	at java.base/java.lang.Thread.run(Thread.java:833)


============================================== 69 failed, 3893 passed, 1542 warnings in 2405.59s (0:40:05) ===============================================

nartal1 avatar Jun 13 '25 21:06 nartal1