From f95531215b42cc755a0809b1f370301d05c8e71d Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Thu, 6 Nov 2025 07:15:03 -0700
Subject: [PATCH] rebase

---
 .../scala/org/apache/comet/CometConf.scala    | 23 +++++++----
 docs/source/user-guide/latest/configs.md      |  2 +-
 .../apache/comet/ExtendedExplainInfo.scala    | 41 ++++++++-----------
 .../apache/comet/rules/CometExecRule.scala    |  4 +-
 .../comet/CometArrayExpressionSuite.scala     |  9 ++--
 .../apache/comet/exec/CometExecSuite.scala    |  5 +--
 .../spark/sql/CometTPCQueryListBase.scala     |  1 -
 .../org/apache/spark/sql/CometTestBase.scala  |  4 +-
 .../sql/comet/CometPlanStabilitySuite.scala   |  1 -
 9 files changed, 41 insertions(+), 49 deletions(-)

diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
index d48d149728..4368da9b46 100644
--- a/common/src/main/scala/org/apache/comet/CometConf.scala
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -459,16 +459,21 @@ object CometConf extends ShimCometConf {
       .booleanConf
       .createWithDefault(false)
 
-  val COMET_EXPLAIN_VERBOSE_ENABLED: ConfigEntry[Boolean] =
-    conf("spark.comet.explain.verbose.enabled")
+  val COMET_EXTENDED_EXPLAIN_FORMAT_VERBOSE = "verbose"
+  val COMET_EXTENDED_EXPLAIN_FORMAT_FALLBACK = "fallback"
+
+  val COMET_EXTENDED_EXPLAIN_FORMAT: ConfigEntry[String] =
+    conf("spark.comet.explain.format")
       .category(CATEGORY_EXEC_EXPLAIN)
-      .doc(
-        "When this setting is enabled, Comet's extended explain output will provide the full " +
-          "query plan annotated with fallback reasons as well as a summary of how much of " +
-          "the plan was accelerated by Comet. When this setting is disabled, a list of fallback " +
-          "reasons will be provided instead.")
-      .booleanConf
-      .createWithDefault(false)
+      .doc("Choose extended explain output. The default format of " +
+        s"'$COMET_EXTENDED_EXPLAIN_FORMAT_VERBOSE' will provide the full query plan annotated " +
+        "with fallback reasons as well as a summary of how much of the plan was accelerated " +
+        s"by Comet. The format '$COMET_EXTENDED_EXPLAIN_FORMAT_FALLBACK' provides a list of " +
+        "fallback reasons instead.")
+      .stringConf
+      .checkValues(
+        Set(COMET_EXTENDED_EXPLAIN_FORMAT_VERBOSE, COMET_EXTENDED_EXPLAIN_FORMAT_FALLBACK))
+      .createWithDefault(COMET_EXTENDED_EXPLAIN_FORMAT_VERBOSE)
 
   val COMET_EXPLAIN_NATIVE_ENABLED: ConfigEntry[Boolean] =
     conf("spark.comet.explain.native.enabled")
diff --git a/docs/source/user-guide/latest/configs.md b/docs/source/user-guide/latest/configs.md
index 6caaa53b1b..77c95ab0e3 100644
--- a/docs/source/user-guide/latest/configs.md
+++ b/docs/source/user-guide/latest/configs.md
@@ -82,9 +82,9 @@ These settings can be used to determine which parts of the plan are accelerated
 
 | Config | Description | Default Value |
 |--------|-------------|---------------|
+| `spark.comet.explain.format` | Choose extended explain output. The default format of 'verbose' will provide the full query plan annotated with fallback reasons as well as a summary of how much of the plan was accelerated by Comet. The format 'fallback' provides a list of fallback reasons instead. | verbose |
 | `spark.comet.explain.native.enabled` | When this setting is enabled, Comet will provide a tree representation of the native query plan before execution and again after execution, with metrics. | false |
 | `spark.comet.explain.rules` | When this setting is enabled, Comet will log all plan transformations performed in physical optimizer rules. Default: false | false |
-| `spark.comet.explain.verbose.enabled` | When this setting is enabled, Comet's extended explain output will provide the full query plan annotated with fallback reasons as well as a summary of how much of the plan was accelerated by Comet. When this setting is disabled, a list of fallback reasons will be provided instead. | false |
 | `spark.comet.explainFallback.enabled` | When this setting is enabled, Comet will provide logging explaining the reason(s) why a query stage cannot be executed natively. Set this to false to reduce the amount of logging. | false |
 | `spark.comet.logFallbackReasons.enabled` | When this setting is enabled, Comet will log warnings for all fallback reasons. | false |
 
diff --git a/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala b/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala
index b3f6579bdc..56ae64ed68 100644
--- a/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala
+++ b/spark/src/main/scala/org/apache/comet/ExtendedExplainInfo.scala
@@ -34,16 +34,26 @@ class ExtendedExplainInfo extends ExtendedExplainGenerator {
 
   override def title: String = "Comet"
 
-  override def generateExtendedInfo(plan: SparkPlan): String = {
-    if (CometConf.COMET_EXPLAIN_VERBOSE_ENABLED.get()) {
-      generateVerboseExtendedInfo(plan)
-    } else {
-      val info = getFallbackReasons(plan)
-      info.toSeq.sorted.mkString("\n").trim
+  def generateExtendedInfo(plan: SparkPlan): String = {
+    CometConf.COMET_EXTENDED_EXPLAIN_FORMAT.get() match {
+      case CometConf.COMET_EXTENDED_EXPLAIN_FORMAT_VERBOSE =>
+        // Generates the extended info in a verbose manner, printing each node along with the
+        // extended information in a tree display.
+        val planStats = new CometCoverageStats()
+        val outString = new StringBuilder()
+        generateTreeString(getActualPlan(plan), 0, Seq(), 0, outString, planStats)
+        s"${outString.toString()}\n$planStats"
+      case CometConf.COMET_EXTENDED_EXPLAIN_FORMAT_FALLBACK =>
+        // Generates the extended info as a list of fallback reasons
+        getFallbackReasons(plan).mkString("\n").trim
     }
   }
 
-  def getFallbackReasons(node: TreeNode[_]): Set[String] = {
+  def getFallbackReasons(plan: SparkPlan): Seq[String] = {
+    extensionInfo(plan).toSeq.sorted
+  }
+
+  private[comet] def extensionInfo(node: TreeNode[_]): Set[String] = {
     var info = mutable.Seq[String]()
     val sorted = sortup(node)
     sorted.foreach { p =>
@@ -80,23 +90,6 @@ class ExtendedExplainInfo extends ExtendedExplainGenerator {
     ordered.reverse
   }
 
-  // generates the extended info in a verbose manner, printing each node along with the
-  // extended information in a tree display
-  def generateVerboseExtendedInfo(plan: SparkPlan): String = {
-    val planStats = new CometCoverageStats()
-    val outString = new StringBuilder()
-    generateTreeString(getActualPlan(plan), 0, Seq(), 0, outString, planStats)
-    s"${outString.toString()}\n$planStats"
-  }
-
-  /** Get the coverage statistics without the full plan */
-  def generateCoverageInfo(plan: SparkPlan): String = {
-    val planStats = new CometCoverageStats()
-    val outString = new StringBuilder()
-    generateTreeString(getActualPlan(plan), 0, Seq(), 0, outString, planStats)
-    planStats.toString()
-  }
-
   // Simplified generateTreeString from Spark TreeNode. Appends explain info to the node if any
   def generateTreeString(
       node: TreeNode[_],
diff --git a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala
index 52bbc07dc6..1ba086784d 100644
--- a/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala
+++ b/spark/src/main/scala/org/apache/comet/rules/CometExecRule.scala
@@ -651,12 +651,12 @@ case class CometExecRule(session: SparkSession) extends Rule[SparkPlan] {
     // config is enabled)
     if (CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.get()) {
       val info = new ExtendedExplainInfo()
-      if (info.getFallbackReasons(newPlan).nonEmpty) {
+      if (info.extensionInfo(newPlan).nonEmpty) {
         logWarning(
           "Comet cannot execute some parts of this plan natively " +
             s"(set ${CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key}=false " +
             "to disable this logging):\n" +
-            s"${info.generateVerboseExtendedInfo(newPlan)}")
+            s"${info.generateExtendedInfo(newPlan)}")
       }
     }
 
diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
index 3239bc020d..758311f1fb 100644
--- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
@@ -19,7 +19,6 @@
 
 package org.apache.comet
 
-import scala.collection.immutable.HashSet
 import scala.util.Random
 
 import org.apache.hadoop.fs.Path
@@ -126,11 +125,11 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
         spark.read.parquet(path.toString).createOrReplaceTempView("t1")
         sql("SELECT array(struct(_1, _2)) as a, struct(_1, _2) as b FROM t1")
           .createOrReplaceTempView("t2")
-        val expectedFallbackReasons = HashSet(
-          "data type not supported: ArrayType(StructType(StructField(_1,BooleanType,true),StructField(_2,ByteType,true)),false)")
-        checkSparkAnswerAndFallbackReasons(
+        val expectedFallbackReason =
+          "data type not supported: ArrayType(StructType(StructField(_1,BooleanType,true),StructField(_2,ByteType,true)),false)"
+        checkSparkAnswerAndFallbackReason(
           sql("SELECT array_remove(a, b) FROM t2"),
-          expectedFallbackReasons)
+          expectedFallbackReason)
       }
     }
   }
diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
index 56174c7fc0..e33927c1ca 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
@@ -119,10 +119,7 @@ class CometExecSuite extends CometTestBase {
         val infos = new ExtendedExplainInfo().generateExtendedInfo(cometPlan)
         assert(infos.contains("Dynamic Partition Pruning is not supported"))
 
-        withSQLConf(CometConf.COMET_EXPLAIN_VERBOSE_ENABLED.key -> "true") {
-          val extendedExplain = new ExtendedExplainInfo().generateExtendedInfo(cometPlan)
-          assert(extendedExplain.contains("Comet accelerated"))
-        }
+        assert(infos.contains("Comet accelerated"))
       }
     }
   }
diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTPCQueryListBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTPCQueryListBase.scala
index 8cabb3a5bb..94915d0b82 100644
--- a/spark/src/test/scala/org/apache/spark/sql/CometTPCQueryListBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/CometTPCQueryListBase.scala
@@ -87,7 +87,6 @@ trait CometTPCQueryListBase
       CometConf.COMET_ENABLED.key -> "true",
       CometConf.COMET_EXEC_ENABLED.key -> "true",
       CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
-      CometConf.COMET_EXPLAIN_VERBOSE_ENABLED.key -> "true",
       // Lower bloom filter thresholds to allows us to simulate the plan produced at larger scale.
       "spark.sql.optimizer.runtime.bloomFilter.creationSideThreshold" -> "1MB",
       "spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizeThreshold" -> "1MB") {
diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala
index 1854edf590..df0064f19e 100644
--- a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala
@@ -283,7 +283,7 @@ abstract class CometTestBase
       if (actualFallbacks.isEmpty) {
         fail(
           s"Expected fallback reason '$reason' but no fallback reasons were found. Explain: ${explainInfo
-              .generateVerboseExtendedInfo(cometPlan)}")
+              .generateExtendedInfo(cometPlan)}")
       } else {
         fail(
           s"Expected fallback reason '$reason' not found in [${actualFallbacks.mkString(", ")}]")
@@ -375,7 +375,7 @@ abstract class CometTestBase
         assert(
           false,
           s"Expected only Comet native operators, but found ${op.nodeName}.\n" +
-            s"plan: ${new ExtendedExplainInfo().generateVerboseExtendedInfo(plan)}")
+            s"plan: ${new ExtendedExplainInfo().generateExtendedInfo(plan)}")
       case _ =>
     }
   }
diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala
index 67d1c8240b..3c3264a819 100644
--- a/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala
@@ -217,7 +217,6 @@ trait CometPlanStabilitySuite extends DisableAdaptiveExecutionSuite with TPCDSBa
 
     withSQLConf(
      CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key -> "true",
-      CometConf.COMET_EXPLAIN_VERBOSE_ENABLED.key -> "true",
      CometConf.COMET_ENABLED.key -> "true",
      CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "true",
      CometConf.COMET_EXEC_ENABLED.key -> "true",
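
Note: the following is an illustrative usage sketch, not part of the patch above. It shows how the new 'spark.comet.explain.format' option introduced here might be exercised from a Spark application. The plugin class and the extended-explain-provider setting are assumptions taken from the Comet user guide (not from this patch) and may differ in your deployment.

import org.apache.spark.sql.SparkSession

object CometExplainFormatExample {
  def main(args: Array[String]): Unit = {
    // Assumed setup: Comet is on the classpath, registered as a Spark plugin and as an
    // extended explain provider. Both settings below are assumptions, not defined in this patch.
    val spark = SparkSession
      .builder()
      .appName("comet-explain-format-example")
      .config("spark.plugins", "org.apache.spark.CometPlugin")
      .config("spark.sql.extendedExplainProviders", "org.apache.comet.ExtendedExplainInfo")
      // New config from this patch: "verbose" (default) or "fallback".
      .config("spark.comet.explain.format", "fallback")
      .getOrCreate()

    val df = spark.range(1000).selectExpr("id % 7 AS k", "id").groupBy("k").count()

    // With format=fallback the Comet section of the extended explain output lists only the
    // fallback reasons; with format=verbose it prints the annotated plan plus a coverage summary.
    df.explain("extended")

    spark.stop()
  }
}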