spark-rapids
spark-rapids copied to clipboard
[FEA] Support First() in windowing context with double type
Is your feature request related to a problem? Please describe. This is a feature request to support the first() function in a windowing context with the double type.
For example:
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions._
// Sample rows: a nested name struct, id, gender, a double salary, a birthday
// held as a string, and a list of languages.
val data = Seq(
  Row(Row("Adam ", "", "Green"), "1", "M", 1000.1, "2019-01-01", List("Java", "Scala")),
  Row(Row("Bob ", "Middle", "Green"), "2", "M", 2000.2, "2019-01-02", List("Java", "Python")),
  Row(Row("Cathy ", "", "Green"), "3", "F", 3000.3, "2019-01-03", List())
)

// Schema for the rows above; `salary` is the DoubleType column the window
// query below operates on.
val schema = StructType(Seq(
  StructField("name", StructType(Seq(
    StructField("firstname", StringType),
    StructField("middlename", StringType),
    StructField("lastname", StringType)
  ))),
  StructField("id", StringType),
  StructField("gender", StringType),
  StructField("salary", DoubleType),
  StructField("birthdayStr", StringType),
  StructField("language", ArrayType(StringType))
))

val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)

// Add a `birthday` column parsed from the string column, then write the
// result out as Parquet (overwriting any previous run) and read it back.
val parquetPath = "/tmp/testparquet"
df.withColumn("birthday", to_date(col("birthdayStr")))
  .write
  .format("parquet")
  .mode("overwrite")
  .save(parquetPath)

val df2 = spark.read.parquet(parquetPath)
df2.createOrReplaceTempView("df2")
df2.printSchema

// The query this feature request is about: first() over a window, applied to
// the double-typed `salary` column.
sql("""SELECT gender, first(salary,true) OVER (PARTITION BY gender ORDER BY salary) FROM df2""").collect
@res-life can you keep me updated for this feature once it is done? Thx.
@res-life can you keep me updated for this feature once it is done? Thx.
OK