spark-rapids icon indicating copy to clipboard operation
spark-rapids copied to clipboard

[FEA] Support First() in windowing context with double type

Open res-life opened this issue 2 years ago • 2 comments

**Is your feature request related to a problem? Please describe.**
This is a feature request to support the `first()` function in a windowing context with the double type.

For example:

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions._

// Sample rows for the repro. Field order per row:
// (name struct, id, gender, salary, birthday string, languages).
// Two rows share gender "M" and one has "F"; salary is a double literal in every row.
val adam = Row(Row("Adam ", "", "Green"), "1", "M", 1000.1, "2019-01-01", List("Java", "Scala"))
val bob = Row(Row("Bob ", "Middle", "Green"), "2", "M", 2000.2, "2019-01-02", List("Java", "Python"))
// Cathy has an empty language list.
val cathy = Row(Row("Cathy ", "", "Green"), "3", "F", 3000.3, "2019-01-03", List.empty[String])

val data = Seq(adam, bob, cathy)

// Nested struct describing the three parts of a person's name.
// The surrounding parentheses keep the multi-line leading-dot chain a single expression.
val nameType = (new StructType()
  .add("firstname", StringType)
  .add("middlename", StringType)
  .add("lastname", StringType)
)

// Top-level schema: a name struct, three string columns, a double `salary`
// column, and an array-of-strings `language` column.
val schema = (new StructType()
  .add("name", nameType)
  .add("id", StringType)
  .add("gender", StringType)
  .add("salary", DoubleType)
  .add("birthdayStr", StringType)
  .add("language", ArrayType(StringType))
)

// Location the sample data is written to and read back from.
val parquetPath = "/tmp/testparquet"

// Build the DataFrame from the sample rows and the explicit schema.
val rowsRdd = spark.sparkContext.parallelize(data)
val df = spark.createDataFrame(rowsRdd, schema)

// Add a `birthday` column derived from `birthdayStr`, then write the result as
// parquet, overwriting any previous output at the same path.
val withBirthday = df.withColumn("birthday", to_date(col("birthdayStr")))
withBirthday.write.format("parquet").mode("overwrite").save(parquetPath)

// Read the data back and expose it to SQL under the view name "df2".
val df2 = spark.read.parquet(parquetPath)
df2.createOrReplaceTempView("df2")
df2.printSchema

// The query this issue is about: first() over a window, applied to the double
// `salary` column, partitioned by gender and ordered by salary.
val firstSalaryQuery = """SELECT gender, first(salary,true) OVER (PARTITION BY gender ORDER BY salary) FROM df2"""
sql(firstSalaryQuery).collect


res-life avatar Aug 02 '22 01:08 res-life

@res-life can you keep me updated for this feature once it is done? Thx.

viadea avatar Aug 05 '22 19:08 viadea

> @res-life can you keep me updated for this feature once it is done? Thx.

OK

res-life avatar Aug 11 '22 02:08 res-life