Gadfly.jl icon indicating copy to clipboard operation
Gadfly.jl copied to clipboard

Error when stacking DataFrame during plot according to the documentation.

Open nathanrboyer opened this issue 2 years ago • 2 comments

I am trying to follow the documentation below.

In some cases, explicitly transforming the data can be burdensome. Gadfly lets you avoid this by referring to columns or groups of columns in an implicit long-form version of the data.

plot(births, x=:Year, y=Col.value(:Males, :Females),
    color=Col.index(:Males, :Females), Geom.line)

However, I get an error with each of the variations of this method shown below.

julia> df = outerjoin(yield_table, ultimate_table, on = "Temperature (°F)") |> sort!
15×3 DataFrame
 Row │ Temperature (°F)  Yield Strength (psi)  Ultimate Strength (psi) 
     │ Int64             Float64?              Float64?
─────┼─────────────────────────────────────────────────────────────────
   1 │              100              120000.0                 135000.0
   2 │              150              117000.0                missing
   3 │              200              115200.0                 135000.0
   4 │              250              113600.0                missing
   5 │              300              112200.0                 135000.0
   6 │              400              110300.0                 135000.0
   7 │              500              108800.0                 135000.0
   8 │              600              106700.0                 135000.0
   9 │              650              105200.0                 132900.0
  10 │              700              103200.0                 129700.0
  11 │              750              100800.0                 125800.0
  12 │              800               97900.0                 121300.0
  13 │              850               97100.0                missing
  14 │              900               95200.0                missing
  15 │              950               93400.0                missing
julia> plot(df,
           x="Temperature (°F)",
           y=Col.value("Yield Strength (psi)", "Ultimate Strength (psi)"),
           color=Col.index("Yield Strength (psi)", "Ultimate Strength (psi)")
           )
ERROR: MethodError: no method matching value(::String, ::String)
Stacktrace:
 [1] top-level scope
   @ REPL[130]:1
julia> plot(df,
           x="Temperature (°F)",
           y=Col.value(Symbol("Yield Strength (psi)"), Symbol("Ultimate Strength (psi)")),
           color=Col.index(Symbol("Yield Strength (psi)"), Symbol("Ultimate Strength (psi)"))
           )
ERROR: MethodError: Cannot `convert` an object of type Type{Union{Missing, Float64}} to an object of type DataType
Closest candidates are:
  convert(::Type{T}, ::T) where T at C:\Users\nboyer.AIP\.julia\juliaup\julia-1.7.2+0~x64\share\julia\base\essentials.jl:218
Stacktrace:
 [1] setindex!(h::Dict{Symbol, DataType}, v0::Type, key::Symbol)
   @ Base .\dict.jl:381
 [2] Dict{Symbol, DataType}(kv::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Type}}}, Gadfly.var"#139#141"})
   @ Base .\dict.jl:104
 [3] meltdata(U::DataFrame, colgroups::Vector{Gadfly.Col.GroupedColumn})
   @ Gadfly C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\dataframes.jl:23
 [4] evalmapping!(mapping::Dict{Symbol, Any}, data_source::DataFrame, data::Gadfly.Data)
   @ Gadfly C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\mapping.jl:228
 [5] plot
   @ C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\Gadfly.jl:327 [inlined]
 [6] #plot#77
   @ C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\Gadfly.jl:289 [inlined]
 [7] top-level scope
   @ REPL[132]:1
julia> plot(df,
           x="Temperature (°F)",
           y=Col.value(),
           color=Col.index()
           )
ERROR: MethodError: Cannot `convert` an object of type Type{Union{Missing, Float64}} to an object of type DataType
Closest candidates are:
  convert(::Type{T}, ::T) where T at C:\Users\nboyer.AIP\.julia\juliaup\julia-1.7.2+0~x64\share\julia\base\essentials.jl:218
Stacktrace:
 [1] setindex!(h::Dict{Symbol, DataType}, v0::Type, key::Symbol)
   @ Base .\dict.jl:381
 [2] Dict{Symbol, DataType}(kv::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Symbol}, Vector{Type}}}, Gadfly.var"#139#141"})
   @ Base .\dict.jl:104
 [3] meltdata(U::DataFrame, colgroups::Vector{Gadfly.Col.GroupedColumn})
   @ Gadfly C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\dataframes.jl:23
 [4] evalmapping!(mapping::Dict{Symbol, Any}, data_source::DataFrame, data::Gadfly.Data)
   @ Gadfly C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\mapping.jl:228
 [5] plot
   @ C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\Gadfly.jl:327 [inlined]
 [6] #plot#77
   @ C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\Gadfly.jl:289 [inlined]
 [7] top-level scope
   @ REPL[131]:1

These other two methods work, but aren't as elegant.

plot(df,
    Guide.ylabel("Strength (psi)"),
    layer(x="Temperature (°F)", y="Yield Strength (psi)", color=["Yield Strength (psi)"]),
    layer(x="Temperature (°F)", y="Ultimate Strength (psi)", color=["Ultimate Strength (psi)"]))
plot(stack(df, ["Yield Strength (psi)", "Ultimate Strength (psi)"]),
    Guide.ylabel("Strength (psi)"),
    Guide.colorkey("Color"),
    x="Temperature (°F)",
    y=:value,
    color=:variable
    )

Is there not an easier way to plot two columns with an automatic legend?

nathanrboyer avatar Apr 28 '22 19:04 nathanrboyer

Does it work if your data doesn't have missing values?

bjarthur avatar Jun 11 '22 12:06 bjarthur

Removing the missing values gives the same error for attempt 1, fixes attempt 2, and yields a different error for attempt 3. In other words, the documented approach does not work with String column names (attempt 1). Converting the column names to Symbols (attempt 2) works as long as there are no missing values, but the implicit form with no column arguments, `Col.value()` and `Col.index()`, still fails (attempt 3).

julia> df = DataFrame("Temperature (°F)" => [100,200],
                       "Yield Strength (psi)" => [130000.0, 125000.0],
                       "Ultimate Strength (psi)" => [145000.0, 145000.0])
2×3 DataFrame
 Row │ Temperature (°F)  Yield Strength (psi)  Ultimate Strength (psi) 
     │ Int64             Float64               Float64
─────┼─────────────────────────────────────────────────────────────────
   1 │              100              130000.0                 145000.0
   2 │              200              125000.0                 145000.0

julia> plot(df,
                  x="Temperature (°F)",
                  y=Col.value("Yield Strength (psi)", "Ultimate Strength (psi)"),
                  color=Col.index("Yield Strength (psi)", "Ultimate Strength (psi)")
                  )
ERROR: MethodError: no method matching value(::String, ::String)
Stacktrace:
 [1] top-level scope
   @ REPL[10]:1

julia> plot(df,
                  x="Temperature (°F)",
                  y=Col.value(Symbol("Yield Strength (psi)"), Symbol("Ultimate Strength (psi)")),
                  color=Col.index(Symbol("Yield Strength (psi)"), Symbol("Ultimate Strength (psi)"))
                  ) # Works if no missing values

julia> plot(df,
                  x="Temperature (°F)",
                  y=Col.value(),
                  color=Col.index()
                             )
ERROR: KeyError: key Symbol("Temperature (°F)") not found
Stacktrace:
 [1] getindex(h::Dict{Any, Int64}, key::Symbol)
   @ Base .\dict.jl:481
 [2] evalmapping(source::Gadfly.MeltedData{DataFrame}, arg::String)
   @ Gadfly C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\mapping.jl:196
 [3] _evalmapping!(mapping::Dict{Symbol, Any}, data_source::Gadfly.MeltedData{DataFrame}, data::Gadfly.Data)
   @ Gadfly C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\mapping.jl:204
 [4] evalmapping!(mapping::Dict{Symbol, Any}, data_source::DataFrame, data::Gadfly.Data)
   @ Gadfly C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\mapping.jl:233
 [5] plot
   @ C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\Gadfly.jl:327 [inlined]
 [6] #plot#77
   @ C:\Users\nboyer.AIP\.julia\packages\Gadfly\B5yQc\src\Gadfly.jl:289 [inlined]
 [7] top-level scope
   @ REPL[12]:1

nathanrboyer avatar Jun 13 '22 13:06 nathanrboyer