Unexpected results using SPARQL GROUP BY
select * where { graph ?g {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}}
| g | o | t | prop | val | unit |
|---|---|---|---|---|---|
| http://www.example.org | http://www.example.org/obs16 | 2014-01-01T08:02:00 | sst | 15.6 | Deg C |
| http://www.example.org | http://www.example.org/obs17 | 2014-01-01T08:33:00 | sst | 15.6 | Deg C |
select ?prop avg(?val) as ?avg where { graph ?g {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}} group by ?prop
| prop | avg |
|---|---|
| sst | 15.60000038146973 |
Any ideas? Not sure where to start.
Continuing further, strangely enough...
select ?prop ?val where { graph ?g {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}} group by ?prop ?val ?unit
results in:
| prop | val |
|---|---|
| sst | 15.6 |
| sst | 0.0 |
while ...
select ?prop ?val where { graph ?g {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}} group by ?prop ?val
results in:
| prop | val |
|---|---|
| sst | 15.6 |
| sst | -9.49985e+35 |
and ...
select ?prop ?val ?unit where { graph ?g {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}} group by ?prop ?val ?unit
results in:
| prop | val | unit |
|---|---|---|
| sst | 15.6 | Deg C |
| sst | -1.66102e-31 | Deg C |
Leading to a little confusion??
Sample data (.n3.gz, loaded via rdf_loader_run):
ex:obs16 a sosa:Observation ;
sosa:resultTime "2014-01-01T08:02:00"^^xsd:dateTime ;
sosa:observedProperty "sst"^^xsd:string ;
sosa:hasResult ex:result-sst-m-15.6 .
ex:result-sst-m-15.6 a sosa:Result;
qudt-1-1:numericValue "15.6"^^xsd:float ;
qudt-1-1:unit "m"^^xsd:string .
ex:obs17 a sosa:Observation ;
sosa:resultTime "2014-01-01T08:33:00"^^xsd:dateTime ;
sosa:observedProperty "sst"^^xsd:string ;
sosa:hasResult ex:result-sst-m-15.6 .
ex:result-sst-m-15.6 a sosa:Result;
qudt-1-1:numericValue "15.6"^^xsd:float ;
qudt-1-1:unit "m"^^xsd:string .
Trying to deduce what's happening from SPARQL query text and results tables is challenging. Thus, do you have any of the following:
- A live SPARQL endpoint?
- A small RDF data sample -- which you can produce using a CONSTRUCT query
- Ditto, but from an RDF document you may already have in place
Alternatively, are you able to recreate this issue using our live instance at https://linkeddata.uriburner.com/sparql or even https://dbpedia.org/sparql?
Agreed. Just edited my original comment to add sample data at the same time that you posted.
Sample data (.n3.gz, loaded via rdf_loader_run):
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix sosa: <http://www.w3.org/ns/sosa/>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix ex: <http://www.example.com/>
prefix qudt-1-1: <http://qudt.org/1.1/schema/qudt#>
prefix qudt-unit-1-1: <http://qudt.org/1.1/vocab/unit#>
ex:obs16 a sosa:Observation ;
sosa:resultTime "2014-01-01T08:02:00"^^xsd:dateTime ;
sosa:observedProperty "sst"^^xsd:string ;
sosa:hasResult ex:result-sst-m-15.6 .
ex:result-sst-m-15.6 a sosa:Result;
qudt-1-1:numericValue "15.6"^^xsd:float ;
qudt-1-1:unit "m"^^xsd:string .
ex:obs17 a sosa:Observation ;
sosa:resultTime "2014-01-01T08:33:00"^^xsd:dateTime ;
sosa:observedProperty "sst"^^xsd:string ;
sosa:hasResult ex:result-sst-m-15.6 .
ex:result-sst-m-15.6 a sosa:Result;
qudt-1-1:numericValue "15.6"^^xsd:float ;
qudt-1-1:unit "m"^^xsd:string .
The only issue I can think of is that Result nodes are duplicated in the RDF, simply because it made the original data processing task easier. Perhaps this could be causing an issue?
select ?prop ?val ?unit where {
select ?prop ?val ?unit where { graph ?g {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}}
} group by ?prop ?val ?unit
| prop | val | unit |
|---|---|---|
| sst | 15.6 | Deg C |
| sst | 123.553 | Deg C |
select ?prop ?val ?unit where { graph ?g {
select ?prop ?val ?unit where {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}}
} group by ?prop ?val ?unit
| prop | val | unit |
|---|---|---|
| sst | 15.6 | Deg C |
| sst | NaN | Deg C |
select ?prop ?val ?unit where {
select ?prop ?val ?unit where {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}
} group by ?prop ?val ?unit
| prop | val | unit |
|---|---|---|
| sst | 15.6 | Deg C |
| sst | -9.46663e+35 | Deg C |
And ...
select ?prop ?val ?unit ?g where {
select ?prop ?val ?unit where { graph ?g {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}}
} group by ?prop ?val ?unit
| prop | val | unit |
|---|---|---|
| sst | 15.6 | Deg C |
| sst | NaN | Deg C |
select ?prop ?val ?unit ?g where {
select ?prop ?val ?unit where { graph ?g {
?o a sosa:Observation ;
sosa:resultTime ?t ;
sosa:observedProperty ?prop ;
sosa:hasResult [
qudt-1-1:numericValue ?val ;
qudt-1-1:unit ?unit ] .
filter (?o = ex:obs16 || ?o = ex:obs17)
}}
} group by ?prop ?val ?unit ?g
| prop | val | unit |
|---|---|---|
| sst | 15.6 | Deg C |
| sst | 0.0 | Deg C |
After changing the datatype of the result value from xsd:float to xsd:decimal, all of the above queries return the expected results:
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix sosa: <http://www.w3.org/ns/sosa/>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix ex: <http://www.example.com/>
prefix qudt-1-1: <http://qudt.org/1.1/schema/qudt#>
prefix qudt-unit-1-1: <http://qudt.org/1.1/vocab/unit#>
ex:obs16 a sosa:Observation ;
sosa:resultTime "2014-01-01T08:02:00"^^xsd:dateTime ;
sosa:observedProperty "sst"^^xsd:string ;
sosa:hasResult ex:result-sst-m-15.6 .
ex:result-sst-m-15.6 a sosa:Result;
qudt-1-1:numericValue "15.6"^^xsd:decimal ;
qudt-1-1:unit "m"^^xsd:string .
ex:obs17 a sosa:Observation ;
sosa:resultTime "2014-01-01T08:33:00"^^xsd:dateTime ;
sosa:observedProperty "sst"^^xsd:string ;
sosa:hasResult ex:result-sst-m-15.6 .
ex:result-sst-m-15.6 a sosa:Result;
qudt-1-1:numericValue "15.6"^^xsd:decimal ;
qudt-1-1:unit "m"^^xsd:string .
So is there a recommendation against the use of xsd:float?