Answers for flight hard dataset appear to be incorrect
Here is the sql for the first 10 questions about flight delays. I use the DOT definition where a flight is considered delayed if it is more that 15 after scheduled arrival time. I omit Cancelled flights. Question 0000 could be a decimal point error but the rest aren't even close.
"hard-flight-0000", "question": "What percentage of the flights from PIT were delayed on 2022-05-17?", "answer": "1.7%"
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'PIT' AND FlightDate = '2022-05-17' AND Cancelled = '0'
16.8
"hard-flight-0001", "question": "What percentage of the flights from JAC were delayed on 2022-02-24?", "answer": "11.1%"
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'JAC' AND FlightDate = '2022-02-24' AND Cancelled = '0'
12.5
"hard-flight-0002", "question": "What percentage of the flights from DEN were delayed on 2022-02-02?", "answer": "15.0%"
SELECT COUNT() AS TotalFlights FROM flights.flights_data WHERE Origin = 'DEN' AND FlightDate = '2022-02-02' AND Cancelled='0' SELECT COUNT() AS DelayedFlights FROM flights.flights_data WHERE Origin = 'DEN' AND FlightDate = '2022-02-02' AND CAST(ArrDelay AS DECIMAL) >= 15 AND Cancelled='0'
373 / 583
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'DEN' AND FlightDate = '2022-02-02' AND Cancelled = '0'
64.0
"hard-flight-0003", "question": "What percentage of the flights from HPN were delayed on 2022-01-30?", "answer": "37.1%"
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'HPN' AND FlightDate = '2022-01-30' AND Cancelled = '0'
22.7
"hard-flight-0004", "question": "What percentage of the flights from STT were delayed on 2022-03-19?", "answer": "4.0%"
SELECT COUNT() AS TotalFlights FROM flights.flights_data WHERE Origin = 'STT' AND FlightDate = '2022-03-19' AND Cancelled='0' SELECT COUNT() AS DelayedFlights FROM flights.flights_data WHERE Origin = 'STT' AND FlightDate = '2022-03-19' AND CAST(ArrDelay AS DECIMAL) >= 15 AND Cancelled='0'
14 / 24
SELECT Flight_Number_Marketing_Airline, Cancelled, DepDelay, ArrDelay, ArrDelayMinutes FROM flights.flights_data WHERE Origin = 'STT' AND FlightDate = '2022-03-19' AND CAST(ArrDelay AS DECIMAL) >= 15
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'STT' AND FlightDate = '2022-03-19' AND Cancelled = '0'
58.3
"hard-flight-0005", "question": "What percentage of the flights from ORD were delayed on 2022-06-13?", "answer": "6.9%"
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'ORD' AND FlightDate = '2022-06-13' AND Cancelled = '0'
38.6
"hard-flight-0006", "question": "What percentage of the flights from LAW were delayed on 2022-04-24?", "answer": "50.0%"
SELECT COUNT() AS TotalFlights FROM flights.flights_data WHERE Origin = 'LAW' AND FlightDate = '2022-04-24' AND Cancelled='0' SELECT COUNT() AS DelayedFlights FROM flights.flights_data WHERE Origin = 'LAW' AND FlightDate = '2022-04-24' AND CAST(ArrDelay AS DECIMAL) >= 15 AND Cancelled='0'
0 / 2 1 of 2 flights was 4 minutes late
SELECT Flight_Number_Marketing_Airline, Cancelled, DepDelay, ArrDelay, ArrDelayMinutes FROM flights.flights_data WHERE Origin = 'LAW' AND FlightDate = '2022-04-24' AND CAST(ArrDelay AS DECIMAL) >= 15 SELECT Flight_Number_Marketing_Airline, Cancelled, DepDelay, ArrDelay, ArrDelayMinutes FROM flights.flights_data WHERE Origin = 'LAW' AND FlightDate = '2022-04-24'
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'LAW' AND FlightDate = '2022-04-24' AND Cancelled = '0'
0
"hard-flight-0007", "question": "What percentage of the flights from PIT were delayed on 2022-01-28?", "answer": "9.6%"
SELECT COUNT() AS TotalFlights FROM flights.flights_data WHERE Origin = 'PIT' AND FlightDate = '2022-01-28' AND Cancelled='0' SELECT COUNT() AS DelayedFlights FROM flights.flights_data WHERE Origin = 'PIT' AND FlightDate = '2022-01-28' AND CAST(ArrDelay AS DECIMAL) >= 15 AND Cancelled='0'
65 / 104 1 of 2 flights was 4 minutes late
SELECT Flight_Number_Marketing_Airline, Cancelled, DepDelay, ArrDelay, ArrDelayMinutes FROM flights.flights_data WHERE Origin = 'PIT' AND FlightDate = '2022-01-28' AND CAST(ArrDelay AS DECIMAL) >= 15 AND Cancelled='0' SELECT Flight_Number_Marketing_Airline, Cancelled, DepDelay, ArrDelay, ArrDelayMinutes FROM flights.flights_data WHERE Origin = 'PIT' AND FlightDate = '2022-01-28' AND Cancelled='0'
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'PIT' AND FlightDate = '2022-01-28' AND Cancelled = '0'
62.5
"hard-flight-0008", "question": "What percentage of the flights from MDW were delayed on 2022-01-04?", "answer": "13.8%"
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'MDW' AND FlightDate = '2022-01-04' AND Cancelled = '0'
64.9
"hard-flight-0009", "question": "What percentage of the flights from CLE were delayed on 2022-04-09?", "answer": "4.8%"
SELECT ROUND( (SUM(CASE WHEN CAST(ArrDelay AS DECIMAL) >= 15 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 1 ) AS DelayedPercentage FROM flights.flights_data WHERE Origin = 'CLE' AND FlightDate = '2022-04-09' AND Cancelled = '0'