dolphindb-vs-clickhouse
dolphindb-vs-clickhouse copied to clipboard
Your CH does not use partition pruning. So CH reads data for all days.
CREATE TABLE IF NOT EXISTS taq_local
(
`symbol` String,
`date` Date,
`time` DateTime,
`bid` Float64,
`ofr` Float64,
`bidSiz` Int32,
`ofrsiz` Int32,
`mode` Int32,
`ex` FixedString(1),
`mmid` Nullable(String)
)
ENGINE = MergeTree()
PARTITION BY toYYYYMMDD(time)
ORDER BY symbol
SETTINGS index_granularity = 8192;
insert into taq_local(symbol, time) select toString(number%23), toDateTime('2007-08-01 00:00:00') + intDiv(number, 100)
from numbers(300000000);
your query
SELECT count()
FROM taq_local
WHERE (symbol IN ('1', '2', '3'))
AND toDate(time) BETWEEN '2007-08-03' AND '2007-08-07'
┌─count()─┐
│ 5634783 │
└─────────┘
1 rows in set. Elapsed: 0.055 sec. Processed 29.81 million rows, 418.38 MB (545.28 million rows/s., 7.65 GB/s.)
proper query
SELECT count()
FROM taq_local
WHERE symbol IN ('1', '2', '3')
and time >= '2007-08-03 00:00:00' AND time < '2007-08-08 00:00:00'
┌─count()─┐
│ 5634783 │
└─────────┘
1 rows in set. Elapsed: 0.014 sec. Processed 6.16 million rows, 86.61 MB (428.83 million rows/s., 6.03 GB/s.)
Or
SELECT count()
FROM taq_local
WHERE (symbol IN ('1', '2', '3')) and toYYYYMMDD(time) between toYYYYMMDD(toDate('2007-08-03')) AND toYYYYMMDD(toDate('2007-08-07'))
┌─count()─┐
│ 5634783 │
└─────────┘
1 rows in set. Elapsed: 0.018 sec. Processed 6.16 million rows, 86.61 MB (347.28 million rows/s., 4.88 GB/s.)
As you can see there is huge difference
Processed 29.81 million rows
vs Processed 6.16 million rows
https://stackoverflow.com/a/60155974/11644308