# disk.frame
# Debug this: bloomfilter feature
library(disk.frame)
# Example 1: bloom filter on an integer key (`id`), sharded on the key plus a
# random sub-shard column.

# Fix the seed so the generated input data is the same on every run; this
# makes a failure easier to reproduce while debugging.
set.seed(1)
a <- data.frame(
  id = sample(1:100, 1000, replace = TRUE),
  values = runif(1000)
)
adf <- as.disk.frame(a, nchunks = 6)

# Add a random 1/2 column and shard on (id, rand_chunk) rather than on id
# alone.
adf_sharded <- adf %>%
  mutate(rand_chunk = sample(1:2, n(), replace = TRUE)) %>%
  shard(shardby = c("id", "rand_chunk"))

# Build the bloom filter on `id`; the result of the call is what gets queried
# below.
adf_with_bloomfilter <- adf_sharded %>%
  make_bloomfilter("id")

# Query the filter for id == 1.
# NOTE(review): presumably returns the chunks that may hold the value --
# confirm against the disk.frame documentation.
adf_with_bloomfilter %>%
  bf_likely_in_chunks("id", 1)

# Apply the filter for id == 1 and bring the result into memory.
adf_with_bloomfilter %>%
  use_bloom_filter("id", 1) %>%
  collect()
# Example 2: the same workflow with a character key (`id3`), sharded on the
# key only.

# Fix the seed so the generated input data is the same on every run.
set.seed(1)
a <- data.frame(
  id3 = sample(letters, 1000, replace = TRUE),
  values = runif(1000)
)
adf <- as.disk.frame(a, nchunks = 6)

# Disabled variant (kept for reference): also shard on a random sub-shard
# column.
#   mutate(rand_chunk = sample(1:2, n(), replace = TRUE)) %>%
#   shard(shardby = c("id3", "rand_chunk"))
adf_sharded <- adf %>%
  shard(shardby = "id3")

# Build the bloom filter on `id3`; the result of the call is what gets queried
# below.
df <- adf_sharded %>%
  make_bloomfilter("id3")

# Query the filter for id3 == "a".
df %>%
  bf_likely_in_chunks("id3", "a")

# Apply the filter for id3 == "a" and bring the result into memory.
df %>%
  use_bloom_filter("id3", "a") %>%
  collect()
# The example below works as expected.
# Example 3: flights data sharded by `carrier`.
df <- nycflights13::flights %>%
  as.disk.frame(shardby = "carrier")

# Called without keeping its return value; `df` itself is queried afterwards.
make_bloomfilter(df, "carrier")

# testthat is not attached anywhere in the visible script, so `expect_true()`
# would not be found; check the same condition with base R instead.
stopifnot(length(bf_likely_in_chunks(df, "carrier", "UA")) == 1)

# Apply the filter for carrier == "UA" and bring the result into memory.
use_bloom_filter(df, "carrier", "UA") %>%
  collect()