OpenMLDB icon indicating copy to clipboard operation
OpenMLDB copied to clipboard

Wrong results in Memtable::GetRecordIdxCnt

Open Leowner opened this issue 2 years ago • 2 comments

Wrong results in Memtable::GetRecordIdxCnt.

  1. Here GetRecordIdxCnt will get RecordIdxCnt for idx. But inside this function, it will call segments_[real_idx][i]->GetIdxCnt();. Maybe int GetIdxCnt(uint32_t ts_idx, uint64_t& ts_cnt) should be used instead of GetIdxCnt() for this specific idx.
bool MemTable::GetRecordIdxCnt(uint32_t idx, uint64_t** stat, uint32_t* size) {
    if (stat == NULL) {
        return false;
    }
    std::shared_ptr<IndexDef> index_def = table_index_.GetIndex(idx);
    if (!index_def || !index_def->IsReady()) {
        return false;
    }
    auto* data_array = new uint64_t[seg_cnt_];
    uint32_t real_idx = index_def->GetInnerPos();
    for (uint32_t i = 0; i < seg_cnt_; i++) {
        data_array[i] = segments_[real_idx][i]->GetIdxCnt();
    }
    *stat = data_array;
    *size = seg_cnt_;
    return true;
}
  1. If create an index with two absolute timestamp columns, after SchedGC the GetRecordIdxCnt is not correct. In this test, the GetRecordIdxCnt should be 142, but the result now is 191.
SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0);
SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0);
SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 40, 0);

for (int i = 0; i < 100; i++) {
        uint64_t ts = now - (99 - i) * 60 * 1000;
        std::string ts_str = std::to_string(ts);

        std::vector<std::string> row = {"test"+ std::to_string(i / 10),
                                        "testnew"+ std::to_string(i / 10),
                                        ts_str,
                                        ts_str,
                                        ts_str};
        ::openmldb::api::PutRequest request;
        ::openmldb::api::Dimension* dim = request.add_dimensions();
        dim->set_idx(0);
        dim->set_key(row[0]);
        ::openmldb::api::Dimension* dim1 = request.add_dimensions();
        dim1->set_idx(1);
        dim1->set_key(row[1]);
        ::openmldb::api::Dimension* dim2 = request.add_dimensions();
        dim2->set_idx(2);
        dim2->set_key(row[1]);
        std::string value;
        ASSERT_EQ(0, codec.EncodeRow(row, &value));
        table->Put(0, value, request.dimensions());
    }

EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt());
table->SchedGc();
EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt());

Leowner avatar May 11 '22 08:05 Leowner

@dl239 pls help investigate this issue ?

aceforeverd avatar Jun 07 '22 08:06 aceforeverd

GetRecordIdxCnt get the first index count. so the value before gc is 100

dl239 avatar Oct 12 '22 03:10 dl239

I fixed the issue concerning RecordIdxCnt.

However, there seem to be more issues regarding GC itself. Two very similar test cases led to different results:

TEST_F(TableTest, ISSUE1799GcTest_FAIL) { // F
    common_header__
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString);
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString);
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt);
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt);
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt);
    SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0);
    SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "test", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0);
    SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 40, 0);
    Table* table = CreateTable(table_meta, table_path);
    table->Init();
    codec::SDKCodec codec(table_meta);
    uint64_t now = ::baidu::common::timer::get_micros() / 1000;
    for (int i = 0; i < 100; i++) {
            uint64_t ts = now - (99 - i) * 60 * 1000;
            std::string ts_str = std::to_string(ts);
            std::vector<std::string> row = {"test"+ std::to_string(i / 10),
                                            "testnew"+ std::to_string(i / 10),
                                            ts_str,
                                            ts_str,
                                            ts_str};
            ::openmldb::api::PutRequest request;
            ::openmldb::api::Dimension* dim = request.add_dimensions();
            dim->set_idx(0);
            dim->set_key(row[0]);
            std::string value;
            ASSERT_EQ(0, codec.EncodeRow(row, &value));
            ASSERT_TRUE(table->Put(0, value, request.dimensions()));
        }
    EXPECT_EQ(100, (int64_t)table->GetRecordIdxCnt());
    table->SchedGc();
    EXPECT_EQ(91, (int64_t)table->GetRecordIdxCnt());
}

TEST_F(TableTest, ISSUE1799GcTest2_SUCCEED) { // S
    common_header__
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString);
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString);
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt);
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt);
    SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt);
    SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0);
    SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0);
    SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 40, 0);
    Table* table = CreateTable(table_meta, table_path);
    table->Init();
    codec::SDKCodec codec(table_meta);
    uint64_t now = ::baidu::common::timer::get_micros() / 1000;
    for (int i = 0; i < 100; i++) {
            uint64_t ts = now - (99 - i) * 60 * 1000;
            std::string ts_str = std::to_string(ts);
            std::vector<std::string> row = {"test"+ std::to_string(i / 10),
                                            "testnew"+ std::to_string(i / 10),
                                            ts_str,
                                            ts_str,
                                            ts_str};
            ::openmldb::api::PutRequest request;
            ::openmldb::api::Dimension* dim = request.add_dimensions();
            dim->set_idx(0);
            dim->set_key(row[0]);
            std::string value;
            ASSERT_EQ(0, codec.EncodeRow(row, &value));
            ASSERT_TRUE(table->Put(0, value, request.dimensions()));
        }
    EXPECT_EQ(100, (int64_t)table->GetRecordIdxCnt());
    table->SchedGc();
    EXPECT_EQ(91, (int64_t)table->GetRecordIdxCnt());
}

jkpjkpjkp avatar Oct 17 '22 03:10 jkpjkpjkp

fixed in #2719

dl239 avatar Nov 03 '22 11:11 dl239