datahub
datahub copied to clipboard
Postgres partition table support
Describe the bug Current Postgres ingestion has some bugs:
- UUID datatype: unable to map type UUID() to metadata schema'
- If table has partitions - every partition had ingested as separate table to metadata
To Reproduce Create and ingest table with this DDL:
create table test_partitioned_0_20000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('0') TO ('20000000');
create table test_partitioned_20000000_40000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('20000000') TO ('40000000');
create table test_partitioned_40000000_60000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('40000000') TO ('60000000');
create table test_partitioned_60000000_80000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('60000000') TO ('80000000');
create table test_partitioned_80000000_100000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('80000000') TO ('100000000');
create table test_partitioned_100000000_120000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('100000000') TO ('120000000');
create table test_partitioned_120000000_140000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('120000000') TO ('140000000');
create table test_partitioned_140000000_160000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('140000000') TO ('160000000');
create table test_partitioned_160000000_180000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('160000000') TO ('180000000');
create table test_partitioned_180000000_200000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('180000000') TO ('200000000');
create table test_partitioned_200000000_220000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('200000000') TO ('220000000');
create table test_partitioned_220000000_240000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('220000000') TO ('240000000');
create table test_partitioned_240000000_260000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('240000000') TO ('260000000');
create table test_partitioned_260000000_280000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('260000000') TO ('280000000');
create table test_partitioned_280000000_300000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('280000000') TO ('300000000');
create table test_partitioned_300000000_320000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('300000000') TO ('320000000');
create table test_partitioned_320000000_340000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('320000000') TO ('340000000');
create table test_partitioned_340000000_360000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('340000000') TO ('360000000');
create table test_partitioned_360000000_380000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('360000000') TO ('380000000');
create table test_partitioned_380000000_400000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('380000000') TO ('400000000');
create table test_partitioned_400000000_420000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('400000000') TO ('420000000');
create table test_partitioned_420000000_440000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('420000000') TO ('440000000');
create table test_partitioned_440000000_460000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('440000000') TO ('460000000');
create table test_partitioned_460000000_480000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('460000000') TO ('480000000');
create table test_partitioned_480000000_500000000
partition of test_partitioned
(
unique (id, "IntMainID")
)
FOR VALUES FROM ('480000000') TO ('500000000');
create table test_partitioned
(
id bigint,
"IntMainID" bigint,
"Type" varchar(30),
"Login" bigint,
"OperationType" varchar(30),
"USDAmount" double precision,
multi_login_id uuid,
"Status" bigint,
"DateTime" timestamp with time zone,
updated_at timestamp,
etl_created_at timestamp,
etl_updated_at timestamp,
etl_crc bigint,
constraint test_v2_pk
unique (id, "IntMainID")
)
partition by RANGE ("IntMainID");
create table test_partitioned_default
partition of test_partitioned
(
unique (id, "IntMainID"),
constraint "test_partitioned_default_IntMainID_check"
check ("IntMainID" IS NULL)
)
DEFAULT
partition by HASH (id);
create table test_partitioned_default_0
partition of test_partitioned_default
(
unique (id, "IntMainID"),
constraint "test_partitioned_default_IntMainID_check"
check ("IntMainID" IS NULL)
)
FOR VALUES WITH (modulus 5, remainder 0);
create table test_partitioned_default_2
partition of test_partitioned_default
(
unique (id, "IntMainID"),
constraint "test_partitioned_default_IntMainID_check"
check ("IntMainID" IS NULL)
)
FOR VALUES WITH (modulus 5, remainder 2);
create table test_partitioned_default_3
partition of test_partitioned_default
(
unique (id, "IntMainID"),
constraint "test_partitioned_default_IntMainID_check"
check ("IntMainID" IS NULL)
)
FOR VALUES WITH (modulus 5, remainder 3);
create table test_partitioned_default_4
partition of test_partitioned_default
(
unique (id, "IntMainID"),
constraint "test_partitioned_default_IntMainID_check"
check ("IntMainID" IS NULL)
)
FOR VALUES WITH (modulus 5, remainder 4);
Expected behavior
Only one table must be ingested to DataHub and uuid
datatype must be recognized
Screenshots If applicable, add screenshots to help explain your problem.
Desktop (please complete the following information):
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]
Additional context Add any other context about the problem here.
I can't speak for the partitioning error, but the issue with UUID datatypes should now be resolved (by https://github.com/linkedin/datahub/pull/4179)!
This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io