datahub icon indicating copy to clipboard operation
datahub copied to clipboard

Postgres partition table support

Open igor-kramer opened this issue 3 years ago • 2 comments

Describe the bug Current Postgres ingestion has some bugs:

  • UUID datatype: unable to map type UUID() to metadata schema'
  • If table has partitions - every partition had ingested as separate table to metadata

To Reproduce Create and ingest table with this DDL:

create table test_partitioned_0_20000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('0') TO ('20000000');

create table test_partitioned_20000000_40000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('20000000') TO ('40000000');

create table test_partitioned_40000000_60000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('40000000') TO ('60000000');

create table test_partitioned_60000000_80000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('60000000') TO ('80000000');

create table test_partitioned_80000000_100000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('80000000') TO ('100000000');

create table test_partitioned_100000000_120000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('100000000') TO ('120000000');

create table test_partitioned_120000000_140000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('120000000') TO ('140000000');

create table test_partitioned_140000000_160000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('140000000') TO ('160000000');

create table test_partitioned_160000000_180000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('160000000') TO ('180000000');

create table test_partitioned_180000000_200000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('180000000') TO ('200000000');

create table test_partitioned_200000000_220000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('200000000') TO ('220000000');

create table test_partitioned_220000000_240000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('220000000') TO ('240000000');

create table test_partitioned_240000000_260000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('240000000') TO ('260000000');

create table test_partitioned_260000000_280000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('260000000') TO ('280000000');

create table test_partitioned_280000000_300000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('280000000') TO ('300000000');

create table test_partitioned_300000000_320000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('300000000') TO ('320000000');

create table test_partitioned_320000000_340000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('320000000') TO ('340000000');

create table test_partitioned_340000000_360000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('340000000') TO ('360000000');

create table test_partitioned_360000000_380000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('360000000') TO ('380000000');

create table test_partitioned_380000000_400000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('380000000') TO ('400000000');

create table test_partitioned_400000000_420000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('400000000') TO ('420000000');

create table test_partitioned_420000000_440000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('420000000') TO ('440000000');

create table test_partitioned_440000000_460000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('440000000') TO ('460000000');

create table test_partitioned_460000000_480000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('460000000') TO ('480000000');

create table test_partitioned_480000000_500000000
    partition of test_partitioned
        (
            unique (id, "IntMainID")
            )
        FOR VALUES FROM ('480000000') TO ('500000000');

create table test_partitioned
(
    id              bigint,
    "IntMainID"     bigint,
    "Type"          varchar(30),
    "Login"         bigint,
    "OperationType" varchar(30),
    "USDAmount"     double precision,
    multi_login_id  uuid,
    "Status"        bigint,
    "DateTime"      timestamp with time zone,
    updated_at      timestamp,
    etl_created_at  timestamp,
    etl_updated_at  timestamp,
    etl_crc         bigint,
    constraint test_v2_pk
        unique (id, "IntMainID")
)
    partition by RANGE ("IntMainID");

create table test_partitioned_default
    partition of test_partitioned
        (
            unique (id, "IntMainID"),
            constraint "test_partitioned_default_IntMainID_check"
                check ("IntMainID" IS NULL)
            )
        DEFAULT
    partition by HASH (id);

create table test_partitioned_default_0
    partition of test_partitioned_default
        (
            unique (id, "IntMainID"),
            constraint "test_partitioned_default_IntMainID_check"
                check ("IntMainID" IS NULL)
            )
        FOR VALUES WITH (modulus 5, remainder 0);

create table test_partitioned_default_2
    partition of test_partitioned_default
        (
            unique (id, "IntMainID"),
            constraint "test_partitioned_default_IntMainID_check"
                check ("IntMainID" IS NULL)
            )
        FOR VALUES WITH (modulus 5, remainder 2);

create table test_partitioned_default_3
    partition of test_partitioned_default
        (
            unique (id, "IntMainID"),
            constraint "test_partitioned_default_IntMainID_check"
                check ("IntMainID" IS NULL)
            )
        FOR VALUES WITH (modulus 5, remainder 3);

create table test_partitioned_default_4
    partition of test_partitioned_default
        (
            unique (id, "IntMainID"),
            constraint "test_partitioned_default_IntMainID_check"
                check ("IntMainID" IS NULL)
            )
        FOR VALUES WITH (modulus 5, remainder 4);

Expected behavior Only one table must be ingested to DataHub and uuid datatype must be recognized

Screenshots If applicable, add screenshots to help explain your problem.

Desktop (please complete the following information):

  • OS: [e.g. iOS]
  • Browser [e.g. chrome, safari]
  • Version [e.g. 22]

Additional context Add any other context about the problem here.

igor-kramer avatar Feb 17 '22 12:02 igor-kramer

I can't speak for the partitioning error, but the issue with UUID datatypes should now be resolved (by https://github.com/linkedin/datahub/pull/4179)!

kevinhu avatar Feb 22 '22 20:02 kevinhu

This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io

github-actions[bot] avatar Sep 15 '22 02:09 github-actions[bot]