marquez
marquez copied to clipboard
Runtime lineage graph, similar to the job & dataset lineage graph
The current lineage graph only provides a graph of the latest runs. However, there are scenarios where a run uses only a few input datasets and generates a specific output based on its run arguments. Also, when data is generated by a run, it would be good to understand which run actually generated a specific dataset version, and which columns were used for that specific run.
{ "graph": [ { "id": "dataset:s3://mart-zone:mart.active_customers", "type": "DATASET", "data": { "id": { "namespace": "s3://mart-zone", "name": "mart.active_customers" }, "type": "DB_TABLE", "name": "mart.active_customers", "physicalName": "mart.active_customers", "createdAt": "2025-04-12T09:10:00Z", "updatedAt": "2025-04-13T22:48:37.195029Z", "namespace": "s3://mart-zone", "sourceName": "default", "fields": [], "tags": [], "lastModifiedAt": null, "description": null, "lastLifecycleState": "" }, "inEdges": [ { "origin": "run:62b9f622-8c02-406d-a769-3ff4bfe81438", "destination": "dataset:s3://mart-zone:mart.active_customers" } ], "outEdges": [] }, { "id": "dataset:s3://raw-zone:raw.customers", "type": "DATASET", "data": { "id": { "namespace": "s3://raw-zone", "name": "raw.customers" }, "type": "DB_TABLE", "name": "raw.customers", "physicalName": "raw.customers", "createdAt": "2025-04-12T08:05:00Z", "updatedAt": "2025-04-13T08:03:00Z", "namespace": "s3://raw-zone", "sourceName": "default", "fields": [], "tags": [], "lastModifiedAt": null, "description": null, "lastLifecycleState": "" }, "inEdges": [], "outEdges": [ { "origin": "dataset:s3://raw-zone:raw.customers", "destination": "run:8132c404-bd09-4e91-9ec4-0e99ad98ccbe" } ] }, { "id": "dataset:s3://stage-zone:stage.customers", "type": "DATASET", "data": { "id": { "namespace": "s3://stage-zone", "name": "stage.customers" }, "type": "DB_TABLE", "name": "stage.customers", "physicalName": "stage.customers", "createdAt": "2025-04-12T08:05:00Z", "updatedAt": "2025-04-13T08:32:00Z", "namespace": "s3://stage-zone", "sourceName": "default", "fields": [], "tags": [], "lastModifiedAt": null, "description": null, "lastLifecycleState": "" }, "inEdges": [ { "origin": "run:8132c404-bd09-4e91-9ec4-0e99ad98ccbe", "destination": "dataset:s3://stage-zone:stage.customers" } ], "outEdges": [ { "origin": "dataset:s3://stage-zone:stage.customers", "destination": "run:62b9f622-8c02-406d-a769-3ff4bfe81438" } ] }, { "id": 
"run:62b9f622-8c02-406d-a769-3ff4bfe81438", "type": "RUN", "data": { "createdAt": "2025-04-13T08:30:00Z", "updatedAt": "2025-04-13T08:30:00Z", "startedAt": "2025-04-13T08:30:00Z", "endedAt": null, "state": "RUNNING", "jobUuid": "17792156-db6f-4fad-99b2-2c281ad8a848", "jobVersionUuid": null, "namespaceName": "databricks://prod", "jobName": "customer_mart_builder", "depth": 0, "inputs": [ { "namespace": "s3://stage-zone", "name": "stage.customers" } ], "outputs": [ { "namespace": "s3://mart-zone", "name": "mart.active_customers" } ], "inputDatasetVersions": [ { "datasetVersionId": { "namespace": "s3://stage-zone", "name": "stage.customers", "version": "4bd4d267-1e33-3df8-9213-c4924f672572" }, "facets": {} } ], "outputDatasetVersions": [ { "datasetVersionId": { "namespace": "s3://mart-zone", "name": "mart.active_customers", "version": "395f433c-3066-34a0-a319-ff335db07070" }, "facets": {} } ], "childRunIds": [ "0841f6cd-fb64-4b16-a38c-4c9d6f678733" ], "parentRunIds": [ "62b9f622-8c02-406d-a769-3ff4bfe81438" ] }, "inEdges": [ { "origin": "dataset:s3://stage-zone:stage.customers", "destination": "run:62b9f622-8c02-406d-a769-3ff4bfe81438" } ], "outEdges": [ { "origin": "run:62b9f622-8c02-406d-a769-3ff4bfe81438", "destination": "dataset:s3://mart-zone:mart.active_customers" } ] }, { "id": "run:8132c404-bd09-4e91-9ec4-0e99ad98ccbe", "type": "RUN", "data": { "createdAt": "2025-04-13T08:00:00Z", "updatedAt": "2025-04-13T08:00:00Z", "startedAt": "2025-04-13T08:00:00Z", "endedAt": null, "state": "RUNNING", "jobUuid": "2ea32952-8a35-414b-a4a8-58c7c8722e87", "jobVersionUuid": null, "namespaceName": "databricks://prod", "jobName": "landing_data_ingestion", "depth": 1, "inputs": [ { "namespace": "s3://raw-zone", "name": "raw.customers" } ], "outputs": [ { "namespace": "s3://stage-zone", "name": "stage.customers" } ], "inputDatasetVersions": [ { "datasetVersionId": { "namespace": "s3://raw-zone", "name": "raw.customers", "version": "e2d60de2-2363-3d48-b87b-ed6ebdac8fe4" }, 
"facets": {} } ], "outputDatasetVersions": [ { "datasetVersionId": { "namespace": "s3://stage-zone", "name": "stage.customers", "version": "4bd4d267-1e33-3df8-9213-c4924f672572" }, "facets": {} } ], "childRunIds": [ "7b6a56c9-2d23-45aa-9429-33e6f849299c" ], "parentRunIds": [ "8132c404-bd09-4e91-9ec4-0e99ad98ccbe" ] }, "inEdges": [ { "origin": "dataset:s3://raw-zone:raw.customers", "destination": "run:8132c404-bd09-4e91-9ec4-0e99ad98ccbe" } ], "outEdges": [ { "origin": "run:8132c404-bd09-4e91-9ec4-0e99ad98ccbe", "destination": "dataset:s3://stage-zone:stage.customers" } ] } ] }
Thanks for opening your first issue in the Marquez project! Please be sure to follow the issue template!