Analysts see a fanned-out DAG for processing NTD Excel files
Description
This PR rewrites the NTD Excel processing operators and the Gusty DAG definition so that the operators and hooks are covered by tests.
Resolves #4380
Type of change
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature
- [x] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] Documentation
How has this been tested?
Tested with pytest.
Post-merge follow-ups
- [ ] No action required
- [x] Actions required (specified below)
Monitor execution in the staging environment.
Terraform plan in iac/cal-itp-data-infra-staging/composer/us
Plan: 0 to add, 1 to change, 0 to destroy.
Terraform used the selected providers to generate the following execution
plan. Resource actions are indicated with the following symbols:
!~ update in-place
Terraform will perform the following actions:
# google_composer_environment.calitp-staging-composer will be updated in-place
!~ resource "google_composer_environment" "calitp-staging-composer" {
id = "projects/cal-itp-data-infra-staging/locations/us-west2/environments/calitp-staging-composer"
name = "calitp-staging-composer"
# (5 unchanged attributes hidden)
!~ config {
# (8 unchanged attributes hidden)
!~ software_config {
!~ pypi_packages = {
!~ "boto3" = "==1.41.2" -> ">=1.26.87,<2"
+ "calitp-data-infra" = "==2025.6.5"
+ "pydantic" = ">=1.9,<2.0"
!~ "sentry-sdk" = "==2.44.0" -> "==1.17.0"
# (9 unchanged elements hidden)
}
# (6 unchanged attributes hidden)
# (1 unchanged block hidden)
}
# (8 unchanged blocks hidden)
}
# (1 unchanged block hidden)
}
Plan: 0 to add, 1 to change, 0 to destroy.
:memo: Plan generated in Plan Terraform for Warehouse and DAG changes #1083
Terraform plan in iac/cal-itp-data-infra/airflow/us
Plan: 4 to add, 2 to change, 0 to destroy.
Terraform used the selected providers to generate the following execution
plan. Resource actions are indicated with the following symbols:
+ create
!~ update in-place
Terraform will perform the following actions:
# google_storage_bucket_object.calitp-composer["dags/download_and_parse_ntd_xlsx.py"] will be created
+ resource "google_storage_bucket_object" "calitp-composer" {
+ bucket = "calitp-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "dags/download_and_parse_ntd_xlsx.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/dags/download_and_parse_ntd_xlsx.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-composer["plugins/hooks/ntd_xlsx_hook.py"] will be created
+ resource "google_storage_bucket_object" "calitp-composer" {
+ bucket = "calitp-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "plugins/hooks/ntd_xlsx_hook.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/plugins/hooks/ntd_xlsx_hook.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-composer["plugins/operators/ntd_xlsx_to_gcs_operator.py"] will be created
+ resource "google_storage_bucket_object" "calitp-composer" {
+ bucket = "calitp-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "plugins/operators/ntd_xlsx_to_gcs_operator.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/plugins/operators/ntd_xlsx_to_gcs_operator.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-composer["plugins/operators/ntd_xlsx_to_jsonl_operator.py"] will be created
+ resource "google_storage_bucket_object" "calitp-composer" {
+ bucket = "calitp-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "plugins/operators/ntd_xlsx_to_jsonl_operator.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/plugins/operators/ntd_xlsx_to_jsonl_operator.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-composer-catalog will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-composer-catalog" {
!~ content = (sensitive value)
!~ crc32c = "vOnyHg==" -> (known after apply)
!~ detect_md5hash = "1I/iibNqP27Cuslct64mvg==" -> "different hash"
!~ generation = 1763693339999660 -> (known after apply)
id = "calitp-composer-data/warehouse/target/catalog.json"
!~ md5hash = "1I/iibNqP27Cuslct64mvg==" -> (known after apply)
name = "data/warehouse/target/catalog.json"
# (16 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-composer-manifest will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-composer-manifest" {
!~ content = (sensitive value)
!~ crc32c = "v5uuaA==" -> (known after apply)
!~ detect_md5hash = "Mvktvbmhm3GH3JWLxXuwqA==" -> "different hash"
!~ generation = 1763693343753501 -> (known after apply)
id = "calitp-composer-data/warehouse/target/manifest.json"
!~ md5hash = "Mvktvbmhm3GH3JWLxXuwqA==" -> (known after apply)
name = "data/warehouse/target/manifest.json"
# (16 unchanged attributes hidden)
}
Plan: 4 to add, 2 to change, 0 to destroy.
:memo: Plan generated in Plan Terraform for Warehouse and DAG changes #1083
Terraform plan in iac/cal-itp-data-infra-staging/airflow/us
Plan: 4 to add, 20 to change, 3 to destroy.
Terraform used the selected providers to generate the following execution
plan. Resource actions are indicated with the following symbols:
+ create
!~ update in-place
- destroy
Terraform will perform the following actions:
# google_storage_bucket_object.calitp-staging-composer["dags/airtable_loader_v2/generate_gtfs_download_configs.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "MijWlA==" -> (known after apply)
!~ detect_md5hash = "s632w01yc8uo408y4VdAyw==" -> "different hash"
!~ generation = 1764013430615830 -> (known after apply)
id = "calitp-staging-composer-dags/airtable_loader_v2/generate_gtfs_download_configs.py"
!~ md5hash = "s632w01yc8uo408y4VdAyw==" -> (known after apply)
name = "dags/airtable_loader_v2/generate_gtfs_download_configs.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["dags/download_and_parse_ntd_xlsx.py"] will be created
+ resource "google_storage_bucket_object" "calitp-staging-composer" {
+ bucket = "calitp-staging-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "dags/download_and_parse_ntd_xlsx.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/dags/download_and_parse_ntd_xlsx.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-staging-composer["dags/download_gtfs_schedule_v2/download_schedule_feeds.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "8qLecA==" -> (known after apply)
!~ detect_md5hash = "iGapm0xJ3U0wowUUkId1eQ==" -> "different hash"
!~ generation = 1764013431501405 -> (known after apply)
id = "calitp-staging-composer-dags/download_gtfs_schedule_v2/download_schedule_feeds.py"
!~ md5hash = "iGapm0xJ3U0wowUUkId1eQ==" -> (known after apply)
name = "dags/download_gtfs_schedule_v2/download_schedule_feeds.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["dags/parse_and_validate_gtfs.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "izcCNg==" -> (known after apply)
!~ detect_md5hash = "OYzikIAicY+DHEzfTfaPLQ==" -> "different hash"
!~ generation = 1764017282402906 -> (known after apply)
id = "calitp-staging-composer-dags/parse_and_validate_gtfs.py"
!~ md5hash = "OYzikIAicY+DHEzfTfaPLQ==" -> (known after apply)
name = "dags/parse_and_validate_gtfs.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["dags/sync_ntd_data_xlsx/scrape_ntd_xlsx_urls.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "g2TBuw==" -> (known after apply)
!~ detect_md5hash = "PSes9rK7j0FP6JcNRQhPLg==" -> "different hash"
!~ generation = 1764013430624290 -> (known after apply)
id = "calitp-staging-composer-dags/sync_ntd_data_xlsx/scrape_ntd_xlsx_urls.py"
!~ md5hash = "PSes9rK7j0FP6JcNRQhPLg==" -> (known after apply)
name = "dags/sync_ntd_data_xlsx/scrape_ntd_xlsx_urls.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["dags/unzip_and_validate_gtfs_schedule_hourly/METADATA.yml"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "SpZBgg==" -> (known after apply)
!~ detect_md5hash = "GEap1lcPysYtPcmAZwOBRw==" -> "different hash"
!~ generation = 1764026084601836 -> (known after apply)
id = "calitp-staging-composer-dags/unzip_and_validate_gtfs_schedule_hourly/METADATA.yml"
!~ md5hash = "GEap1lcPysYtPcmAZwOBRw==" -> (known after apply)
name = "dags/unzip_and_validate_gtfs_schedule_hourly/METADATA.yml"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/calitp_data_infra/__init__.py"] will be destroyed
# (because key ["plugins/calitp_data_infra/__init__.py"] is not in for_each map)
- resource "google_storage_bucket_object" "calitp-staging-composer" {
- bucket = "calitp-staging-composer" -> null
- content_type = "text/plain; charset=utf-8" -> null
- crc32c = "AAAAAA==" -> null
- detect_md5hash = "1B2M2Y8AsgTpgAmY7PhCfg==" -> null
- event_based_hold = false -> null
- generation = 1764013430607737 -> null
- id = "calitp-staging-composer-plugins/calitp_data_infra/__init__.py" -> null
- md5hash = "1B2M2Y8AsgTpgAmY7PhCfg==" -> null
- md5hexhash = "d41d8cd98f00b204e9800998ecf8427e" -> null
- media_link = "https://storage.googleapis.com/download/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2F__init__.py?generation=1764013430607737&alt=media" -> null
- metadata = {} -> null
- name = "plugins/calitp_data_infra/__init__.py" -> null
- output_name = "plugins/calitp_data_infra/__init__.py" -> null
- self_link = "https://www.googleapis.com/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2F__init__.py" -> null
- source = "../../../../airflow/plugins/calitp_data_infra/__init__.py" -> null
- storage_class = "STANDARD" -> null
- temporary_hold = false -> null
# (6 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/calitp_data_infra/auth.py"] will be destroyed
# (because key ["plugins/calitp_data_infra/auth.py"] is not in for_each map)
- resource "google_storage_bucket_object" "calitp-staging-composer" {
- bucket = "calitp-staging-composer" -> null
- content_type = "text/plain; charset=utf-8" -> null
- crc32c = "6lsUtA==" -> null
- detect_md5hash = "+/KTbwc3sd3B4wBkY+HoUw==" -> null
- event_based_hold = false -> null
- generation = 1764013431501729 -> null
- id = "calitp-staging-composer-plugins/calitp_data_infra/auth.py" -> null
- md5hash = "+/KTbwc3sd3B4wBkY+HoUw==" -> null
- md5hexhash = "fbf2936f0737b1ddc1e3006463e1e853" -> null
- media_link = "https://storage.googleapis.com/download/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2Fauth.py?generation=1764013431501729&alt=media" -> null
- metadata = {} -> null
- name = "plugins/calitp_data_infra/auth.py" -> null
- output_name = "plugins/calitp_data_infra/auth.py" -> null
- self_link = "https://www.googleapis.com/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2Fauth.py" -> null
- source = "../../../../airflow/plugins/calitp_data_infra/auth.py" -> null
- storage_class = "STANDARD" -> null
- temporary_hold = false -> null
# (6 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/calitp_data_infra/storage.py"] will be destroyed
# (because key ["plugins/calitp_data_infra/storage.py"] is not in for_each map)
- resource "google_storage_bucket_object" "calitp-staging-composer" {
- bucket = "calitp-staging-composer" -> null
- content_type = "text/plain; charset=utf-8" -> null
- crc32c = "b87VYA==" -> null
- detect_md5hash = "sq1Q+wmsL8o0RKJLFUFC7g==" -> null
- event_based_hold = false -> null
- generation = 1764013431168899 -> null
- id = "calitp-staging-composer-plugins/calitp_data_infra/storage.py" -> null
- md5hash = "sq1Q+wmsL8o0RKJLFUFC7g==" -> null
- md5hexhash = "b2ad50fb09ac2fca3444a24b154142ee" -> null
- media_link = "https://storage.googleapis.com/download/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2Fstorage.py?generation=1764013431168899&alt=media" -> null
- metadata = {} -> null
- name = "plugins/calitp_data_infra/storage.py" -> null
- output_name = "plugins/calitp_data_infra/storage.py" -> null
- self_link = "https://www.googleapis.com/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2Fstorage.py" -> null
- source = "../../../../airflow/plugins/calitp_data_infra/storage.py" -> null
- storage_class = "STANDARD" -> null
- temporary_hold = false -> null
# (6 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/download_config_hook.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "rVl5AQ==" -> (known after apply)
!~ detect_md5hash = "/NQgFN9OopgfEltaQ1kNuA==" -> "different hash"
!~ generation = 1764017282355947 -> (known after apply)
id = "calitp-staging-composer-plugins/hooks/download_config_hook.py"
!~ md5hash = "/NQgFN9OopgfEltaQ1kNuA==" -> (known after apply)
name = "plugins/hooks/download_config_hook.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/kuba_hook.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "pIf6jA==" -> (known after apply)
!~ detect_md5hash = "M9n0Cr7dL9+4asfxnMHgjQ==" -> "different hash"
!~ generation = 1764013430307175 -> (known after apply)
id = "calitp-staging-composer-plugins/hooks/kuba_hook.py"
!~ md5hash = "M9n0Cr7dL9+4asfxnMHgjQ==" -> (known after apply)
name = "plugins/hooks/kuba_hook.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/ntd_xlsx_hook.py"] will be created
+ resource "google_storage_bucket_object" "calitp-staging-composer" {
+ bucket = "calitp-staging-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "plugins/hooks/ntd_xlsx_hook.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/plugins/hooks/ntd_xlsx_hook.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/soda_hook.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "Wxj+aQ==" -> (known after apply)
!~ detect_md5hash = "CDAoj9pONq2nsD5BT43vEg==" -> "different hash"
!~ generation = 1764013431478759 -> (known after apply)
id = "calitp-staging-composer-plugins/hooks/soda_hook.py"
!~ md5hash = "CDAoj9pONq2nsD5BT43vEg==" -> (known after apply)
name = "plugins/hooks/soda_hook.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/transitland_hook.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "xolZ/w==" -> (known after apply)
!~ detect_md5hash = "lMSV7OyTWTBE5ar8Ush1oA==" -> "different hash"
!~ generation = 1764013430624783 -> (known after apply)
id = "calitp-staging-composer-plugins/hooks/transitland_hook.py"
!~ md5hash = "lMSV7OyTWTBE5ar8Ush1oA==" -> (known after apply)
name = "plugins/hooks/transitland_hook.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/aggregator_to_gcs_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "h26Utg==" -> (known after apply)
!~ detect_md5hash = "+2mzXgNPW36mWf5Z3ZZBag==" -> "different hash"
!~ generation = 1764013431525654 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/aggregator_to_gcs_operator.py"
!~ md5hash = "+2mzXgNPW36mWf5Z3ZZBag==" -> (known after apply)
name = "plugins/operators/aggregator_to_gcs_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/blackcat_to_gcs_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "s7BJ7Q==" -> (known after apply)
!~ detect_md5hash = "husvfrVLOwWESpUczaAT3w==" -> "different hash"
!~ generation = 1764013430617597 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/blackcat_to_gcs_operator.py"
!~ md5hash = "husvfrVLOwWESpUczaAT3w==" -> (known after apply)
name = "plugins/operators/blackcat_to_gcs_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/dbt_manifest_to_dictionary_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "4MyK1Q==" -> (known after apply)
!~ detect_md5hash = "FIHwYcjOm5NPB+nPb7enyg==" -> "different hash"
!~ generation = 1764013431525706 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/dbt_manifest_to_dictionary_operator.py"
!~ md5hash = "FIHwYcjOm5NPB+nPb7enyg==" -> (known after apply)
name = "plugins/operators/dbt_manifest_to_dictionary_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/dbt_manifest_to_metadata_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "UDX9og==" -> (known after apply)
!~ detect_md5hash = "F4n9FAx9ExF1Zl3J7NaMWQ==" -> "different hash"
!~ generation = 1764013430607149 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/dbt_manifest_to_metadata_operator.py"
!~ md5hash = "F4n9FAx9ExF1Zl3J7NaMWQ==" -> (known after apply)
name = "plugins/operators/dbt_manifest_to_metadata_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/gtfs_csv_to_jsonl_hourly.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "TqO7Pw==" -> (known after apply)
!~ detect_md5hash = "NwuYcA61jx8T4i0e2jx8IQ==" -> "different hash"
!~ generation = 1764024328100973 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/gtfs_csv_to_jsonl_hourly.py"
!~ md5hash = "NwuYcA61jx8T4i0e2jx8IQ==" -> (known after apply)
name = "plugins/operators/gtfs_csv_to_jsonl_hourly.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/littlepay_raw_sync_feed_v3.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "TDDjLg==" -> (known after apply)
!~ detect_md5hash = "zXyVjenO8Z0Xcx4u85EVYQ==" -> "different hash"
!~ generation = 1764013430609071 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/littlepay_raw_sync_feed_v3.py"
!~ md5hash = "zXyVjenO8Z0Xcx4u85EVYQ==" -> (known after apply)
name = "plugins/operators/littlepay_raw_sync_feed_v3.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/ntd_xlsx_to_gcs_operator.py"] will be created
+ resource "google_storage_bucket_object" "calitp-staging-composer" {
+ bucket = "calitp-staging-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "plugins/operators/ntd_xlsx_to_gcs_operator.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/plugins/operators/ntd_xlsx_to_gcs_operator.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/ntd_xlsx_to_jsonl_operator.py"] will be created
+ resource "google_storage_bucket_object" "calitp-staging-composer" {
+ bucket = "calitp-staging-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "plugins/operators/ntd_xlsx_to_jsonl_operator.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/plugins/operators/ntd_xlsx_to_jsonl_operator.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/pod_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "H81Llg==" -> (known after apply)
!~ detect_md5hash = "6vO0LHE3p5d/cOQ71Ghv8g==" -> "different hash"
!~ generation = 1764013431061693 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/pod_operator.py"
!~ md5hash = "6vO0LHE3p5d/cOQ71Ghv8g==" -> (known after apply)
name = "plugins/operators/pod_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/scrape_ntd_xlsx.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "800p7w==" -> (known after apply)
!~ detect_md5hash = "+sUY5347tlkwmkjx/59Ytg==" -> "different hash"
!~ generation = 1764013431523798 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/scrape_ntd_xlsx.py"
!~ md5hash = "+sUY5347tlkwmkjx/59Ytg==" -> (known after apply)
name = "plugins/operators/scrape_ntd_xlsx.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/scrape_state_geoportal.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "HSTs0g==" -> (known after apply)
!~ detect_md5hash = "kroDAzyyod9g32UxYWccew==" -> "different hash"
!~ generation = 1764013431034226 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/scrape_state_geoportal.py"
!~ md5hash = "kroDAzyyod9g32UxYWccew==" -> (known after apply)
name = "plugins/operators/scrape_state_geoportal.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/scripts/gtfs_rt_parser.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "XjBzGA==" -> (known after apply)
!~ detect_md5hash = "BfvE2ctBSfWnSYBpIv2hOg==" -> "different hash"
!~ generation = 1764013430633935 -> (known after apply)
id = "calitp-staging-composer-plugins/scripts/gtfs_rt_parser.py"
!~ md5hash = "BfvE2ctBSfWnSYBpIv2hOg==" -> (known after apply)
name = "plugins/scripts/gtfs_rt_parser.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/utils.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "YEA87w==" -> (known after apply)
!~ detect_md5hash = "kgfrhATYmjk8tB2Z4v/yWQ==" -> "different hash"
!~ generation = 1764024327799629 -> (known after apply)
id = "calitp-staging-composer-plugins/utils.py"
!~ md5hash = "kgfrhATYmjk8tB2Z4v/yWQ==" -> (known after apply)
name = "plugins/utils.py"
# (17 unchanged attributes hidden)
}
Plan: 4 to add, 20 to change, 3 to destroy.
:memo: Plan generated in Plan Terraform for Warehouse and DAG changes #1083
Terraform plan in iac/cal-itp-data-infra-staging/airflow/us
Plan: 2 to add, 17 to change, 3 to destroy.
Terraform used the selected providers to generate the following execution
plan. Resource actions are indicated with the following symbols:
+ create
!~ update in-place
- destroy
Terraform will perform the following actions:
# google_storage_bucket_object.calitp-staging-composer["dags/airtable_loader_v2/generate_gtfs_download_configs.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "MijWlA==" -> (known after apply)
!~ detect_md5hash = "s632w01yc8uo408y4VdAyw==" -> "different hash"
!~ generation = 1763690417635115 -> (known after apply)
id = "calitp-staging-composer-dags/airtable_loader_v2/generate_gtfs_download_configs.py"
!~ md5hash = "s632w01yc8uo408y4VdAyw==" -> (known after apply)
name = "dags/airtable_loader_v2/generate_gtfs_download_configs.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["dags/download_gtfs_schedule_v2/download_schedule_feeds.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "8qLecA==" -> (known after apply)
!~ detect_md5hash = "iGapm0xJ3U0wowUUkId1eQ==" -> "different hash"
!~ generation = 1763690417218909 -> (known after apply)
id = "calitp-staging-composer-dags/download_gtfs_schedule_v2/download_schedule_feeds.py"
!~ md5hash = "iGapm0xJ3U0wowUUkId1eQ==" -> (known after apply)
name = "dags/download_gtfs_schedule_v2/download_schedule_feeds.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["dags/sync_ntd_data_xlsx/scrape_ntd_xlsx_urls.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "g2TBuw==" -> (known after apply)
!~ detect_md5hash = "PSes9rK7j0FP6JcNRQhPLg==" -> "different hash"
!~ generation = 1763690417804807 -> (known after apply)
id = "calitp-staging-composer-dags/sync_ntd_data_xlsx/scrape_ntd_xlsx_urls.py"
!~ md5hash = "PSes9rK7j0FP6JcNRQhPLg==" -> (known after apply)
name = "dags/sync_ntd_data_xlsx/scrape_ntd_xlsx_urls.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/calitp_data_infra/__init__.py"] will be destroyed
# (because key ["plugins/calitp_data_infra/__init__.py"] is not in for_each map)
- resource "google_storage_bucket_object" "calitp-staging-composer" {
- bucket = "calitp-staging-composer" -> null
- content_type = "text/plain; charset=utf-8" -> null
- crc32c = "AAAAAA==" -> null
- detect_md5hash = "1B2M2Y8AsgTpgAmY7PhCfg==" -> null
- event_based_hold = false -> null
- generation = 1763690417199832 -> null
- id = "calitp-staging-composer-plugins/calitp_data_infra/__init__.py" -> null
- md5hash = "1B2M2Y8AsgTpgAmY7PhCfg==" -> null
- md5hexhash = "d41d8cd98f00b204e9800998ecf8427e" -> null
- media_link = "https://storage.googleapis.com/download/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2F__init__.py?generation=1763690417199832&alt=media" -> null
- metadata = {} -> null
- name = "plugins/calitp_data_infra/__init__.py" -> null
- output_name = "plugins/calitp_data_infra/__init__.py" -> null
- self_link = "https://www.googleapis.com/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2F__init__.py" -> null
- source = "../../../../airflow/plugins/calitp_data_infra/__init__.py" -> null
- storage_class = "STANDARD" -> null
- temporary_hold = false -> null
# (6 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/calitp_data_infra/auth.py"] will be destroyed
# (because key ["plugins/calitp_data_infra/auth.py"] is not in for_each map)
- resource "google_storage_bucket_object" "calitp-staging-composer" {
- bucket = "calitp-staging-composer" -> null
- content_type = "text/plain; charset=utf-8" -> null
- crc32c = "6lsUtA==" -> null
- detect_md5hash = "+/KTbwc3sd3B4wBkY+HoUw==" -> null
- event_based_hold = false -> null
- generation = 1763690417207979 -> null
- id = "calitp-staging-composer-plugins/calitp_data_infra/auth.py" -> null
- md5hash = "+/KTbwc3sd3B4wBkY+HoUw==" -> null
- md5hexhash = "fbf2936f0737b1ddc1e3006463e1e853" -> null
- media_link = "https://storage.googleapis.com/download/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2Fauth.py?generation=1763690417207979&alt=media" -> null
- metadata = {} -> null
- name = "plugins/calitp_data_infra/auth.py" -> null
- output_name = "plugins/calitp_data_infra/auth.py" -> null
- self_link = "https://www.googleapis.com/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2Fauth.py" -> null
- source = "../../../../airflow/plugins/calitp_data_infra/auth.py" -> null
- storage_class = "STANDARD" -> null
- temporary_hold = false -> null
# (6 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/calitp_data_infra/storage.py"] will be destroyed
# (because key ["plugins/calitp_data_infra/storage.py"] is not in for_each map)
- resource "google_storage_bucket_object" "calitp-staging-composer" {
- bucket = "calitp-staging-composer" -> null
- content_type = "text/plain; charset=utf-8" -> null
- crc32c = "b87VYA==" -> null
- detect_md5hash = "sq1Q+wmsL8o0RKJLFUFC7g==" -> null
- event_based_hold = false -> null
- generation = 1763690417395966 -> null
- id = "calitp-staging-composer-plugins/calitp_data_infra/storage.py" -> null
- md5hash = "sq1Q+wmsL8o0RKJLFUFC7g==" -> null
- md5hexhash = "b2ad50fb09ac2fca3444a24b154142ee" -> null
- media_link = "https://storage.googleapis.com/download/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2Fstorage.py?generation=1763690417395966&alt=media" -> null
- metadata = {} -> null
- name = "plugins/calitp_data_infra/storage.py" -> null
- output_name = "plugins/calitp_data_infra/storage.py" -> null
- self_link = "https://www.googleapis.com/storage/v1/b/calitp-staging-composer/o/plugins%2Fcalitp_data_infra%2Fstorage.py" -> null
- source = "../../../../airflow/plugins/calitp_data_infra/storage.py" -> null
- storage_class = "STANDARD" -> null
- temporary_hold = false -> null
# (6 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/kuba_hook.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "pIf6jA==" -> (known after apply)
!~ detect_md5hash = "M9n0Cr7dL9+4asfxnMHgjQ==" -> "different hash"
!~ generation = 1763690418428084 -> (known after apply)
id = "calitp-staging-composer-plugins/hooks/kuba_hook.py"
!~ md5hash = "M9n0Cr7dL9+4asfxnMHgjQ==" -> (known after apply)
name = "plugins/hooks/kuba_hook.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/ntd_xlsx_hook.py"] will be created
+ resource "google_storage_bucket_object" "calitp-staging-composer" {
+ bucket = "calitp-staging-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "plugins/hooks/ntd_xlsx_hook.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/plugins/hooks/ntd_xlsx_hook.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/soda_hook.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "Wxj+aQ==" -> (known after apply)
!~ detect_md5hash = "CDAoj9pONq2nsD5BT43vEg==" -> "different hash"
!~ generation = 1763690418284709 -> (known after apply)
id = "calitp-staging-composer-plugins/hooks/soda_hook.py"
!~ md5hash = "CDAoj9pONq2nsD5BT43vEg==" -> (known after apply)
name = "plugins/hooks/soda_hook.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/hooks/transitland_hook.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "xolZ/w==" -> (known after apply)
!~ detect_md5hash = "lMSV7OyTWTBE5ar8Ush1oA==" -> "different hash"
!~ generation = 1763690417210931 -> (known after apply)
id = "calitp-staging-composer-plugins/hooks/transitland_hook.py"
!~ md5hash = "lMSV7OyTWTBE5ar8Ush1oA==" -> (known after apply)
name = "plugins/hooks/transitland_hook.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/aggregator_to_gcs_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "h26Utg==" -> (known after apply)
!~ detect_md5hash = "+2mzXgNPW36mWf5Z3ZZBag==" -> "different hash"
!~ generation = 1763690417144134 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/aggregator_to_gcs_operator.py"
!~ md5hash = "+2mzXgNPW36mWf5Z3ZZBag==" -> (known after apply)
name = "plugins/operators/aggregator_to_gcs_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/blackcat_to_gcs_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "s7BJ7Q==" -> (known after apply)
!~ detect_md5hash = "husvfrVLOwWESpUczaAT3w==" -> "different hash"
!~ generation = 1763690417861255 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/blackcat_to_gcs_operator.py"
!~ md5hash = "husvfrVLOwWESpUczaAT3w==" -> (known after apply)
name = "plugins/operators/blackcat_to_gcs_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/dbt_manifest_to_dictionary_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "4MyK1Q==" -> (known after apply)
!~ detect_md5hash = "FIHwYcjOm5NPB+nPb7enyg==" -> "different hash"
!~ generation = 1763690416913428 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/dbt_manifest_to_dictionary_operator.py"
!~ md5hash = "FIHwYcjOm5NPB+nPb7enyg==" -> (known after apply)
name = "plugins/operators/dbt_manifest_to_dictionary_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/dbt_manifest_to_metadata_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "UDX9og==" -> (known after apply)
!~ detect_md5hash = "F4n9FAx9ExF1Zl3J7NaMWQ==" -> "different hash"
!~ generation = 1763690417995295 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/dbt_manifest_to_metadata_operator.py"
!~ md5hash = "F4n9FAx9ExF1Zl3J7NaMWQ==" -> (known after apply)
name = "plugins/operators/dbt_manifest_to_metadata_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/gtfs_csv_to_jsonl_hourly.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "pCzW9g==" -> (known after apply)
!~ detect_md5hash = "n34Iv1u6y/wZeOnqO2WDEA==" -> "different hash"
!~ generation = 1763690417647780 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/gtfs_csv_to_jsonl_hourly.py"
!~ md5hash = "n34Iv1u6y/wZeOnqO2WDEA==" -> (known after apply)
name = "plugins/operators/gtfs_csv_to_jsonl_hourly.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/littlepay_raw_sync_feed_v3.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "TDDjLg==" -> (known after apply)
!~ detect_md5hash = "zXyVjenO8Z0Xcx4u85EVYQ==" -> "different hash"
!~ generation = 1763690417211071 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/littlepay_raw_sync_feed_v3.py"
!~ md5hash = "zXyVjenO8Z0Xcx4u85EVYQ==" -> (known after apply)
name = "plugins/operators/littlepay_raw_sync_feed_v3.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/ntd_xlsx_to_gcs_operator.py"] will be created
+ resource "google_storage_bucket_object" "calitp-staging-composer" {
+ bucket = "calitp-staging-composer"
+ content = (sensitive value)
+ content_type = (known after apply)
+ crc32c = (known after apply)
+ detect_md5hash = "different hash"
+ generation = (known after apply)
+ id = (known after apply)
+ kms_key_name = (known after apply)
+ md5hash = (known after apply)
+ md5hexhash = (known after apply)
+ media_link = (known after apply)
+ name = "plugins/operators/ntd_xlsx_to_gcs_operator.py"
+ output_name = (known after apply)
+ self_link = (known after apply)
+ source = "../../../../airflow/plugins/operators/ntd_xlsx_to_gcs_operator.py"
+ storage_class = (known after apply)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/pod_operator.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "H81Llg==" -> (known after apply)
!~ detect_md5hash = "6vO0LHE3p5d/cOQ71Ghv8g==" -> "different hash"
!~ generation = 1763690417200759 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/pod_operator.py"
!~ md5hash = "6vO0LHE3p5d/cOQ71Ghv8g==" -> (known after apply)
name = "plugins/operators/pod_operator.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/scrape_ntd_xlsx.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "800p7w==" -> (known after apply)
!~ detect_md5hash = "+sUY5347tlkwmkjx/59Ytg==" -> "different hash"
!~ generation = 1763690417819438 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/scrape_ntd_xlsx.py"
!~ md5hash = "+sUY5347tlkwmkjx/59Ytg==" -> (known after apply)
name = "plugins/operators/scrape_ntd_xlsx.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/operators/scrape_state_geoportal.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "HSTs0g==" -> (known after apply)
!~ detect_md5hash = "kroDAzyyod9g32UxYWccew==" -> "different hash"
!~ generation = 1763690418239800 -> (known after apply)
id = "calitp-staging-composer-plugins/operators/scrape_state_geoportal.py"
!~ md5hash = "kroDAzyyod9g32UxYWccew==" -> (known after apply)
name = "plugins/operators/scrape_state_geoportal.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/scripts/gtfs_rt_parser.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "XjBzGA==" -> (known after apply)
!~ detect_md5hash = "BfvE2ctBSfWnSYBpIv2hOg==" -> "different hash"
!~ generation = 1763690417837883 -> (known after apply)
id = "calitp-staging-composer-plugins/scripts/gtfs_rt_parser.py"
!~ md5hash = "BfvE2ctBSfWnSYBpIv2hOg==" -> (known after apply)
name = "plugins/scripts/gtfs_rt_parser.py"
# (17 unchanged attributes hidden)
}
# google_storage_bucket_object.calitp-staging-composer["plugins/utils.py"] will be updated in-place
!~ resource "google_storage_bucket_object" "calitp-staging-composer" {
!~ crc32c = "2wseZw==" -> (known after apply)
!~ detect_md5hash = "L7u5luMr8c8OXJVHR7nzEg==" -> "different hash"
!~ generation = 1763690417826500 -> (known after apply)
id = "calitp-staging-composer-plugins/utils.py"
!~ md5hash = "L7u5luMr8c8OXJVHR7nzEg==" -> (known after apply)
name = "plugins/utils.py"
# (17 unchanged attributes hidden)
}
Plan: 2 to add, 17 to change, 3 to destroy.
:memo: Plan generated in Plan Terraform for Warehouse and DAG changes #1070