PyAirbyte icon indicating copy to clipboard operation
PyAirbyte copied to clipboard

PyAirbyte destination-bigquery columns are empty

Open julioyildo opened this issue 10 months ago • 0 comments

Hi,

I'm reaching out because my BigQuery destination is not working as expected.

  1. Here is my BigQuery cache result: Image

  2. Here is the final destination dataset result: Image

As we can see, the values are absent and the metadata does not match. I'm on airbyte 0.23.0, and my code is the following:

import json
import envs
import airbyte as ab
from airbyte.caches import  BigQueryCache

# Path to the Google service-account key file. It is parsed below and is also
# handed to the BigQuery cache as `credentials_path`.
service_account_credentials_path = "service-account-credentials.json"
# Placeholder from the issue report: the real authorization config is redacted.
user_credentials = {...}


# Read the key file as UTF-8 explicitly so parsing does not depend on the
# platform's default text encoding.
with open(service_account_credentials_path, encoding="utf-8") as credentials_file:
    service_account_credentials_json = json.load(credentials_file)


# Placeholder from the issue report: the real site URLs are hidden.
site_urls = ["...hidden"]


def source_google_search_console(project_id: str, site_urls: list, user_credentials: dict):
    """Read the Google Search Console ``sites`` stream into a BigQuery cache.

    Args:
        project_id: Project name passed to the BigQuery cache.
        site_urls: Value sent as the connector's ``site_urls`` setting.
        user_credentials: Value sent as the connector's ``authorization``
            setting.

    Returns:
        A ``(result, cache)`` tuple: the object returned by ``source.read``
        and the ``BigQueryCache`` it was read into.
    """
    connector_config = {
        "site_urls": site_urls,
        "start_date": "2025-01-01",
        "end_date": "2025-01-01",
        "authorization": user_credentials,
    }
    gsc_source = ab.get_source(
        "source-google-search-console",
        config=connector_config,
    )

    # Verify the config and creds before reading anything.
    gsc_source.check()
    # Only the `sites` stream is selected.
    gsc_source.select_streams(["sites"])

    # The cache reads its key file from the module-level credentials path and
    # stores data in the `airbyte_cache` dataset.
    staging_cache = BigQueryCache(
        project_name=project_id,
        credentials_path=service_account_credentials_path,
        dataset_name="airbyte_cache",
    )
    return gsc_source.read(staging_cache, force_full_refresh=True), staging_cache



def destination_bigquery(result, cache, project_id: str, dataset_id: str, region: str, credentials_json: "dict | str", destination_type: str = "destination-bigquery"):
    """Write a previously read result to a BigQuery destination connector.

    Args:
        result: Read result passed to ``destination.write`` as ``source_data``.
        cache: Cache passed to ``destination.write`` as ``cache``.
        project_id: Value sent as the connector's ``project_id`` setting.
        dataset_id: Value sent as the connector's ``dataset_id`` setting.
        region: Value sent as the connector's ``dataset_location`` setting.
        credentials_json: Service-account key, either as a parsed JSON
            object (e.g. the output of ``json.load``) or as an
            already-serialized JSON string.
        destination_type: Connector name handed to ``ab.get_destination``.

    Returns:
        The destination object after ``check()`` and ``write()`` have run.
    """
    # The config carries the key as a JSON string. Serialize parsed objects,
    # but pass a string through untouched: calling json.dumps on a string
    # would wrap it in a second layer of quoting.
    if isinstance(credentials_json, str):
        credentials_payload = credentials_json
    else:
        credentials_payload = json.dumps(credentials_json)

    destination = ab.get_destination(
        destination_type,
        config={
            "project_id": project_id,
            "dataset_id": dataset_id,
            "dataset_location": region,
            "credentials_json": credentials_payload,
        },
    )
    # Validate the destination config before writing.
    destination.check()
    destination.write(source_data=result, cache=cache)
    return destination


# Run the pipeline: read the source into the BigQuery cache, then write the
# result to the destination dataset.
gsc_result, bq_cache = source_google_search_console(
    project_id=envs.PROJECT_ID,
    site_urls=site_urls,
    user_credentials=user_credentials,
)

res = destination_bigquery(
    result=gsc_result,
    cache=bq_cache,
    project_id=envs.PROJECT_ID,
    dataset_id="pyairbyte_tests",
    region="EU",
    credentials_json=service_account_credentials_json,
)

julioyildo avatar Feb 17 '25 10:02 julioyildo