spark-bigquery-connector
spark-bigquery-connector copied to clipboard
Problem reading from view with another materialization project
Im trying to access a view with viewMaterializationProject/viewMaterializationDataset parameters which are not the same as where the view is located. This is the error Im getting (I replaced some irrelevant-private names):
20/07/06 18:10:50 INFO com.google.cloud.spark.bigquery.direct.DirectBigQueryRelation: Querying table project_name.dataset_name.table_name, parameters sent from Spark: requiredColumns=[], filters=[IsNotNull(event_name),EqualTo(event_name,LM_MAP_TOOLS_SHOWN)]
com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.UncheckedExecutionException: com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.BigQueryException: Not found: Dataset project_name.dataset_name was not found in location us-east1
at com.google.cloud.spark.bigquery.repackaged.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2051)
at com.google.cloud.spark.bigquery.repackaged.com.google.common.cache.LocalCache.get(LocalCache.java:3951)
at com.google.cloud.spark.bigquery.repackaged.com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4871)
at com.google.cloud.spark.bigquery.direct.DirectBigQueryRelation.getActualTable(DirectBigQueryRelation.scala:200)
at com.google.cloud.spark.bigquery.direct.DirectBigQueryRelation.buildScan(DirectBigQueryRelation.scala:93)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$10.apply(DataSourceStrategy.scala:293)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$10.apply(DataSourceStrategy.scala:293)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$pruneFilterProject$1.apply(DataSourceStrategy.scala:338)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$pruneFilterProject$1.apply(DataSourceStrategy.scala:337)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy.pruneFilterProjectRaw(DataSourceStrategy.scala:415)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy.pruneFilterProject(DataSourceStrategy.scala:333)
at org.apache.spark.sql.execution.datasources.DataSourceStrategy.apply(DataSourceStrategy.scala:289)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:63)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:63)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3260)
at org.apache.spark.sql.Dataset.count(Dataset.scala:2780)
at com.funnalytics.spark.apps.BiqQueryTestApp.call(BiqQueryTestApp.java:38)
at com.funnalytics.spark.apps.BiqQueryTestApp.call(BiqQueryTestApp.java:7)
at picocli.CommandLine.executeUserObject(CommandLine.java:1701)
at picocli.CommandLine.access$900(CommandLine.java:146)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2059)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2026)
at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:1893)
at picocli.CommandLine.execute(CommandLine.java:1822)
at com.funnalytics.spark.apps.BiqQueryTestApp.main(BiqQueryTestApp.java:13)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:890)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:192)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:217)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
- Other tables (not views) from the same project/dataset where the view is located are fully accessible without any problem.
- I already verified that the Service Account being used has BigQueryAdmin permissions on the viewMaterializationProject and BigQueryDataViewer+BigQueryJobUser on the project where the view is located. What else Im missing?
- The project of the Service Account Im using is in "us-east-1" and the project where the view is located is "US"
can you help please?
This is similar to issue #329