superset
superset copied to clipboard
superset db upgrade is returning: psycopg2.errors.DuplicateAlias: table name "sl_columns" specified more than once
I've been using Superset in a Docker environment; the last version I used was from 6 months ago (the Docker image from https://hub.docker.com/r/apache/superset does not have the version number). When I update the version, since the model data changed, I have to run superset db upgrade
but it returns error. I'm using as superset database PostgreSQL 12, the error appears when a9422eeaae74 (new_dataset_models_take_2) migration is running.
INFO [alembic.runtime.migration] Running upgrade ad07e4fdbaba -> a9422eeaae74, new_dataset_models_take_2
>> Copy 17 physical tables to sl_tables...
<string>:3: SAWarning: TypeDecorator UUIDType() will not produce a cache key because the ``cache_ok`` attribute is not set to True. This can have significant performance implications including some performance degradations in comparison to prior SQLAlchemy versions. Set this attribute to True if this type object's state is safe to use in a cache key, or False to disable this warning. (Background on this error at: https://sqlalche.me/e/14/cprf)
>> Copy 57 SqlaTable to sl_datasets...
Copy dataset owners...
Link physical datasets with tables...
>> Copy 3,885 table columns to sl_columns...
Link all columns to sl_datasets...
>> Copy 61 metrics to sl_columns...
Link metric columns to datasets...
>> Run postprocessing on 3,946 columns
/app/superset/migrations/versions/2022-04-01_14-38_a9422eeaae74_new_dataset_models_take_2.py:732: SAWarning: TypeDecorator UUIDType() will not produce a cache key because the ``cache_ok`` attribute is not set to True. This can have significant performance implications including some performance degradations in comparison to prior SQLAlchemy versions. Set this attribute to True if this type object's state is safe to use in a cache key, or False to disable this warning. (Background on this error at: https://sqlalche.me/e/14/cprf)
count = session.query(func.count()).select_from(query).scalar()
Traceback (most recent call last):
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1819, in _execute_context
self.dialect.do_execute(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 732, in do_execute
cursor.execute(statement, parameters)
psycopg2.errors.DuplicateAlias: table name "sl_columns" specified more than once
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/bin/superset", line 33, in <module>
sys.exit(load_entry_point('apache-superset', 'console_scripts', 'superset')())
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1128, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/flask/cli.py", line 601, in main
return super().main(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1053, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1395, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 754, in invoke
return __callback(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/decorators.py", line 26, in new_func
return f(get_current_context(), *args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/flask/cli.py", line 445, in decorator
return __ctx.invoke(f, *args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 754, in invoke
return __callback(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/flask_migrate/cli.py", line 149, in upgrade
_upgrade(directory, revision, sql, tag, x_arg)
File "/usr/local/lib/python3.8/site-packages/flask_migrate/__init__.py", line 98, in wrapped
f(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/flask_migrate/__init__.py", line 185, in upgrade
command.upgrade(config, revision, sql=sql, tag=tag)
File "/usr/local/lib/python3.8/site-packages/alembic/command.py", line 294, in upgrade
script.run_env()
File "/usr/local/lib/python3.8/site-packages/alembic/script/base.py", line 490, in run_env
util.load_python_file(self.dir, "env.py")
File "/usr/local/lib/python3.8/site-packages/alembic/util/pyfiles.py", line 97, in load_python_file
module = load_module_py(module_id, path)
File "/usr/local/lib/python3.8/site-packages/alembic/util/compat.py", line 184, in load_module_py
spec.loader.exec_module(module)
File "<frozen importlib._bootstrap_external>", line 843, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/app/superset/extensions/../migrations/env.py", line 126, in <module>
run_migrations_online()
File "/app/superset/extensions/../migrations/env.py", line 118, in run_migrations_online
context.run_migrations()
File "<string>", line 8, in run_migrations
File "/usr/local/lib/python3.8/site-packages/alembic/runtime/environment.py", line 813, in run_migrations
self.get_context().run_migrations(**kw)
File "/usr/local/lib/python3.8/site-packages/alembic/runtime/migration.py", line 561, in run_migrations
step.migration_fn(**kw)
File "/app/superset/migrations/versions/2022-04-01_14-38_a9422eeaae74_new_dataset_models_take_2.py", line 881, in upgrade
postprocess_columns(session)
File "/app/superset/migrations/versions/2022-04-01_14-38_a9422eeaae74_new_dataset_models_take_2.py", line 732, in postprocess_columns
count = session.query(func.count()).select_from(query).scalar()
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 2888, in scalar
ret = self.one()
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 2865, in one
return self._iter().one()
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 2903, in _iter
result = self.session.execute(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1696, in execute
result = conn._execute_20(statement, params or {}, execution_options)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1631, in _execute_20
return meth(self, args_10style, kwargs_10style, execution_options)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/sql/elements.py", line 325, in _execute_on_connection
return connection._execute_clauseelement(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1498, in _execute_clauseelement
ret = self._execute_context(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1862, in _execute_context
self._handle_dbapi_exception(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 2043, in _handle_dbapi_exception
util.raise_(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 207, in raise_
raise exception
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1819, in _execute_context
self.dialect.do_execute(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 732, in do_execute
cursor.execute(statement, parameters)
sqlalchemy.exc.ProgrammingError: (psycopg2.errors.DuplicateAlias) table name "sl_columns" specified more than once
[SQL: SELECT count(*) AS count_1
FROM (SELECT sl_columns.id AS column_id, table_columns.column_name AS column_name, sl_columns.changed_by_fk AS changed_by_fk, sl_columns.changed_on AS changed_on, sl_columns.created_on AS created_on, sl_columns.description AS description, sql_metrics.d3format AS d3format, sl_datasets.external_url AS external_url, sl_columns.extra_json AS extra_json, sl_columns.is_dimensional AS is_dimensional, sl_columns.is_filterable AS is_filterable, sl_datasets.is_managed_externally AS is_managed_externally, sl_columns.is_physical AS is_physical, sql_metrics.metric_type AS metric_type, table_columns.python_date_format AS python_date_format, dbs.sqlalchemy_uri AS sqlalchemy_uri, sl_dataset_tables.table_id AS table_id, coalesce(table_columns.verbose_name, sql_metrics.verbose_name) AS verbose_name, sl_columns.warning_text AS warning_text
FROM sl_columns, (SELECT sl_columns.uuid AS uuid, sl_columns.created_on AS created_on, sl_columns.changed_on AS changed_on, sl_columns.id AS id, sl_columns.table_id AS table_id, sl_columns.is_aggregation AS is_aggregation, sl_columns.is_additive AS is_additive, sl_columns.is_dimensional AS is_dimensional, sl_columns.is_filterable AS is_filterable, sl_columns.is_increase_desired AS is_increase_desired, sl_columns.is_managed_externally AS is_managed_externally, sl_columns.is_partition AS is_partition, sl_columns.is_physical AS is_physical, sl_columns.is_temporal AS is_temporal, sl_columns.is_spatial AS is_spatial, sl_columns.name AS name, sl_columns.type AS type, sl_columns.unit AS unit, sl_columns.expression AS expression, sl_columns.description AS description, sl_columns.warning_text AS warning_text, sl_columns.external_url AS external_url, sl_columns.extra_json AS extra_json, sl_columns.created_by_fk AS created_by_fk, sl_columns.changed_by_fk AS changed_by_fk
FROM sl_columns
LIMIT %(param_1)s OFFSET %(param_2)s) AS sl_columns JOIN sl_dataset_columns ON sl_dataset_columns.column_id = sl_columns.id JOIN sl_datasets ON sl_datasets.id = sl_dataset_columns.dataset_id LEFT OUTER JOIN sl_dataset_tables ON sl_datasets.is_physical AND sl_dataset_tables.dataset_id = sl_datasets.id JOIN dbs ON dbs.id = sl_datasets.database_id LEFT OUTER JOIN table_columns ON table_columns.uuid = sl_columns.uuid LEFT OUTER JOIN sql_metrics ON sql_metrics.uuid = sl_columns.uuid
WHERE sl_columns.is_physical OR table_columns.verbose_name IS NOT NULL OR table_columns.verbose_name IS NOT NULL OR sql_metrics.verbose_name IS NOT NULL OR sql_metrics.d3format IS NOT NULL OR sql_metrics.metric_type IS NOT NULL) AS anon_1]
[parameters: {'param_1': 100000, 'param_2': 0}]
(Background on this error at: https://sqlalche.me/e/14/f405)
How to reproduce the bug
- Run
superset db upgrade
before a9422eeaae74 migration using PostgreSQL 12 as superset database.
Expected results
Migration must be applied
Actual results
Error
Environment
Checklist
Make sure to follow these steps before submitting your issue - thank you!
- [x] I have checked the superset logs for python stacktraces and included it here as text if there are any.
- [x] I have reproduced the issue with at least the latest released version of superset.
- [x] I have checked the issue tracker for the same issue and I haven't found one similar.
Additional context
The error looks like a missing alias in the database query when a join is performed against a subquery.
The error is solved by changing the method get_joined_tables
in the file 38_a9422eeaae74_new_dataset_models_take_2.py at line 638 by next code:
def get_joined_tables(offset, limit):
    """Build the join tree used to postprocess a window of ``sl_columns`` rows.

    Joins the windowed columns to their datasets, tables, database, and the
    original ``table_columns``/``sql_metrics`` rows (by shared UUID).

    :param offset: first row of the ``sl_columns`` window
    :param limit: maximum number of ``sl_columns`` rows in the window
    :return: a SQLAlchemy join clause suitable for ``select_from``
    """
    from sqlalchemy.orm import aliased

    # Alias NewColumn so the windowed subquery gets its own name
    # ("sl_columns_2") instead of reusing the "sl_columns" table name —
    # PostgreSQL rejects the same alias appearing twice in one FROM clause
    # (psycopg2.errors.DuplicateAlias).
    new_column_alias = aliased(NewColumn)
    subquery = (
        session.query(new_column_alias)
        .offset(offset)
        .limit(limit)
        .subquery("sl_columns_2")
    )
    return (
        sa.join(
            subquery,
            # Join the ORM entity back to the windowed rows by primary key.
            # Without this join the outer query references "sl_columns"
            # directly, yielding a cartesian product against the subquery.
            NewColumn,
            NewColumn.id == subquery.c.id,
        )
        .join(
            dataset_column_association_table,
            # Use the column id from the subquery
            dataset_column_association_table.c.column_id == subquery.c.id,
        )
        .join(
            NewDataset,
            NewDataset.id == dataset_column_association_table.c.dataset_id,
        )
        .join(
            dataset_table_association_table,
            # Only physical datasets are linked to tables
            and_(
                NewDataset.is_physical,
                dataset_table_association_table.c.dataset_id == NewDataset.id,
            ),
            isouter=True,
        )
        .join(Database, Database.id == NewDataset.database_id)
        .join(
            TableColumn,
            # Match the original table column by shared UUID
            TableColumn.uuid == subquery.c.uuid,
            isouter=True,
        )
        .join(
            SqlMetric,
            # Match the original metric by shared UUID
            SqlMetric.uuid == subquery.c.uuid,
            isouter=True,
        )
    )
I'm facing the same problem with the latest docker image pulled just now. When will this fix make it to the docker image? Thanks
Unfortunately we've faced the same issue
I have the same error when I try to use a database from Superset 1.3 in 2.0.0
I've also run into the same issue when upgrading via the Helm chart from container image 1.3.0 to latest
(defined by default by helm chart values) the init-db job fails with:
<string>:3: SAWarning: TypeDecorator UUIDType() will not produce a cache key because the ``cache_ok`` attribute is not set to True. This can have significant performance implications including some performance degradations in comparison to prior SQLAlchemy versions. Set this attribute to True if this type object's state is safe to use in a cache key, or False to disable this warning. (Background on this error at: https://sqlalche.me/e/14/cprf)
/app/superset/migrations/versions/2022-04-01_14-38_a9422eeaae74_new_dataset_models_take_2.py:732: SAWarning: TypeDecorator UUIDType() will not produce a cache key because the ``cache_ok`` attribute is not set to True. This can have significant performance implications including some performance degradations in comparison to prior SQLAlchemy versions. Set this attribute to True if this type object's state is safe to use in a cache key, or False to disable this warning. (Background on this error at: https://sqlalche.me/e/14/cprf)
count = session.query(func.count()).select_from(query).scalar()
Link metric columns to datasets...
>> Run postprocessing on 8 columns
Traceback (most recent call last):
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1819, in _execute_context
self.dialect.do_execute(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 732, in do_execute
cursor.execute(statement, parameters)
psycopg2.errors.DuplicateAlias: table name "sl_columns" specified more than once
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/bin/superset", line 33, in <module>
sys.exit(load_entry_point('apache-superset', 'console_scripts', 'superset')())
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1128, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/flask/cli.py", line 601, in main
return super().main(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1053, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1659, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1395, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 754, in invoke
return __callback(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/decorators.py", line 26, in new_func
return f(get_current_context(), *args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/flask/cli.py", line 445, in decorator
return __ctx.invoke(f, *args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 754, in invoke
return __callback(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/flask_migrate/cli.py", line 149, in upgrade
_upgrade(directory, revision, sql, tag, x_arg)
File "/usr/local/lib/python3.8/site-packages/flask_migrate/__init__.py", line 98, in wrapped
f(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/flask_migrate/__init__.py", line 185, in upgrade
command.upgrade(config, revision, sql=sql, tag=tag)
File "/usr/local/lib/python3.8/site-packages/alembic/command.py", line 294, in upgrade
script.run_env()
File "/usr/local/lib/python3.8/site-packages/alembic/script/base.py", line 490, in run_env
util.load_python_file(self.dir, "env.py")
File "/usr/local/lib/python3.8/site-packages/alembic/util/pyfiles.py", line 97, in load_python_file
module = load_module_py(module_id, path)
File "/usr/local/lib/python3.8/site-packages/alembic/util/compat.py", line 184, in load_module_py
spec.loader.exec_module(module)
File "<frozen importlib._bootstrap_external>", line 843, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/app/superset/extensions/../migrations/env.py", line 126, in <module>
run_migrations_online()
File "/app/superset/extensions/../migrations/env.py", line 118, in run_migrations_online
context.run_migrations()
File "<string>", line 8, in run_migrations
File "/usr/local/lib/python3.8/site-packages/alembic/runtime/environment.py", line 813, in run_migrations
self.get_context().run_migrations(**kw)
File "/usr/local/lib/python3.8/site-packages/alembic/runtime/migration.py", line 561, in run_migrations
step.migration_fn(**kw)
File "/app/superset/migrations/versions/2022-04-01_14-38_a9422eeaae74_new_dataset_models_take_2.py", line 881, in upgrade
postprocess_columns(session)
File "/app/superset/migrations/versions/2022-04-01_14-38_a9422eeaae74_new_dataset_models_take_2.py", line 732, in postprocess_columns
count = session.query(func.count()).select_from(query).scalar()
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 2888, in scalar
ret = self.one()
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 2865, in one
return self._iter().one()
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py", line 2903, in _iter
result = self.session.execute(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1696, in execute
result = conn._execute_20(statement, params or {}, execution_options)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1631, in _execute_20
return meth(self, args_10style, kwargs_10style, execution_options)
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/sql/elements.py", line 325, in _execute_on_connection
return connection._execute_clauseelement(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1498, in _execute_clauseelement
ret = self._execute_context(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1862, in _execute_context
self._handle_dbapi_exception(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 2043, in _handle_dbapi_exception
util.raise_(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 207, in raise_
raise exception
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1819, in _execute_context
self.dialect.do_execute(
File "/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 732, in do_execute
cursor.execute(statement, parameters)
sqlalchemy.exc.ProgrammingError: (psycopg2.errors.DuplicateAlias) table name "sl_columns" specified more than once
[SQL: SELECT count(*) AS count_1
FROM (SELECT sl_columns.id AS column_id, table_columns.column_name AS column_name, sl_columns.changed_by_fk AS changed_by_fk, sl_columns.changed_on AS changed_on, sl_columns.created_on AS created_on, sl_columns.description AS description, sql_metrics.d3format AS d3format, sl_datasets.external_url AS external_url, sl_columns.extra_json AS extra_json, sl_columns.is_dimensional AS is_dimensional, sl_columns.is_filterable AS is_filterable, sl_datasets.is_managed_externally AS is_managed_externally, sl_columns.is_physical AS is_physical, sql_metrics.metric_type AS metric_type, table_columns.python_date_format AS python_date_format, dbs.sqlalchemy_uri AS sqlalchemy_uri, sl_dataset_tables.table_id AS table_id, coalesce(table_columns.verbose_name, sql_metrics.verbose_name) AS verbose_name, sl_columns.warning_text AS warning_text
FROM sl_columns, (SELECT sl_columns.uuid AS uuid, sl_columns.created_on AS created_on, sl_columns.changed_on AS changed_on, sl_columns.id AS id, sl_columns.table_id AS table_id, sl_columns.is_aggregation AS is_aggregation, sl_columns.is_additive AS is_additive, sl_columns.is_dimensional AS is_dimensional, sl_columns.is_filterable AS is_filterable, sl_columns.is_increase_desired AS is_increase_desired, sl_columns.is_managed_externally AS is_managed_externally, sl_columns.is_partition AS is_partition, sl_columns.is_physical AS is_physical, sl_columns.is_temporal AS is_temporal, sl_columns.is_spatial AS is_spatial, sl_columns.name AS name, sl_columns.type AS type, sl_columns.unit AS unit, sl_columns.expression AS expression, sl_columns.description AS description, sl_columns.warning_text AS warning_text, sl_columns.external_url AS external_url, sl_columns.extra_json AS extra_json, sl_columns.created_by_fk AS created_by_fk, sl_columns.changed_by_fk AS changed_by_fk
FROM sl_columns
LIMIT %(param_1)s OFFSET %(param_2)s) AS sl_columns JOIN sl_dataset_columns ON sl_dataset_columns.column_id = sl_columns.id JOIN sl_datasets ON sl_datasets.id = sl_dataset_columns.dataset_id LEFT OUTER JOIN sl_dataset_tables ON sl_datasets.is_physical AND sl_dataset_tables.dataset_id = sl_datasets.id JOIN dbs ON dbs.id = sl_datasets.database_id LEFT OUTER JOIN table_columns ON table_columns.uuid = sl_columns.uuid LEFT OUTER JOIN sql_metrics ON sql_metrics.uuid = sl_columns.uuid
WHERE sl_columns.is_physical OR table_columns.verbose_name IS NOT NULL OR table_columns.verbose_name IS NOT NULL OR sql_metrics.verbose_name IS NOT NULL OR sql_metrics.d3format IS NOT NULL OR sql_metrics.metric_type IS NOT NULL) AS anon_1]
[parameters: {'param_1': 100000, 'param_2': 0}]
(Background on this error at: https://sqlalche.me/e/14/f405)
After setting up the container image to the tag 2.0.0
the init job runs ok
@hgranillo can i know what is mean the setting up the container image tag?
I'm currently looking for a solution to the same issue.
I want to know whether the image tag is specified in docker-compose or whether the tag is set using docker image tag.
Also, may I know how to solve it?
The error is solved by changing the method
get_joined_tables
in the file 38_a9422eeaae74_new_dataset_models_take_2.py at line 638 by next code:def get_joined_tables(offset, limit): # Import aliased from sqlalchemy from sqlalchemy.orm import aliased # Create alias of NewColumn new_column_alias = aliased(NewColumn) # Get subquery and give it the alias "sl_colums_2" subquery = session.query(new_column_alias).offset(offset).limit(limit).subquery("sl_columns_2") return ( sa.join( subquery, dataset_column_association_table, # Use column id from subquery dataset_column_association_table.c.column_id == subquery.c.id, ) .join( NewDataset, NewDataset.id == dataset_column_association_table.c.dataset_id, ) .join( dataset_table_association_table, # Join tables with physical datasets and_( NewDataset.is_physical, dataset_table_association_table.c.dataset_id == NewDataset.id, ), isouter=True, ) .join(Database, Database.id == NewDataset.database_id) .join( TableColumn, # Use column uuid from subquery TableColumn.uuid == subquery.c.uuid, isouter=True, ) .join( SqlMetric, # Use column uuid from subquery SqlMetric.uuid == subquery.c.uuid, isouter=True, ) )
this query is killing python @snt1017 . Is it working?
@hgranillo can i know what is mean the setting up the container image tag?
I'm currently looking for a solution to the same issue.
I want to know whether the image tag is specified in docker-compose or whether the tag is set using docker image tag.
Also, may I know how to solve it?
Means that I'm using apache/superset:2.0.0
as the docker image.
I'm guessing if you are following this document here: https://superset.apache.org/docs/installation/installing-superset-using-docker-compose/
If that is true, then for you this would mean setting TAG=2.0.0
when you run docker-compose: TAG=2.0.0 docker-compose -f docker-compose-non-dev.yml up
The error is solved by changing the method
get_joined_tables
in the file 38_a9422eeaae74_new_dataset_models_take_2.py at line 638 by next code:def get_joined_tables(offset, limit): # Import aliased from sqlalchemy from sqlalchemy.orm import aliased # Create alias of NewColumn new_column_alias = aliased(NewColumn) # Get subquery and give it the alias "sl_colums_2" subquery = session.query(new_column_alias).offset(offset).limit(limit).subquery("sl_columns_2") return ( sa.join( subquery, dataset_column_association_table, # Use column id from subquery dataset_column_association_table.c.column_id == subquery.c.id, ) .join( NewDataset, NewDataset.id == dataset_column_association_table.c.dataset_id, ) .join( dataset_table_association_table, # Join tables with physical datasets and_( NewDataset.is_physical, dataset_table_association_table.c.dataset_id == NewDataset.id, ), isouter=True, ) .join(Database, Database.id == NewDataset.database_id) .join( TableColumn, # Use column uuid from subquery TableColumn.uuid == subquery.c.uuid, isouter=True, ) .join( SqlMetric, # Use column uuid from subquery SqlMetric.uuid == subquery.c.uuid, isouter=True, ) )
this query is killing python @snt1017 . Is it working?
The same. It is not working for me.
getting the images using the 2.0.0 tag (as a workaround to the bug mentioned up above) - doesn't seem to work
We are getting the dreaded Row error again when bringing the containers up....because SQL alchemy in these images seems to be stuck on version 1.3 instead of the minimum requirement of 1.4
ImportError: cannot import name 'Row' from 'sqlalchemy.engine' (/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/init.py)
We have a docker deployment
In the end, we had to follow the advice up above which was to fix the python function
- fixed get_joined_tables function
- rebuild the superset image (docker-compose build - after adding the word "build" inside the docker-compose file)
- docker-compose up
The database upgrade didn't work out of the box either (from 1.4 to 2.0), so we had to go into the superset_app container and run: superset db upgrade
Sample from our docker-compose file
superset: env_file: docker/.env build: . # image: *superset-image container_name: superset_app command: ["/app/docker/docker-bootstrap.sh", "app"] restart: unless-stopped ports: - 8088:8088 user: *superset-user depends_on: *superset-depends-on volumes: *superset-volumes environment: CYPRESS_CONFIG: "${CYPRESS_CONFIG}"
The error is solved by changing the method
get_joined_tables
in the file 38_a9422eeaae74_new_dataset_models_take_2.py at line 638 by next code:def get_joined_tables(offset, limit): # Import aliased from sqlalchemy from sqlalchemy.orm import aliased # Create alias of NewColumn new_column_alias = aliased(NewColumn) # Get subquery and give it the alias "sl_colums_2" subquery = session.query(new_column_alias).offset(offset).limit(limit).subquery("sl_columns_2") return ( sa.join( subquery, dataset_column_association_table, # Use column id from subquery dataset_column_association_table.c.column_id == subquery.c.id, ) .join( NewDataset, NewDataset.id == dataset_column_association_table.c.dataset_id, ) .join( dataset_table_association_table, # Join tables with physical datasets and_( NewDataset.is_physical, dataset_table_association_table.c.dataset_id == NewDataset.id, ), isouter=True, ) .join(Database, Database.id == NewDataset.database_id) .join( TableColumn, # Use column uuid from subquery TableColumn.uuid == subquery.c.uuid, isouter=True, ) .join( SqlMetric, # Use column uuid from subquery SqlMetric.uuid == subquery.c.uuid, isouter=True, ) )
this query is killing python @snt1017 . Is it working?
This query is not working; it is generating a cartesian join. We need to change the query to join NewColumn with the new subquery.
SELECT sl_columns.id AS column_id, table_columns.column_name, sl_columns.changed_by_fk, sl_columns.changed_on, sl_columns.created_on, sl_columns.description, sql_metrics.d3format, sl_datasets.external_url, sl_columns.extra_json, sl_columns.is_dimensional, sl_columns.is_filterable, sl_datasets.is_managed_externally, sl_columns.is_physical, sql_metrics.metric_type, table_columns.python_date_format, dbs.sqlalchemy_uri, sl_dataset_tables.table_id, coalesce(table_columns.verbose_name, sql_metrics.verbose_name) AS verbose_name, sl_columns.warning_text
FROM sl_columns,
(
SELECT sl_columns_1.uuid AS uuid, sl_columns_1.created_on AS created_on, sl_columns_1.changed_on AS changed_on, sl_columns_1.id AS id, sl_columns_1.table_id AS table_id, sl_columns_1.is_aggregation AS is_aggregation, sl_columns_1.is_additive AS is_additive, sl_columns_1.is_dimensional AS is_dimensional, sl_columns_1.is_filterable AS is_filterable, sl_columns_1.is_increase_desired AS is_increase_desired, sl_columns_1.is_managed_externally AS is_managed_externally, sl_columns_1.is_partition AS is_partition, sl_columns_1.is_physical AS is_physical, sl_columns_1.is_temporal AS is_temporal, sl_columns_1.is_spatial AS is_spatial, sl_columns_1.name AS name, sl_columns_1.type AS type, sl_columns_1.unit AS unit, sl_columns_1.expression AS expression, sl_columns_1.description AS description, sl_columns_1.warning_text AS warning_text, sl_columns_1.external_url AS external_url, sl_columns_1.extra_json AS extra_json, sl_columns_1.created_by_fk AS created_by_fk, sl_columns_1.changed_by_fk AS changed_by_fk
FROM sl_columns AS sl_columns_1
LIMIT 0 OFFSET 500
) AS sl_columns_2 JOIN sl_dataset_columns
ON sl_dataset_columns.column_id = sl_columns_2.id JOIN sl_datasets
ON sl_datasets.id = sl_dataset_columns.dataset_id LEFT OUTER JOIN sl_dataset_tables
ON sl_datasets.is_physical AND sl_dataset_tables.dataset_id = sl_datasets.id JOIN dbs
ON dbs.id = sl_datasets.database_id LEFT OUTER JOIN table_columns
ON table_columns.uuid = sl_columns_2.uuid LEFT OUTER JOIN sql_metrics
ON sql_metrics.uuid = sl_columns_2.uuid
WHERE sl_columns.is_physical OR
table_columns.verbose_name IS NOT NULL OR
table_columns.verbose_name IS NOT NULL OR
sql_metrics.verbose_name IS NOT NULL OR
sql_metrics.d3format IS NOT NULL OR
sql_metrics.metric_type IS NOT NULL
I changed the code for:
def get_joined_tables(offset, limit):
    """Assemble the join tree for postprocessing a page of ``sl_columns`` rows.

    The page of columns is wrapped in an explicitly named subquery
    ("sl_columns_2") so its alias never collides with the real
    ``sl_columns`` table in the same FROM clause.

    :param offset: first row of the column page
    :param limit: maximum number of rows in the page
    :return: a SQLAlchemy join clause for use with ``select_from``
    """
    from sqlalchemy.orm import aliased

    # Alias NewColumn so the paginated subquery carries its own name.
    paged_columns = aliased(NewColumn)
    window = (
        session.query(paged_columns)
        .offset(offset)
        .limit(limit)
        .subquery("sl_columns_2")
    )

    # Re-attach the ORM entity to the paged rows by primary key so the
    # outer SELECT is constrained to the window.
    joined = sa.join(
        window,
        NewColumn,
        window.c.id == NewColumn.id,
    )
    # Column -> dataset association, keyed on the windowed column id.
    joined = joined.join(
        dataset_column_association_table,
        dataset_column_association_table.c.column_id == window.c.id,
    )
    joined = joined.join(
        NewDataset,
        NewDataset.id == dataset_column_association_table.c.dataset_id,
    )
    # Tables are linked only for physical datasets, hence the outer join.
    joined = joined.join(
        dataset_table_association_table,
        and_(
            NewDataset.is_physical,
            dataset_table_association_table.c.dataset_id == NewDataset.id,
        ),
        isouter=True,
    )
    joined = joined.join(Database, Database.id == NewDataset.database_id)
    # Original table columns and metrics are matched by shared UUID.
    joined = joined.join(
        TableColumn,
        TableColumn.uuid == window.c.uuid,
        isouter=True,
    )
    joined = joined.join(
        SqlMetric,
        SqlMetric.uuid == window.c.uuid,
        isouter=True,
    )
    return joined
And it is working
@micsbot Thank you! You code works for me!