sql-metadata
sql-metadata copied to clipboard
Parser returns tokens as tables
The attached code shows that the parser returns token such as 'amount', 'DAY', 'NULL' when requested to provide tables.
a="""
WITH cte_agg_ad_networks AS (
SELECT A.*
,CASE WHEN COALESCE(B.insert_date,D.insert_date,E.insert_date,F.insert_date,G.insert_date) IS NOT NULL
THEN '1'
ELSE '0'
END AS is_test,
c.revised_name,
'1' AS is_actual,
CASE
WHEN A.source_campaign <> c.revised_name THEN c.revised_name
ELSE last_value(A.source_campaign) over (partition BY A.source_campaignid ORDER BY date rows BETWEEN unbounded preceding AND unbounded following)
END AS last_source_campaign,
COALESCE(H.new_source_name,A.source) AS new_source,
NULL AS cac_distribution_channel_type
from spark_stg.marketing_external__agg_ad_networks A
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity B ON A.source_adid = B.ad_id AND A.date >= B.start_date AND A.date <= B.end_date
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity D ON A.source_adgroupid = D.adgroup_id AND A.date >= D.start_date AND A.date <= D.end_date AND D.ad_id IS NULL
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity E ON A.source_campaign = E.campaign_name AND A.date >= E.start_date AND A.date <= E.end_date AND E.adgroup_id IS NULL
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity F ON A.source = F.source AND A.date >= F.start_date AND A.date <= F.end_date AND F.campaign_name IS NULL
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity G ON A.channel = G.distribution_channel AND A.date >= G.start_date AND A.date <= G.end_date AND G.source IS NULL
LEFT JOIN spark_stg.bi_workspace__distribution_level_names C
ON A.channel = C.channel
AND A.source = C.source
AND A.source_campaign = C.campaign
LEFT JOIN spark_stg.bi_workspace__source_by_campaign_mapping H ON A.source_campaignid = H.campaign_id
) ,tv_channel AS (
SELECT 'tv' AS channel,
cast(left(cast(datetime as varchar(20)),10) as date) AS date,
NULL AS source,
creative as source_campaign,
NULL AS source_campaignid,
channel as source_adgroup,
NULL AS source_adgroupid,
creative as source_adname,
tcaspotid as source_adid,
NULL AS source_keyword,
NULL AS source_keywordid,
NULL AS source_match_type,
NULL AS campaign_cob,
NULL AS source_serial,
NULL AS ad_labels,
NULL AS state,
NULL AS device,
NULL AS impressions,
NULL AS clicks,
SUM(CAST(COST AS NUMERIC)) spend,
NULL AS premium,
NULL AS leads,
NULL AS quotes,
NULL AS payment_views,
NULL AS purchases,
NULL AS starts,
'1' AS is_test,
null AS revised_name,
'1' AS is_actual,
creative AS last_source_campaign,
NULL AS new_source,
NULL AS cac_distribution_channel_type
FROM stg_etl_tables.tvsquard_spots
GROUP BY date,source_campaign,source_adgroup,source_adname,source_adid
)
,distribution_channel_step1 as (
select act.*
from spark_stg.marketing_external__adaptive_Actuals_Operating_expense act
left join spark_stg.db_data_science__ref_budget_to_actuals bda on cast(date_format(to_timestamp(act.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) = cast(add_months(bda.`active month (day 1)`, -1) as date)
where(cast(date_format(to_timestamp(act.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) < trunc(cast(add_months(current_timestamp(), -1) as date),'month')
or (cast(date_format(to_timestamp(act.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) = trunc(cast(add_months(current_timestamp(), -1) as date),'month')
and cast(current_timestamp() as date)> bda.`full conversion date` ))
and not (level_name = '21_US_A team' and account_code not in ('Pro_cost','APT_cost'))
)
, distribution_channel_step2 as (
select bod.*
from spark_stg.marketing_external__adaptive_BOD_Operating_expense bod
left join spark_stg.db_data_science__ref_budget_to_actuals bda on cast(date_format(to_timestamp(bod.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) = cast(add_months(bda.`active month (day 1)`, -1) as date)
where ( cast(date_format(to_timestamp(bod.date, 'MM_yyyy'), 'yyyy-MM-dd') as date)>= trunc(cast(current_timestamp()as date),'month')
or (cast(date_format(to_timestamp(bod.date, 'MM_yyyy'), 'yyyy-MM-dd') as date)=trunc(cast(add_months(current_timestamp(), -1) as date),'month')
and cast(current_timestamp() as date)<=bda.`full conversion date` )
)
and not (level_name = '21_US_A team' and account_code not in ('Pro_cost','APT_cost'))
)
, distribution_channel_step3 as (
select * from distribution_channel_step1
union
select * from distribution_channel_step2)
, distribution_channel as (
SELECT
c.distribution_channel AS channel,
cast(periods.date as date) AS date,
NULL AS source,
NULL AS source_campaign,
NULL AS source_campaignid,
'CAC related expenses' AS source_adgroup,
NULL AS source_adgroupid,
NULL AS source_adname,
NULL AS source_adid,
NULL AS source_keyword,
NULL AS source_keywordid,
NULL AS source_match_type,
NULL AS campaign_cob,
NULL AS source_serial,
NULL AS ad_labels,
NULL AS state,
NULL AS device,
NULL AS impressions,
NULL AS clicks,
case
when account_code = '6202' then (-1.00*amount / EXTRACT( DAY FROM cast(monthlastday as date)))
when c.distribution_channel = 'Lifecycle' and nvl(amount,0)=0 then 300000/EXTRACT(DAY FROM cast(monthlastday as date))
else amount/EXTRACT(DAY FROM cast(monthlastday as date))
end AS spend,
NULL AS premium,
NULL AS leads,
NULL AS quotes,
NULL AS payment_views,
NULL AS purchases,
NULL AS starts,
'0' AS is_test,
null AS revised_name,
case when version = 'Actuals' then '1' else '0' end is_actual,
null AS last_source_campaign,
NULL AS source,
c.channel_type as cac_distribution_channel_type
from distribution_channel_step3 a
left join spark_stg.db_data_science__ref_cost_categorization c on a.level_name = c.level_name and a.account_name = c.account_name
join spark_stg.bi_workspace__periods periods on cast(date_format(to_timestamp(a.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) = monthfirstday
where c.channel_type != 'Performance Marketing'
)
, union_all_steps as (
select * from distribution_channel
union
select * from cte_agg_ad_networks
union
select * from tv_channel
)
, daily_agg_ad_networks_step1 as (
SELECT distinct
aan.channel,
aan.date,
aan.new_source AS source,
COALESCE(scm.campaign_name_for_sources,aan.last_source_campaign) AS source_campaign,
aan.source_campaignid,
aan.source_adgroup,
aan.source_adgroupid,
aan.source_adname,
aan.source_adid,
aan.source_keyword,
aan.source_keywordid,
aan.source_match_type,
aan.campaign_cob,
aan.source_serial,
aan.impressions,
aan.clicks,
aan.spend,
aan.ad_labels,
aan.state,
aan.premium,
aan.leads,
aan.quotes,
aan.payment_views,
aan.purchases,
aan.is_test,
CASE WHEN aan.channel ='tv' THEN NULL ELSE COALESCE(cmp.account_manager, src.account_manager, cnl.account_manager) END AS account_manager ,
CASE
WHEN aan.channel = 'organic' AND aan.source_serial = '90001' THEN 'Search Brand'
WHEN aan.channel = 'organic' AND aan.source_serial != '90001' THEN 'Search Non-Brand'
WHEN (aan.channel = 'sem' AND aan.new_source IN('adwords_brand','bingads_brand')) THEN 'Search Brand'
WHEN aan.new_source LIKE '%_agents' THEN 'Direct to Agent'
WHEN aan.channel = 'direct' THEN 'Digital Ads'
WHEN (aan.channel = 'sem' AND aan.new_source IN('adwords_retargeting','bingads_retargeting','adwords_brand_re_targeting')) THEN 'Search Retargeting'
WHEN aan.channel = 'sem' AND Lower(NVL(NVL(revised_name,source_campaign),last_source_campaign)) like 'brand' THEN 'Search Brand'
WHEN (aan.channel = 'display' AND REPLACE(aan.new_source,'-','_') in('gdn_re_targeting','criteo_retargeting','dv360_rtgn')) THEN 'Digital Ads - Retargeting'
WHEN (aan.channel = 'display' AND REPLACE(aan.new_source,'-','_') in('adroll')) AND source_campaignid = '33333333' THEN 'Digital Ads - Retargeting'
WHEN (aan.channel = 'display' AND REPLACE(aan.new_source,'-','_') in('adroll')) AND source_campaignid = '33333331' THEN 'Digital Ads'
WHEN (aan.channel = 'display' AND aan.new_source = 'criteo') THEN 'Digital Ads'
WHEN (aan.channel = 'social' AND (aan.new_source IN('facebook_rmkt','linkedin-rmkt') OR aan.last_source_campaign LIKE '%rmkt%' OR aan.source_campaign LIKE '%rmkt%')) THEN 'Digital Ads - Retargeting'
WHEN (aan.channel = 'video' AND aan.new_source = 'youtube_re_targeting') THEN 'Digital Ads - Retargeting'
WHEN (aan.channel = 'video' AND aan.new_source = 'youtube' AND NVL(NVL(revised_name,source_campaign),last_source_campaign) IN ('contractors_yt_remarketing_mobile','contractors_yt_remarketing','general_contractors_yt_remarketing')) THEN 'Digital Ads - Retargeting'
WHEN aan.new_source LIKE '%awareness%' THEN 'Awareness'
WHEN aan.channel IN ('influencers', 'social', 'native', 'display', 'video', 'tv') THEN 'Digital Ads'
WHEN aan.channel IN ('email', 'dm') THEN 'Direct Comm'
WHEN aan.channel IN ('organic','sem') THEN 'Search Non-Brand'
WHEN aan.channel like 'ap-%' THEN 'partnerships'
WHEN aan.channel IN ('partnerships','agents','support') THEN aan.channel
WHEN aan.source_adgroup = 'CAC related expenses' THEN cac_distribution_channel_type
WHEN aan.channel like '%support%' THEN 'support'
ELSE 'Digital Ads'
END AS distribution_channel_type,
NVL(NVL (l.cob_id,CASE WHEN aan.campaign_cob = '' THEN 0 ELSE cast(aan.campaign_cob as INTEGER) END),0) AS cob_id,
COALESCE(cobs.marketing_cob_group,l.cob_group) AS marketing_cob_group,
cobs.cob_name AS cob_name,
COALESCE(cobs.cob_industry,l.cob_industry) AS cob_industry,
aan.revised_name,
aan.starts,
aan.is_actual,
coalesce(scm.sem_account,aan.new_source) as source_func,
NVL(NVL(aan.last_source_campaign,scm.campaign_name_for_sources),aan.revised_name) as campaign_func,
COALESCE(cmp.account_manager, src.account_manager, cnl.account_manager) as account_manager_func,
COALESCE(cmp.streams, src.streams, cnl.streams) as streams_func,
device
FROM cte_agg_ad_networks aan
LEFT JOIN spark_stg.next_insurance_prod__lms_links l ON aan.source_serial = l.source_serial
LEFT JOIN spark_stg.dwh__sources_test_cobs cobs ON nvl (nvl (l.cob_id,CASE WHEN aan.campaign_cob = '' THEN 0 ELSE cast(aan.campaign_cob as INTEGER) END),0) = cobs.cob_id
LEFT JOIN spark_stg.optimization_stg__acquisition_account_managers cmp ON NULLIF(lower(NVL(revised_name,source_campaign)),'') = NULLIF(cmp.campaign_name,'') and lower(aan.new_source) = cmp.source
LEFT JOIN spark_stg.optimization_stg__acquisition_account_managers src ON lower(aan.new_source) = src.source AND lower(aan.channel) = src.distribution_channel AND NVL(src.campaign_name,'') = ''
LEFT JOIN spark_stg.optimization_stg__acquisition_account_managers cnl ON lower(aan.channel) = cnl.distribution_channel AND NVL(cnl.source,'') = ''
LEFT JOIN spark_stg.bi_workspace__sem_campaign_mapping scm ON aan.source_campaignid = scm.campaign_id
WHERE DATE < current_timestamp()
)
select
distinct
aan.channel,
aan.date,
aan.source,
aan.source_campaign,
aan.source_campaignid,
aan.source_adgroup,
aan.source_adgroupid,
aan.source_adname,
aan.source_adid,
aan.source_keyword,
aan.source_keywordid,
aan.source_match_type,
aan.campaign_cob,
aan.source_serial,
aan.impressions,
aan.clicks,
aan.spend,
aan.ad_labels,
aan.state,
aan.premium,
aan.leads,
aan.quotes,
aan.payment_views,
aan.purchases,
aan.is_test,
account_manager,
distribution_channel_type,
cob_id,
marketing_cob_group,
cob_name,
cob_industry,
revised_name,
attributionjsonsources(
cast(aan.date as varchar(20)),
aan.channel,
source_func,
campaign_func,
aan.source_campaignid,
LOWER(NVL(aan.source_adgroup,aan.source_adgroupid)),
aan.source_adgroupid,
NULL,
LOWER(NVL(aan.source_keyword,aan.source_keywordid)),
NULL,
NULL,
CASE WHEN aan.channel IN ('partnerships','sem','social','display','video','agents','native','tv') or aan.channel like 'ap-%' THEN 'paid' ELSE 'nonpaid' END,
NULL,
NULL,
NULL,
aan.source_adid,
aan.source_match_type,
distribution_channel_type,
cast(aan.is_test as VARCHAR(10)),
CASE WHEN aan.channel ='tv' THEN NULL ELSE account_manager_func END,
aan.ad_labels ,
NULL,
NUll,
CASE WHEN aan.channel ='tv' THEN NULL ELSE streams_func END,
NULL
) AS source_json,
aan.starts,
aan.is_actual ,
attributionjsonsession(NULL,NULL,NULL,NULL,NULL,device,NULL,NULL,NULL,NULL,NULL,NULL,NULL) AS session_json
from daily_agg_ad_networks_step1 aan"""
from sql_metadata import Parser
used_tables_from_query = Parser(a).tables
print(used_tables_from_query)
# 'cast', 'monthlastday', 'c.distribution_channel', 'amount', 'DAY', 'NULL', 'null', 'case', 'c.channel_type',