sql-metadata icon indicating copy to clipboard operation
sql-metadata copied to clipboard

Parser returns tokens as tables

Open miky-schreiber opened this issue 4 years ago • 0 comments

The attached code shows that the parser returns token such as 'amount', 'DAY', 'NULL' when requested to provide tables.

a="""
WITH cte_agg_ad_networks AS (
SELECT A.*
,CASE WHEN COALESCE(B.insert_date,D.insert_date,E.insert_date,F.insert_date,G.insert_date) IS NOT NULL
     THEN '1'
     ELSE '0'
 END AS is_test,
 c.revised_name,
 '1' AS is_actual,
        CASE
          WHEN A.source_campaign <> c.revised_name THEN c.revised_name
          ELSE  last_value(A.source_campaign) over (partition BY A.source_campaignid ORDER BY date rows BETWEEN unbounded preceding AND unbounded following)
        END AS last_source_campaign,
 COALESCE(H.new_source_name,A.source) AS new_source,
 NULL AS cac_distribution_channel_type
from spark_stg.marketing_external__agg_ad_networks A
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity B ON A.source_adid = B.ad_id AND A.date >= B.start_date AND A.date <= B.end_date
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity D ON A.source_adgroupid = D.adgroup_id AND A.date >= D.start_date AND A.date <= D.end_date AND D.ad_id IS NULL
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity E ON A.source_campaign = E.campaign_name AND A.date >= E.start_date AND A.date <= E.end_date AND E.adgroup_id IS NULL
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity F ON A.source = F.source AND A.date >= F.start_date AND A.date <= F.end_date AND F.campaign_name IS NULL
LEFT JOIN spark_stg.bi_workspace__marketing_test_activity G ON A.channel = G.distribution_channel AND A.date >= G.start_date AND A.date <= G.end_date AND G.source IS NULL
LEFT JOIN spark_stg.bi_workspace__distribution_level_names C
         ON A.channel = C.channel
        AND A.source = C.source
        AND A.source_campaign = C.campaign
LEFT JOIN spark_stg.bi_workspace__source_by_campaign_mapping H ON A.source_campaignid = H.campaign_id
) ,tv_channel AS (
SELECT 'tv' AS channel,
       cast(left(cast(datetime as varchar(20)),10) as date) AS date,
       NULL AS source,
       creative as source_campaign,
       NULL AS source_campaignid,
       channel as source_adgroup,
       NULL AS source_adgroupid,
       creative as source_adname,
       tcaspotid as source_adid,
       NULL AS source_keyword,
       NULL AS source_keywordid,
       NULL AS source_match_type,
       NULL AS campaign_cob,
       NULL AS source_serial,

       NULL AS ad_labels,
       NULL AS state,
       NULL AS device,
       NULL AS impressions,
       NULL AS clicks,
       SUM(CAST(COST AS NUMERIC)) spend,
       NULL AS premium,
       NULL AS leads,
       NULL AS quotes,
       NULL AS payment_views,
       NULL AS purchases,
       NULL AS starts,
       '1' AS is_test,
       null AS revised_name,
       '1' AS is_actual,
       creative AS last_source_campaign,
       NULL AS new_source,
       NULL AS cac_distribution_channel_type
FROM stg_etl_tables.tvsquard_spots
GROUP BY  date,source_campaign,source_adgroup,source_adname,source_adid
) 

,distribution_channel_step1 as (
    select  act.*
    from spark_stg.marketing_external__adaptive_Actuals_Operating_expense act
    left join spark_stg.db_data_science__ref_budget_to_actuals bda on cast(date_format(to_timestamp(act.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) = cast(add_months(bda.`active month (day 1)`, -1) as date)
    where(cast(date_format(to_timestamp(act.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) < trunc(cast(add_months(current_timestamp(), -1) as date),'month')
      or (cast(date_format(to_timestamp(act.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) = trunc(cast(add_months(current_timestamp(), -1) as date),'month') 
          and cast(current_timestamp() as date)> bda.`full conversion date` ))
    and not  (level_name = '21_US_A team' and account_code not in ('Pro_cost','APT_cost'))
)
, distribution_channel_step2 as (
    select bod.*
    from spark_stg.marketing_external__adaptive_BOD_Operating_expense bod
    left join spark_stg.db_data_science__ref_budget_to_actuals bda  on cast(date_format(to_timestamp(bod.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) = cast(add_months(bda.`active month (day 1)`, -1) as date)
    where ( cast(date_format(to_timestamp(bod.date, 'MM_yyyy'), 'yyyy-MM-dd') as date)>= trunc(cast(current_timestamp()as date),'month')
            or (cast(date_format(to_timestamp(bod.date, 'MM_yyyy'), 'yyyy-MM-dd') as date)=trunc(cast(add_months(current_timestamp(), -1) as date),'month')
            and  cast(current_timestamp() as date)<=bda.`full conversion date` )
            )
	and not (level_name = '21_US_A team' and account_code not in ('Pro_cost','APT_cost'))
)
, distribution_channel_step3 as (
select * from distribution_channel_step1
union 
select * from distribution_channel_step2)

, distribution_channel as (
SELECT 
c.distribution_channel AS channel,
cast(periods.date as date) AS date,
NULL AS source,
NULL AS source_campaign,
NULL AS source_campaignid,
'CAC related expenses' AS source_adgroup,
NULL AS source_adgroupid,
NULL AS source_adname,
NULL AS source_adid,
NULL AS source_keyword,
NULL AS source_keywordid,
NULL AS source_match_type,
NULL AS campaign_cob,
NULL AS source_serial,
NULL AS ad_labels,
NULL AS state,
NULL AS device,
NULL AS impressions,
NULL AS clicks,
case 
when account_code = '6202' then (-1.00*amount / EXTRACT( DAY FROM cast(monthlastday as date))) 
when c.distribution_channel = 'Lifecycle' and nvl(amount,0)=0 then 300000/EXTRACT(DAY FROM  cast(monthlastday as date))
else amount/EXTRACT(DAY FROM  cast(monthlastday as date))
end AS spend,
NULL AS premium,
NULL AS leads,
NULL AS quotes,
NULL AS payment_views,
NULL AS purchases,
NULL AS starts,
'0' AS is_test,
null AS revised_name,
case when version = 'Actuals' then '1' else '0' end is_actual,
null AS last_source_campaign,
NULL AS source,
c.channel_type as cac_distribution_channel_type
    from distribution_channel_step3 a
    left join spark_stg.db_data_science__ref_cost_categorization c on a.level_name = c.level_name and a.account_name = c.account_name
    join spark_stg.bi_workspace__periods periods  on  cast(date_format(to_timestamp(a.date, 'MM_yyyy'), 'yyyy-MM-dd') as date) = monthfirstday
    where c.channel_type != 'Performance Marketing'
)

, union_all_steps as (
select * from distribution_channel
union 
select * from cte_agg_ad_networks
union 
select * from tv_channel

)
, daily_agg_ad_networks_step1 as (
SELECT distinct
    aan.channel,
    aan.date,
    aan.new_source AS source,
    COALESCE(scm.campaign_name_for_sources,aan.last_source_campaign) AS source_campaign,
    aan.source_campaignid,
    aan.source_adgroup,
    aan.source_adgroupid,
    aan.source_adname,
    aan.source_adid,
    aan.source_keyword,
    aan.source_keywordid,
    aan.source_match_type,
    aan.campaign_cob,
    aan.source_serial,
    aan.impressions,
    aan.clicks,
    aan.spend,
    aan.ad_labels,
    aan.state,
    aan.premium,
    aan.leads,
    aan.quotes,
    aan.payment_views,
    aan.purchases,
    aan.is_test,
    CASE WHEN aan.channel ='tv' THEN NULL ELSE COALESCE(cmp.account_manager, src.account_manager, cnl.account_manager) END AS account_manager ,
    CASE
     WHEN aan.channel = 'organic' AND aan.source_serial = '90001' THEN 'Search Brand'
     WHEN aan.channel = 'organic' AND aan.source_serial != '90001' THEN 'Search Non-Brand'
     WHEN (aan.channel = 'sem' AND aan.new_source IN('adwords_brand','bingads_brand')) THEN 'Search Brand'
     WHEN aan.new_source LIKE '%_agents' THEN 'Direct to Agent'
     WHEN aan.channel = 'direct' THEN 'Digital Ads'
     WHEN (aan.channel = 'sem' AND aan.new_source IN('adwords_retargeting','bingads_retargeting','adwords_brand_re_targeting')) THEN 'Search Retargeting'
     WHEN aan.channel = 'sem' AND Lower(NVL(NVL(revised_name,source_campaign),last_source_campaign)) like 'brand' THEN 'Search Brand'
     WHEN (aan.channel = 'display' AND REPLACE(aan.new_source,'-','_') in('gdn_re_targeting','criteo_retargeting','dv360_rtgn')) THEN 'Digital Ads - Retargeting'
     WHEN (aan.channel = 'display' AND REPLACE(aan.new_source,'-','_') in('adroll')) AND source_campaignid = '33333333' THEN 'Digital Ads - Retargeting'
     WHEN (aan.channel = 'display' AND REPLACE(aan.new_source,'-','_') in('adroll')) AND source_campaignid = '33333331' THEN 'Digital Ads'
                     WHEN (aan.channel = 'display' AND aan.new_source = 'criteo') THEN 'Digital Ads'
                     WHEN (aan.channel = 'social' AND (aan.new_source IN('facebook_rmkt','linkedin-rmkt') OR aan.last_source_campaign LIKE '%rmkt%' OR aan.source_campaign LIKE '%rmkt%')) THEN 'Digital Ads - Retargeting'
                     WHEN (aan.channel = 'video' AND aan.new_source = 'youtube_re_targeting') THEN 'Digital Ads - Retargeting'
                     WHEN (aan.channel = 'video' AND aan.new_source = 'youtube' AND NVL(NVL(revised_name,source_campaign),last_source_campaign) IN ('contractors_yt_remarketing_mobile','contractors_yt_remarketing','general_contractors_yt_remarketing')) THEN 'Digital Ads - Retargeting'
                     WHEN aan.new_source LIKE '%awareness%' THEN 'Awareness'
                     WHEN aan.channel IN ('influencers', 'social', 'native', 'display', 'video', 'tv') THEN 'Digital Ads'
                     WHEN aan.channel IN ('email', 'dm') THEN 'Direct Comm'
                     WHEN aan.channel IN ('organic','sem') THEN 'Search Non-Brand'
                     WHEN aan.channel like 'ap-%' THEN 'partnerships'
                     WHEN aan.channel IN ('partnerships','agents','support') THEN aan.channel
                     WHEN aan.source_adgroup = 'CAC related expenses' THEN cac_distribution_channel_type
                     WHEN aan.channel like '%support%' THEN 'support'
                     ELSE 'Digital Ads'
                  END AS distribution_channel_type,
                  NVL(NVL (l.cob_id,CASE WHEN aan.campaign_cob = '' THEN 0 ELSE cast(aan.campaign_cob as INTEGER) END),0) AS cob_id,
                  COALESCE(cobs.marketing_cob_group,l.cob_group) AS marketing_cob_group,
                  cobs.cob_name AS cob_name,
                  COALESCE(cobs.cob_industry,l.cob_industry) AS cob_industry,
                  aan.revised_name,
                  aan.starts,
                  aan.is_actual,
    coalesce(scm.sem_account,aan.new_source) as source_func,
    NVL(NVL(aan.last_source_campaign,scm.campaign_name_for_sources),aan.revised_name) as campaign_func,
    COALESCE(cmp.account_manager, src.account_manager, cnl.account_manager) as account_manager_func,
    COALESCE(cmp.streams, src.streams, cnl.streams) as streams_func,
    device 
FROM cte_agg_ad_networks aan
  LEFT JOIN spark_stg.next_insurance_prod__lms_links l ON aan.source_serial = l.source_serial
  LEFT JOIN spark_stg.dwh__sources_test_cobs cobs ON nvl (nvl (l.cob_id,CASE WHEN aan.campaign_cob = '' THEN 0 ELSE cast(aan.campaign_cob as INTEGER) END),0) = cobs.cob_id
  LEFT JOIN spark_stg.optimization_stg__acquisition_account_managers cmp ON NULLIF(lower(NVL(revised_name,source_campaign)),'') = NULLIF(cmp.campaign_name,'') and lower(aan.new_source) = cmp.source
  LEFT JOIN spark_stg.optimization_stg__acquisition_account_managers src ON lower(aan.new_source) = src.source AND lower(aan.channel) = src.distribution_channel AND NVL(src.campaign_name,'') = ''
  LEFT JOIN spark_stg.optimization_stg__acquisition_account_managers cnl ON lower(aan.channel) = cnl.distribution_channel AND NVL(cnl.source,'') = ''
  LEFT JOIN spark_stg.bi_workspace__sem_campaign_mapping scm ON aan.source_campaignid = scm.campaign_id
WHERE DATE < current_timestamp()
)

select 
 distinct
      aan.channel,
      aan.date,
      aan.source,
      aan.source_campaign,
      aan.source_campaignid,
      aan.source_adgroup,
      aan.source_adgroupid,
      aan.source_adname,
      aan.source_adid,
      aan.source_keyword,
      aan.source_keywordid,
      aan.source_match_type,
      aan.campaign_cob,
      aan.source_serial,
      aan.impressions,
      aan.clicks,
      aan.spend,
      aan.ad_labels,
      aan.state,
      aan.premium,
      aan.leads,
      aan.quotes,
      aan.payment_views,
      aan.purchases,
      aan.is_test,
      account_manager,
      distribution_channel_type,
      cob_id,
      marketing_cob_group,
      cob_name,
      cob_industry,
      revised_name,
      attributionjsonsources(
            cast(aan.date as varchar(20)),
            aan.channel,
            source_func,
            campaign_func,
            aan.source_campaignid,
            LOWER(NVL(aan.source_adgroup,aan.source_adgroupid)),
            aan.source_adgroupid,
            NULL,
            LOWER(NVL(aan.source_keyword,aan.source_keywordid)),
            NULL,
            NULL,
            CASE WHEN aan.channel IN ('partnerships','sem','social','display','video','agents','native','tv') or aan.channel like 'ap-%' THEN 'paid' ELSE 'nonpaid' END,
            NULL,
            NULL,
            NULL,
            aan.source_adid,
            aan.source_match_type,
            distribution_channel_type,
            cast(aan.is_test as VARCHAR(10)),
            CASE WHEN aan.channel ='tv' THEN NULL ELSE account_manager_func END,
            aan.ad_labels ,
            NULL,
            NUll,
            CASE WHEN aan.channel ='tv' THEN NULL ELSE  streams_func END,
            NULL 
            )  AS source_json,
    aan.starts,
    aan.is_actual      ,
    attributionjsonsession(NULL,NULL,NULL,NULL,NULL,device,NULL,NULL,NULL,NULL,NULL,NULL,NULL) AS session_json
    from daily_agg_ad_networks_step1 aan"""
from sql_metadata import Parser

used_tables_from_query = Parser(a).tables
print(used_tables_from_query)
# 'cast', 'monthlastday', 'c.distribution_channel', 'amount', 'DAY', 'NULL', 'null', 'case', 'c.channel_type',

miky-schreiber avatar Dec 13 '21 21:12 miky-schreiber