/* --------------------------------------------------------------------------- $Revision: 1.1.2.3 $ $Date: 2014/03/26 11:03:38 $ NAME: cxt_key_single_dim.sas TYPE: MACRO DESCRIPTION: Key incoming web analytics data for a single dimension. Existing keys will be attached to the output tables. New keys will be created where necessary. Ouptuts will be staged in a manner that allows them to be consumed in later jobs. SAS VERSION: 9.3 INPUT: Specify data sets, files, reports, graphs, html, e-mail, pages, macro variables, slists, return codes, etc. +----------------------------------------------------------------- | Input Item Name: DIM_TABLE_ROOT_NAME | Description: Name of a dimension. Valid values are REFERRER, PAGE, REFERRER_QUERY_STRING, and Visitor. | Type: macro var | Mode: read only +----------------------------------------------------------------- | Input Item Name: UNKEYED_TBL | Description: A dataset containing unkeyed data. There will be one record per DETAIL_SK. | Dataset contains all fields necessary to feed MD5 tables, key the dimension, | and supplement the dimension with additional observations. | Type: macro var / SAS dataset | Mode: read only +----------------------------------------------------------------- | Input Item Name: METHOD | Description: How data is to be keyed. There are multiple options: RANDOM (random lookup), | SEQUENTIAL (sequential pass), FORMAT (using SAS formats), HASH (hash tables), | and DYN_HASH (dynamic hash table process.) | Type: macro var | Mode: read only +----------------------------------------------------------------- | Input Item Name: WEBMART_TBL | Description: A table telling the process where the webmart lives. | Type: macro var / SAS dataset | Mode: read only +----------------------------------------------------------------- | Input Item Name: CURR_MAX_VALS | Description: The location of the traditional WACNFG.CURR_MAX_VALS, which is a dataset | informing the keying process of the last known maximum surrogate key values. | Type: macro var / SAS dataset | Mode: read only +----------------------------------------------------------------- | Input Item Name: OLD_MD5_TBL | Description: A dataset containing existing MD5 and surrogate key values for &DIM_TABLE_ROOT_NAME | whcih has been used in the past. | Type: macro var/ data set | Mode: Read +----------------------------------------------------------------- OUTPUT: Specify data sets, files, reports, graphs, html, e-mail, pages, macro variables, slists, return codes, etc. +----------------------------------------------------------------- | Output Item Name: NEW_MD5_TBL | Description: A dataset containing existing MD5 and surrogate key values for &DIM_TABLE_ROOT_NAME. These records are new keys only. | Type: macro var / data set | Mode: Write +----------------------------------------------------------------- | Output Item Name: KEYED_OUTPUT | Description: A dataset that contains a list of detail_sk values and | surrogate key values, the latter to be added to the fact table. | Type: macro var / data set | Mode: Write +----------------------------------------------------------------- | Output Item Name: CURR_MAX_OUT | Description: A dataset containing the highest assigned surrogate key value. | Type: macro var / data set | Mode: Write +----------------------------------------------------------------- | Output Item Name: NEW_DIM_TBL | Description: A dataset to be appended to &DIM_TABLE_ROOT_NAME_DIM, which contains | new dimension table records. | Type: macro var / data set | Mode: Write +----------------------------------------------------------------- | Output Item Name: STAGE_STAT | Description: A dataset containing staging metadata (start/completion status/time.) Optionally created. | Type: macro var / data set | Mode: Write +------------------------------------------------------------------ CALLS: %cxt_metadata_library_access %cxt_dim_get_sk_max_values %cxt_sequential_key %cxt_random_key %cxt_format_key %cxt_hash_key %cxt_dyn_hash_key %cxt_external_msg EXAMPLES (optional): %cxt_key_single_dim (....); HISTORY: userid date desc sascos 02/13/2012 initial coding sascos 10/15/2012 Added optional staging status output ------------------------------------------------------------------------------ Copyright (c) 2005-2012, SAS Institute Inc., Cary, NC, USA, All Rights Reserved ------------------------------------------------------------------------------ */ %macro cxt_key_single_dim (dim_table_root_name=referrer, unkeyed_tbl=wawork.referrer_md5, old_md5_tbl=wacnfg.referrer_md5, new_md5_tbl=work.referrer_md5, keyed_output=wawork.ref_keyed, new_dim_tbl=wawork.referrer_dim, method=RANDOM, webmart_tbl=wacnfg.webmart, curr_max_vals=wacnfg.curr_max_sk_values, curr_max_out=work.referrer_max_val, stage_stat=)/* /store*/; %global retcode; /* The surrogate key and md5 variables can be determined by &dim_table_root_name */ %let sk_variable=%trim(%left(&dim_table_root_name))_sk; %let md5_variable=%trim(%left(&dim_table_root_name))_md5; %let retcode=0; /* Initialize the process and ensure all libraries are assigned. */ %cxt_init_global_macs; %cxt_external_msg; %cxt_dim_get_sk_max_value (curr_max_sk_dataset=&curr_max_vals, sk_field=&sk_variable); /********************************************************************/ /* Figure out the fact table's key, which is used in remerges later */ /********************************************************************/ %global fact_key; %if %upcase(&dim_table_root_name)=GEO_IP or %upcase(&dim_table_root_name)=USER_AGENT or %upcase(&dim_table_root_name)=PROFILE %then %let fact_key=session_sk; %else %if %upcase(&dim_table_root_name)=ORIGINATION or %upcase(&dim_table_root_name)=SEARCH_TERM or %upcase(&dim_table_root_name)=CI_RESPONSE or %upcase(&dim_table_root_name)=REFERRER or %upcase(&dim_table_root_name)=REFERRER_QUERY_STRING or %upcase(&dim_table_root_name)=SEARCH_ENGINE %then %let fact_key=session_attribution_sk; %else %if %upcase(&dim_table_root_name)=GOAL or %upcase(&dim_table_root_name)=PROCESS_EXCEPTION or %upcase(&dim_table_root_name)=FORM_FIELD or %upcase(&dim_table_root_name)=INTERNAL_PROMOTION or %upcase(&dim_table_root_name)=IN_PAGE_ERROR or %upcase(&dim_table_root_name)=DOCUMENT or %upcase(&dim_table_root_name)=PRODUCT %then %let fact_key=temporary_sk; %else %if %upcase(&dim_table_root_name)=PROCESS_STEP %then %let fact_key=bus_process_step_details_sk; %else %let fact_key=detail_sk; /*********************************************************************/ /* Figure out special variables to keep on the detail dataset, and */ /* possibly drop from the dimension data set. */ /*********************************************************************/ %global keeplst drplst; %if %upcase(&dim_table_root_name)=GOAL %then %do; %let drplst=detail_sk session_attribution_sk session_sk goal_reached_dttm goal_revenue_amt load_dttm; %let keeplst=&sk_variable load_dttm detail_sk session_attribution_sk session_sk goal_reached_dttm goal_revenue_amt; %end; %else %if %upcase(&dim_table_root_name)=PROFILE %then %do; %let drplst=load_dttm; %let keeplst=&sk_variable &fact_key load_dttm; %end; %else %if %upcase(&dim_table_root_name)=PROCESS_EXCEPTION %then %do; %let drplst=bus_process_step_details_sk; %let keeplst=&sk_variable &fact_key bus_process_step_details_sk; %end; %else %if %upcase(&dim_table_root_name)=FORM_FIELD %then %do; %let drplst=detail_sk session_sk session_attribution_sk next_detail_sk search_results_sk attempt_index_cnt attempt_status_cd; %let drplst=&drplst change_index_no submit_flg form_field_value form_field_detail_dttm load_dttm; %let keeplst=detail_sk form_field_sk session_sk session_attribution_sk next_detail_sk search_results_sk attempt_index_cnt; %let keeplst=&keeplst attempt_status_cd change_index_no submit_flg form_field_value form_field_detail_dttm load_dttm; %if %upcase(&method)=SEQUENTIAL %then %let keeplst=&keeplst temporary_sk;; %end; %else %if %upcase(&dim_table_root_name)=CI_RESPONSE %then %do; %let drplst=temporary_sk; %let keeplst=&sk_variable &fact_key temporary_sk entry_source_id; %end; %else %if %upcase(&dim_table_root_name)=IN_PAGE_ERROR %then %do; %let drplst=temporary_sk detail_sk error_location_txt in_page_error_dttm session_sk session_attribution_sk load_dttm; %let keeplst=&sk_variable detail_sk error_location_txt in_page_error_dttm session_sk session_attribution_sk load_dttm; %end; %else %if %upcase(&dim_table_root_name)=DOCUMENT %then %do; %let drplst=temporary_sk detail_sk session_sk session_attribution_sk document_location_sk link_event_dttm load_dttm; %let keeplst=&sk_variable detail_sk session_sk session_attribution_sk document_location_sk link_event_dttm load_dttm; %end; %else %if %upcase(&dim_table_root_name)=PRODUCT %then %do; %let drplst=order_detail_sk cart_temporary_sk detlprod_temporary_sk; %let keeplst=&sk_variable temporary_sk order_detail_sk cart_temporary_sk detlprod_temporary_sk; %end; %else %do; %let drplst=; %let keeplst=&sk_variable &fact_key; %end; /***************************************************/ /* Valid values for &method are: */ /* RANDOM, SEQUENTIAL, HASH, FORMAT, and DYN_HASH. */ /* All behave the same EXCEPT sequential */ /***************************************************/ %if %upcase(&method)=SEQUENTIAL %then %do;/* Sequential */ %cxt_sequential_key (old_key_file = &old_md5_tbl, unkeyed_data = &unkeyed_tbl, new_md5_data = &new_md5_tbl, sk_field = &sk_variable, md5_field = &md5_variable, keep_cols_mvar= _all_, out_detail_keys = &keyed_output, out_append_keys= &new_dim_tbl, fact_key= &fact_key ); %end; /* Sequential */ %else %do; /* all the other approaches */ %let pgmname =cxt_%trim(%left(&method))_key; %&pgmname; %end; /* all the other approaches */ /************************************************/ /* Set the new MAX_SK value */ /************************************************/ %if %sysfunc(exist(&new_md5_tbl)) %then %do; /* New surrogate keys possibly assigned */ /* Retreive the highest _sk variable assigned */ proc sql noprint; select max(&sk_variable) format 30. into :max_sk from &new_md5_tbl; quit; %end; /* New surrogate keys possibly assigned */ %else %let max_sk=0; /* No new surrogates assigned if the table doesn't exist */ /* It is possible that no new surrogate keys are assigned. */ /* In that case, we want to ensure we retain the original */ /* _sk value. Otherwise, take the highest new one. */ data &curr_max_out; attrib max_sk format=8.; max_sk=max(&&&sk_variable._value, &max_sk); output; run; /**************************************************************/ /* Determine if there was an error, and deal if so. */ /**************************************************************/ %if &SYSERR gt 0 %then %let retcode=1; /* It is possible that calling program wants a dump of staging status */ /* If so, provide it. */ %if &stage_stat ne %str() %then %do; /* create status */ /* Did it work? */ proc sql noprint; select count(1) into :oldtblcnt from &unkeyed_tbl; select count(1) into :newtblcnt from &keyed_output; quit; /* Record coutns should match */ %if &oldtblcnt ne &newtblcnt %then %let retcode=1; /* Staging history update */ data &stage_stat; start="&etls_startTime"dt; end=.; stage_errors=&retcode; run; %end; /* create status */ %mend;