Note: The %INDHD_RUN_MODEL macro
does not generate a SampleSQL.txt file.
To view the output data in a SAS session, you can use PROC PRINT, provided
that a LIBNAME statement gives you access to the Hadoop output file.
The following example prints the first ten rows of the output table.
/* Hadoop connection attributes: store the HDFS and MapReduce server/port   */
/* values and the user credentials in the INDCONN macro variable.           */
/* %str() keeps the whole multi-line attribute list as one text value.      */
/* NOTE(review): presumably read by the %INDHD_RUN_MODEL call later in this */
/* example -- confirm. The password is in clear text here; treat it as a    */
/* placeholder.                                                             */
%let INDCONN=%str(HDFS_SERVER=hd.mycompany.com
HDFS_PORT=8120
MAPRED_SERVER=hd.mycompany.com
MAPRED_PORT=8021
USER=myuserid
PASSWORD=mypwd);
/* Assign the GRIDLIB libref through the Hadoop engine, naming the HDFS */
/* directories used for temporary files, data files, and metadata.      */
libname gridlib hadoop
   user=myuserid pw=mypwd
   server="hd.mycompany.com"
   hdfs_tempdir="/user/hdmd/temp"
   hdfs_datadir="/user/hdmd/data"
   hdfs_metadir="/user/hdmd/meta"
;
/* Drop GRIDLIB.PEOPLESEQ before the HDMD file is created again */
proc delete data=gridlib.peopleseq;
run;
/* Create the HDMD (metadata) file GRIDLIB.PEOPLESEQ describing the        */
/* tab-delimited custom sequence file people.seq and its five columns.     */
/* INPUT_CLASS is the fully qualified class name of the custom input       */
/* format; it is kept as ONE unbroken quoted literal, because a quoted     */
/* string that wraps across source lines can corrupt the class name.       */
proc hdmd NAME=GRIDLIB.PEOPLESEQ
   FILE_FORMAT=DELIMITED
   SEP=tab
   FILE_TYPE=custom_sequence
   INPUT_CLASS='com.sas.hadoop.ep.inputformat.sequence.PeopleCustomSequenceInputFormat'
   DATA_FILE='people.seq';
   COLUMN name   varchar(20);
   COLUMN sex    varchar(1);
   COLUMN age    int;
   COLUMN height double;
   COLUMN weight double;
run;
/*----------------------------------------------------------------*/
/* Start the MapReduce job with the run-model macro for Hadoop.   */
/* Input is described by the peopleseq HDMD file; output data and */
/* output metadata paths are given explicitly.                    */
/*----------------------------------------------------------------*/
%indhd_run_model(
   infiletype=custom_sequence,
   inmetaname=/user/hdmd/meta/peopleseq.sashdmd,
   outdatadir=/user/hdmd/output/peopletxt.out,
   outmetadir=/user/hdmd/meta/peopletxt.sashdmd,
   scorepgm=/user/hdmd/ds2/inout.ds2,
   forceoverwrite=true,
   trace=no
);
/* Show the first ten observations of the output table */
proc print data=gridlib.peopletxt (obs=10);
run;
The columns in the output file can be used in any SQL query expression,
for example:
/* Example query expressions against the output table.                    */
/* NOTE(review): em_classification is not one of the columns declared in  */
/* the PROC HDMD step of this example; presumably it comes from the       */
/* scoring program's output -- confirm.                                   */
SELECT *
  FROM gridlib.peopletxt;
SELECT em_classification
  FROM gridlib.peopletxt;