libname
lasrsvr clear;).
libname source "/data/marketing/2012";
libname target greenplm
server = "grid001.example.com"
user = dbuser
password = dbpass
schema = public
database = template1
dbcommit=1000000;
proc hpds2 data = source.mktdata
out = target.mktdata (distributed_by = 'distributed randomly'); 1
performance host = "grid001.example.com"
install = "/opt/TKGrid";;
data DS2GTF.out;
method run();
set DS2GTF.in;
end;
enddata;
run;
proc hpds2 data = source.mkdata2
out = target.mkdata2 (dbtype=(id='int')
distributed_by='distributed by (id)'); 2
performance host = "grid001.example.com"
install = "/opt/TKGrid";
data DS2GTF.out;
method run();
set DS2GTF.in;
end;
enddata;
run;| 1 | The rows of data from the input data set are distributed randomly to Greenplum. |
| 2 | The ID column in the input data set is identified as being an integer data type. The rows of data are distributed based on the value of the ID column. |
libname tdlib teradata
server="dbc.example.com"
database=hps
user=dbuser
password=dbpass
bulkload=yes; 1
data tdlib.order_fact;
set work.order_fact;
run;
data tdlib.product_dim (dbtype=(partno='int') 2
dbcreate_table_opts='primary index(partno)'); 3
set work.product_dim;
run;
data tdlib.salecode(dbtype=(_day='int' fpop='varchar(2)')
bulkload=yes
dbcreate_table_opts='primary index(_day,fpop)'); 4
set work.salecode;
run;
data tdlib.automation(bulkload=yes
dbcommit=1000000 5
dbcreate_table_opts='unique primary index(obsnum)'); 6
set automation;
obsnum = _n_;
run;| 1 | Specify the BULKLOAD=YES option. This option is shown as a LIBNAME option but you can specify it as a data set option. |
| 2 | Specify a data type of int for the variable named partno. |
| 3 | Specify to use the variable named partno as the distribution key for the table. |
| 4 | Specify to use the variables that are named _day and fpop as a distribution key for the table that is named salecode. |
| 5 | Specify the DBCOMMIT= option when you are loading many rows. This option interacts with the BULKLOAD= option to perform checkpointing. Checkpointing provides known synchronization points if a failure occurs during the loading process. |
| 6 | Specify
the UNIQUE keyword in the table
options to indicate that the primary key is unique. This keyword can
improve table loading performance.
|
libname gplib greenplm server="grid001.example.com"
database=hps
schema=public
user=dbuser
password=dbpass;
data gplib.automation(distributed_by='distributed randomly'); 1
set work.automation;
run;
data gplib.results(dbtype=(rep='int') 2
distributed_by='distributed by (rep)') 3;
set work.results;
run;
data gplib.salecode(dbtype=(day='int' fpop='varchar(2)') 4
distributed_by='distributed by day,fpop'); 5
set work.salecode;
run;| 1 | Specify a random distribution of the data. This data set option is for the SAS/ACCESS Interface to Greenplum. |
| 2 | Specify a data type of int for the variable named rep. |
| 3 | Specify to use the variable named rep as the distribution key for the table that is named results. |
| 4 | Specify a data type of int for the variable named day and a data type of varchar(2) for the variable named fpop. |
| 5 | Specify to use the combination of variables day and fpop as the distribution key for the table that is named salecode. |
libname tdlib teradata server="dbc.example.com"
database=hps
user=dbuser
password=dbpass;
data tdlib.parts_dim;
set work.parts_dim;
run;