/* Example for Working with Time Series Data */
/*--------------------------------------------------------------
SAS Sample Library
Name: tsdchap.sas
Description: Example program from SAS/ETS User's Guide,
Working with Time Series Data
Title: Example for Working with Time Series Data
Product: SAS/ETS Software
Keys: time series data
PROC:
Notes:
--------------------------------------------------------------*/
/* Monthly CPI identified by separate numeric YEAR and MONTH columns */
/* (no SAS date value is created in this step).                      */
data uscpi;
   input year month cpi;
   datalines;
1990 6 129.9
1990 7 130.4
1990 8 131.6
1990 9 132.7
1990 10 133.5
1990 11 133.8
1990 12 133.8
1991 1 134.6
1991 2 134.8
1991 3 135.0
1991 4 135.2
1991 5 135.6
1991 6 136.0
1991 7 136.2
;

/* List the rows exactly as they were read. */
proc print data=uscpi;
run;
/* Write the numeric value stored for the date literal 17 Oct 1991 to the log. */
data _null_;
   date = '17oct1991'd;
   put date=;
run;
/* Monthly CPI keyed by a single SAS date value.                     */
/* DATE is read with the MONYY7. informat and displayed with MONYY7. */
data uscpi;
   input date : monyy7. cpi;
   label  cpi  = "US Consumer Price Index";
   format date monyy7.;
   datalines;
jun1990 129.9
jul1990 130.4
aug1990 131.6
sep1990 132.7
oct1990 133.5
nov1990 133.8
dec1990 133.8
jan1991 134.6
feb1991 134.8
mar1991 135.0
apr1991 135.2
may1991 135.6
jun1991 136.0
jul1991 136.2
;
/* Copy DATE into two extra variables so the same stored value can be */
/* printed three ways: MONYY7., DATE9., and with no format assigned.  */
data fmttest;
   set uscpi;
   date0 = date;
   date1 = date;
   format date  monyy7.
          date1 date9.;
   label  date  = "DATE with MONYY7. format"
          date1 = "DATE with DATE9. format"
          date0 = "DATE with no format";
run;

/* The LABEL option makes the column headings show the labels above. */
proc print data=fmttest label;
run;
/* Build a synthetic daily panel: for each of the states NC and VA,    */
/* 200 consecutive days after 1 Oct 1993 (i = 1 gives 2 Oct 1993).     */
/* X and Y are draws from RANNOR with seed 123; CLOSE is a copy of X.  */
/* The loop counter I is not dropped, so it is kept in the output too. */
data full;
   format date date9.;
   do state = 'NC', 'VA';
      do i = 1 to 200;
         date  = intnx( 'DAY', '1Oct1993'd, i );
         x     = rannor(123);
         y     = rannor(123);
         close = x;
         output;
      end;
   end;
run;
/* Restrict the analysis to dates strictly after 31 Dec 1993 and */
/* strictly before 26 Mar 1994 by filtering inside the PROC step. */
proc arima data=full;
   where date > '31dec1993'd and date < '26mar1994'd;
   identify var=close;
run;
/* Variant 1: WHERE filters on DATE, a subsetting IF filters on STATE. */
data subset;
   set full;
   where date ge '1jan1970'd;
   if state eq 'NC';
run;

/* Variant 2: both conditions combined in a single subsetting IF. */
data subset;
   set full;
   if date ge '1jan1970'd and state eq 'NC';
run;

/* Variant 3: same row filter, and only DATE, X and Y are written out. */
data subset;
   set full;
   if date ge '1jan1970'd and state eq 'NC';
   keep date x y;
run;
/* Print only observations 20 through 25 of FULL via data set options. */
proc print data=full( firstobs=20 obs=25 );
run;
/* Two monthly series (CPI and PPI) side by side, one row per month. */
data usprice;
   input date : monyy7. cpi ppi;
   label  cpi  = "Consumer Price Index"
          ppi  = "Producer Price Index";
   format date monyy7.;
   datalines;
jun1990 129.9 114.3
jul1990 130.4 114.5
aug1990 131.6 116.5
sep1990 132.7 118.4
oct1990 133.5 120.8
nov1990 133.8 120.1
dec1990 133.8 118.7
jan1991 134.6 119.0
feb1991 134.8 117.2
mar1991 135.0 116.2
apr1991 135.2 116.0
may1991 135.6 116.5
jun1991 136.0 116.3
jul1991 136.2 116.0
;

proc print data=usprice;
run;
/* Same layout, but the series cover different ranges: CPI is missing */
/* (.) in the first two rows and PPI is missing in the last row.      */
data usprice;
   input date : monyy7. cpi ppi;
   format date monyy7.;
   datalines;
jun1990 . 114.3
jul1990 . 114.5
aug1990 131.6 116.5
sep1990 132.7 118.4
oct1990 133.5 120.8
nov1990 133.8 120.1
dec1990 133.8 118.7
jan1991 134.6 119.0
feb1991 134.8 117.2
mar1991 135.0 116.2
apr1991 135.2 116.0
may1991 135.6 116.5
jun1991 136.0 116.3
jul1991 136.2 .
;
/* Missing values can also occur inside the range: here the nov1990 */
/* row has neither CPI nor PPI and the mar1991 row has no PPI.      */
/* This step uses MONYY. without an explicit width.                 */
data usprice;
   input date : monyy. cpi ppi;
   format date monyy.;
   datalines;
jun1990 . 114.3
jul1990 . 114.5
aug1990 131.6 116.5
sep1990 132.7 118.4
oct1990 133.5 120.8
nov1990 . .
dec1990 133.8 118.7
jan1991 134.6 119.0
feb1991 134.8 117.2
mar1991 135.0 .
apr1991 135.2 116.0
may1991 135.6 116.5
jun1991 136.0 116.3
jul1991 136.2 .
;
/* Rebuild USPRICE with both series complete over jun1990 - jul1991. */
data usprice;
   input date : monyy7. cpi ppi;
   label  cpi  = "Consumer Price Index"
          ppi  = "Producer Price Index";
   format date monyy7.;
   datalines;
jun1990 129.9 114.3
jul1990 130.4 114.5
aug1990 131.6 116.5
sep1990 132.7 118.4
oct1990 133.5 120.8
nov1990 133.8 120.1
dec1990 133.8 118.7
jan1991 134.6 119.0
feb1991 134.8 117.2
mar1991 135.0 116.2
apr1991 135.2 116.0
may1991 135.6 116.5
jun1991 136.0 116.3
jul1991 136.2 116.0
;
/* Cross-sectional layout: one row per month, one CPI column per city */
/* (CPINY, CPICHI, CPILA).                                            */
data citycpi;
   input date : monyy7. cpiny cpichi cpila;
   format date monyy7.;
   datalines;
nov1989 133.200 126.700 130.000
dec1989 133.300 126.500 130.600
jan1990 135.100 128.100 132.100
feb1990 135.300 129.200 133.600
mar1990 136.600 129.500 134.500
apr1990 137.300 130.400 134.200
may1990 137.200 130.400 134.600
jun1990 137.100 131.700 135.000
jul1990 138.400 132.000 135.600
;
/* BY-group layout: one row per (city, month), with a single CPI column. */
/*                                                                      */
/* CITY is read with formatted input ($11.), which always consumes      */
/* columns 1-11 of the record regardless of blanks. The data lines      */
/* therefore MUST be column-aligned: city name padded to 11 columns,    */
/* one blank, then the date. With single-space separated records the    */
/* city field would absorb the start of the date (e.g. "New York JA")   */
/* and the DATE field would fail to parse.                              */
data cpicity;
   length city $11;
   input city $11. date : monyy. cpi;
   format date monyy.;
   datalines;
New York    JAN1990 135.100
New York    FEB1990 135.300
New York    MAR1990 136.600
New York    APR1990 137.300
New York    MAY1990 137.200
New York    JUN1990 137.100
New York    JUL1990 138.400
Chicago     JAN1990 128.100
Chicago     FEB1990 129.200
Chicago     MAR1990 129.500
Chicago     APR1990 130.400
Chicago     MAY1990 130.400
Chicago     JUN1990 131.700
Chicago     JUL1990 132.000
Los Angeles JAN1990 132.100
Los Angeles FEB1990 133.600
Los Angeles MAR1990 134.500
Los Angeles APR1990 134.200
Los Angeles MAY1990 134.600
Los Angeles JUN1990 135.000
Los Angeles JUL1990 135.600
;

/* Order by city and then date so later steps can use BY CITY / BY CITY DATE. */
proc sort data=cpicity;
   by city date;
run;
/* Forecast CPI 12 months past the end of USCPI and write everything  */
/* to FOREOUT. OUTFULL and OUTRESID are requested, and the steps      */
/* below tell the rows apart by the _TYPE_ variable.                  */
proc forecast data=uscpi
              interval=month
              lead=12
              out=foreout outfull outresid;
   id date;
   var cpi;
run;

/* Show the first six rows of the output data set. */
proc print data=foreout( obs=6 );
run;

/* Feed only the residual rows of FOREOUT into PROC SPECTRA. */
proc spectra data=foreout out=spectout;
   where _type_='RESIDUAL';
   var cpi;
run;
title "FORECAST Output Data Set with BY Groups";

/* Forecast each city's series separately (BY CITY) two months ahead */
/* with METHOD=EXPO. CPICITY must already be sorted by CITY.         */
proc forecast data=cpicity
              interval=month
              method=expo
              lead=2
              out=foreout outfull outresid;
   by city;
   id date;
   var cpi;
run;

proc print data=foreout( obs=6 );
run;
title "PROC ARIMA Output Data Set";

/* Difference CPI once, estimate with Q=1, and write a 12-month */
/* forecast to ARIMAOUT.                                        */
proc arima data=uscpi;
   identify var=cpi(1);
   estimate q=1;
   forecast id=date
            interval=month
            lead=12
            out=arimaout;
run;

proc print data=arimaout( obs=6 );
run;
title "Plot of USCPI Data";

/* Basic time series plot: CPI against DATE, with a marker per point. */
proc sgplot data=uscpi;
   series x=date y=cpi / markers;
run;

/* Same plot, with DATE shown through the YYQC. format and x-axis */
/* tick values listed quarterly from 1 Jan 1990 to 1 Jul 1991.    */
proc sgplot data=uscpi;
   format date yyqc.;
   series x=date y=cpi / markers;
   xaxis values=( '1jan90'd to '1jul91'd by qtr );
run;
title "ARIMA Forecasts of CPI";

/* Refit the model and regenerate ARIMAOUT for plotting. */
proc arima data=uscpi;
   identify var=cpi(1);
   estimate q=1;
   forecast id=date
            interval=month
            lead=12
            out=arimaout;
run;

title "ARIMA forecasts of CPI";

/* Overlay actual values, forecasts and the L95/U95 limits as scatter */
/* layers; the reference line at 15 Jul 1991 sits between the last    */
/* observed month (jul1991) and the first forecast month.             */
proc sgplot data=arimaout noautolegend;
   format date yyqc4.;
   scatter x=date y=cpi;
   scatter x=date y=forecast / markerattrs=(symbol=asterisk);
   scatter x=date y=l95 / markerattrs=(symbol=asterisk color=green);
   scatter x=date y=u95 / markerattrs=(symbol=asterisk color=green);
   xaxis values=( '1jan90'd to '1jul92'd by qtr );
   refline '15jul91'd / axis=x;
run;
title "Plot of Forecasts of USCPI Data";

/* Rebuild FOREOUT for the national series (the BY-city run above */
/* wrote to the same data set name).                              */
proc forecast data=uscpi
              interval=month
              lead=12
              out=foreout outfull outresid;
   id date;
   var cpi;
run;

/* Plot every row type except the residuals, one group per _TYPE_. */
proc sgplot data=foreout;
   where _type_ ne 'RESIDUAL';
   format date yyqc4.;
   scatter x=date y=cpi / group=_type_ markerattrs=(symbol=asterisk);
   xaxis values=( '1jan90'd to '1jul92'd by qtr );
   refline '15jul91'd / axis=x;
run;

title "Plot of Residuals for USCPI Data";

/* Needle plot of the residual rows only. */
proc sgplot data=foreout;
   where _type_ eq 'RESIDUAL';
   format date yyqc4.;
   needle x=date y=cpi / markers;
   xaxis values=( '1jan90'd to '1jul91'd by qtr );
run;
title "Plot of USCPI Data";

/* PROC GPLOT version of the plot: circles of height 2 joined by a */
/* spline-interpolated line, as set on the SYMBOL statement.       */
proc gplot data=uscpi;
   symbol i=spline v=circle h=2;
   plot cpi * date;
run;
/* Quarterly data identified by YEAR and QTR; YYQ() turns the pair */
/* into a SAS date value, shown with the YYQC. format.             */
data usecon;
   input year qtr gnp;
   date = yyq( year, qtr );
   format date yyqc.;
   datalines;
1990 1 5375.4
1990 2 5443.3
1990 3 5514.6
1990 4 5527.3
1991 1 5557.7
1991 2 5615.8
;
/* Monthly data identified by MONTH and a two-digit YEAR; MDY() with   */
/* day 1 builds the date value.                                        */
/* NOTE(review): the century given to a two-digit year presumably      */
/* follows the session's YEARCUTOFF= setting - confirm.                */
data uscpi;
   input month year cpi;
   date = mdy( month, 1, year );
   format date monyy.;
   datalines;
6 90 129.9
7 90 130.4
8 90 131.6
9 90 132.7
10 90 133.5
11 90 133.8
;
/* The reverse direction: read a date value, then derive numeric YEAR */
/* and MONTH from it. DATE is read with formatted input (MONYY7., no  */
/* colon modifier), so it takes exactly the first seven columns of    */
/* each record.                                                       */
data uscpi;
   input date monyy7. cpi;
   format date monyy7.;
   year  = year( date );
   month = month( date );
   datalines;
jun1990 129.9
jul1990 130.4
aug1990 131.6
sep1990 132.7
;
/* Hourly-style readings identified by YEAR, MONTH, DAY and HOUR.    */
/* MDY() builds the date and DHMS() combines it with the hour (zero  */
/* minutes and seconds) into a datetime value.                       */
data weather;
   input year month day hour temp;
   datetime = dhms( mdy( month, day, year ), hour, 0, 0 );
   format datetime datetime10.;
   datalines;
91 10 16 21 61
91 10 17 0 56
91 10 17 3 53
91 10 17 6 54
91 10 17 9 65
91 10 17 12 72
;
/* The reverse direction: read a datetime value and split it into    */
/* HOUR plus a DATE part, from which YEAR, MONTH and DAY are taken.  */
data weather;
   input datetime : datetime13. temp;
   format datetime datetime10.;
   hour  = hour( datetime );
   date  = datepart( datetime );
   year  = year( date );
   month = month( date );
   day   = day( date );
   datalines;
16oct91:21:00 61
17oct91:00:00 56
17oct91:03:00 53
17oct91:06:00 54
17oct91:09:00 65
17oct91:12:00 72
;
/* The input has values only; the date is generated from the row     */
/* number: row _N_ gets the month that is _N_-1 months after jun1990. */
data uscpi;
   input cpi;
   date = intnx( 'month', '1jun1990'd, _n_-1 );
   format date monyy7.;
   datalines;
129.9
130.4
131.6
132.7
;
/* Dates recorded as the 15th of each month. The MONYY7. display    */
/* format shows only month and year; the stored value keeps day 15. */
data uscpi;
   input date : date9. cpi;
   format date monyy7.;
   datalines;
15jun1990 129.9
15jul1990 130.4
15aug1990 131.6
15sep1990 132.7
;
/* From mid-month dates, compute the beginning, middle and end of the */
/* containing month with INTNX alignment arguments (increment 0 keeps */
/* the same month).                                                   */
data uscpi;
   input date : date9. cpi;
   format date monyy7.;
   monthbeg = intnx( 'month', date, 0, 'beg' );
   midmonth = intnx( 'month', monthbeg, 0, 'mid' );
   monthend = intnx( 'month', date, 0, 'end' );
   datalines;
15jun1990 129.9
15jul1990 130.4
15aug1990 131.6
15sep1990 132.7
;

/* Three ways to compute the 7th day of the month that contains DATE. */
/* DATE is the 15th here, so every method has to anchor to the start  */
/* of the month first. Adding 6 days straight to DATE, as             */
/* intnx('day', date, 6) does, would give the 21st and disagree with  */
/* the other two results.                                             */
data test;
   set uscpi;
   /* Method 1: rebuild the date from its month and year with day 7. */
   mon07_1 = mdy( month(date), 7, year(date) );
   /* Method 2: first day of the month plus six days (date arithmetic). */
   mon07_2 = intnx( 'month', date, 0, 'beg' ) + 6;
   /* Method 3: first day of the month advanced by six DAY intervals. */
   mon07_3 = intnx( 'day', intnx( 'month', date, 0 ), 6 );
run;
/* WIDTH = number of days in the month containing DATE: the start of */
/* the next month minus the start of the current month.              */
data uscpi;
   input date : date9. cpi;
   format date monyy7.;
   width = intnx( 'month', date, 1 ) - intnx( 'month', date, 0 );
   datalines;
15jun1990 129.9
15jul1990 130.4
15aug1990 131.6
15sep1990 132.7
;
/* NEWYEAR = start of the YEAR interval that follows the one          */
/* containing DATE-1. Stepping back one day first means a DATE that   */
/* is already a 1 January maps to itself instead of the next year.    */
data test;
   set uscpi;
   newyear = intnx( 'year', date - 1, 1 );
   format newyear date.;
run;
/* Count how many times each weekday falls in the month containing    */
/* DATE. PREVEND is the last day of the previous month and CURREND    */
/* the last day of the current month; INTCK with the shifted interval */
/* WEEK.n counts the WEEK.n boundaries crossed between the two.       */
data uscpi;
   set uscpi;
   prevEnd  = intnx( 'month', date, 0 ) - 1;
   currEnd  = intnx( 'month', date, 1 ) - 1;
   nSunday  = intck( 'week.1', prevEnd, currEnd );
   nMonday  = intck( 'week.2', prevEnd, currEnd );
   nTuesday = intck( 'week.3', prevEnd, currEnd );
   nWedday  = intck( 'week.4', prevEnd, currEnd );
   nThurday = intck( 'week.5', prevEnd, currEnd );
   nFriday  = intck( 'week.6', prevEnd, currEnd );
   nSatday  = intck( 'week.7', prevEnd, currEnd );
   /* The two helper dates are working variables only. */
   drop prevEnd currEnd;
run;
/* Sanity check: each observation after the first must be exactly one */
/* MONTH interval later than its predecessor; otherwise a message     */
/* with the observation number is written to the log.                 */
data _null_;
   set uscpi;
   retain prevdate;
   /* Nothing to compare against on the first observation. */
   if _n_ > 1 then do;
      if intck( 'month', prevdate, date ) ne 1 then
         put "Bad date sequence at observation number " _n_;
   end;
   prevdate = date;
run;
/* Reload the 14-month CPI series. */
data uscpi;
   input date : monyy7. cpi;
   label  cpi  = "US Consumer Price Index";
   format date monyy7.;
   datalines;
jun1990 129.9
jul1990 130.4
aug1990 131.6
sep1990 132.7
oct1990 133.5
nov1990 133.8
dec1990 133.8
jan1991 134.6
feb1991 134.8
mar1991 135.0
apr1991 135.2
may1991 135.6
jun1991 136.0
jul1991 136.2
;

/* Add the one-period lag and the first difference of CPI. Both are */
/* missing on the first observation.                                */
data uscpi;
   set uscpi;
   cpilag = lag( cpi );
   cpidif = dif( cpi );
run;

proc print data=uscpi;
run;
/* Reload the 14-month CPI series. */
data uscpi;
   input date : monyy7. cpi;
   label  cpi  = "US Consumer Price Index";
   format date monyy7.;
   datalines;
jun1990 129.9
jul1990 130.4
aug1990 131.6
sep1990 132.7
oct1990 133.5
nov1990 133.8
dec1990 133.8
jan1991 134.6
feb1991 134.8
mar1991 135.0
apr1991 135.2
may1991 135.6
jun1991 136.0
jul1991 136.2
;

/* DELIBERATE COUNTER-EXAMPLE - do not "fix". LAG() sits after the     */
/* subsetting IF, so it only runs for the 1991 rows and the first kept */
/* row (jan1991) gets a missing CPILAG rather than the dec1990 value.  */
data subset;
   set uscpi;
   if date ge '1jan1991'd;
   cpilag = lag( cpi ); /* WRONG PLACEMENT! */
run;

/* Correct version: LAG() runs on every input row and the rows are */
/* filtered afterwards. This step replaces SUBSET from above.      */
data subset;
   set uscpi;
   cpilag = lag( cpi );
   if date ge '1jan1991'd;
run;
/* Here the filter intentionally comes BEFORE the LAG call: FOREOUT  */
/* interleaves several row types, and the lag wanted is the previous */
/* residual, not the previous row of any type.                       */
data residual;
   set foreout;
   if _type_ eq "RESIDUAL";
   lagresid = lag( cpi );
run;
/* Plain lag over the stacked data: the first row of each city after */
/* the first picks up the last CPI of the preceding city. The next   */
/* step corrects this.                                               */
data cpicity;
   set cpicity;
   cpilag = lag( cpi );
run;

/* BY-group aware lag: LAG() still runs on every row, and the value */
/* is then blanked on the first row of each city.                   */
data cpicity;
   set cpicity;
   by city date;
   cpilag = lag( cpi );
   if first.city then cpilag = .;
run;
/* Reload the 14-month CPI series. */
data uscpi;
   input date : monyy7. cpi;
   label  cpi  = "US Consumer Price Index";
   format date monyy7.;
   datalines;
jun1990 129.9
jul1990 130.4
aug1990 131.6
sep1990 132.7
oct1990 133.5
nov1990 133.8
dec1990 133.8
jan1991 134.6
feb1991 134.8
mar1991 135.0
apr1991 135.2
may1991 135.6
jun1991 136.0
jul1991 136.2
;

/* Compute the lag and difference with PROC EXPAND transformations   */
/* instead of DATA step functions. METHOD=NONE is specified, so only */
/* the TRANSFORM= operations are requested here.                     */
proc expand data=uscpi
            out=uscpi
            method=none;
   id date;
   convert cpi=cpilag / transform=( lag 1 );
   convert cpi=cpidif / transform=( dif 1 );
run;
/* Lag and difference built by hand with RETAIN rather than LAG/DIF. */
/* The row is written by the explicit OUTPUT before CPILAG is        */
/* updated, so each output row carries the CPI value saved on the    */
/* previous iteration.                                               */
/* NOTE(review): if the incoming USCPI already contains a CPILAG     */
/* column, SET presumably reloads it on every row - confirm that is  */
/* intended.                                                         */
data uscpi;
   set uscpi;
   retain cpilag;
   cpidif = cpi - cpilag;
   output;
   /* Saved for the next observation; not part of the row just written. */
   cpilag = cpi;
run;
/* Twelve-period lag and twelve-period difference of CPI. */
data uscpi;
   set uscpi;
   cpilag12 = lag12( cpi );
   cpidif12 = dif12( cpi );
run;

/* Second difference: the difference of the first difference. */
data uscpi;
   set uscpi;
   cpi2dif = dif( dif( cpi ) );
run;

/* Change, from one row to the next, in the percent change of CPI */
/* over twelve periods.                                           */
data uscpi;
   set uscpi;
   infchng = dif( 100 * dif12( cpi ) / lag12( cpi ) );
run;
/* Percent change from the previous observation. */
data uscpi;
   set uscpi;
   pctchng = dif( cpi ) / lag( cpi ) * 100;
   label pctchng = "Monthly Percent Change, At Monthly Rates";
run;

/* Same one-period change, compounded over 12 periods. */
data uscpi;
   set uscpi;
   pctchng = ( ( cpi / lag( cpi ) ) ** 12 - 1 ) * 100;
   label pctchng = "Monthly Percent Change, At Annual Rates";
run;

/* Percent change against the value twelve observations earlier. */
data uscpi;
   set uscpi;
   pctchng = dif12( cpi ) / lag12( cpi ) * 100;
   label pctchng = "Percent Change from One Year Ago";
run;

/* Year-over-year change kept for December rows only. The DIF12 and */
/* LAG12 calls run on every row BEFORE the subsetting IF, so their  */
/* queues see the whole series.                                     */
data annual;
   set uscpi;
   pctchng = dif12( cpi ) / lag12( cpi ) * 100;
   format date year4.;
   label pctchng = "Percent Change: December to December";
   if month( date ) eq 12;
run;
/* Reload the 14-month CPI series. */
data uscpi;
   input date : monyy7. cpi;
   format date monyy7.;
   label cpi = "US Consumer Price Index";
   datalines;
jun1990 129.9
jul1990 130.4
aug1990 131.6
sep1990 132.7
oct1990 133.5
nov1990 133.8
dec1990 133.8
jan1991 134.6
feb1991 134.8
mar1991 135.0
apr1991 135.2
may1991 135.6
jun1991 136.0
jul1991 136.2
;

/* Aggregate the monthly series to yearly averages.                   */
/* ID DATE ties each monthly observation to its calendar month, so    */
/* the yearly groups follow the DATE values. Without an ID statement  */
/* PROC EXPAND has no date variable to work from and falls back on    */
/* default time identifiers. Every other PROC EXPAND call in this     */
/* program names the ID variable as well.                             */
proc expand data=uscpi out=annual from=month to=year;
   id date;
   convert cpi / observed=average method=aggregate;
run;

/* Percent change between consecutive yearly averages. */
data annual;
   set annual;
   pctchng = dif( cpi ) / lag( cpi ) * 100;
   label pctchng = "Percent Change in Yearly Averages";
run;
/* Percent change in a trailing 12-observation moving average of CPI. */
/* Statement order matters here: each LAGn/DIFn call keeps its own    */
/* queue and must run once per observation.                           */
data uscpi;
/* Running total, starting at 0 and carried across observations. */
retain sum12 0;
/* Working variables only; they are not written to the output. */
drop sum12 ave12 cpilag12;
set uscpi;
/* Add the current value to the running total ... */
sum12 = sum12 + cpi;
cpilag12 = lag12( cpi );
/* ... and once a value from 12 observations back exists, remove it, */
/* so the total covers only the latest 12 observations.              */
if cpilag12 ^= . then sum12 = sum12 - cpilag12;
/* The average is set only once lag11(cpi) is non-missing.            */
/* NOTE(review): this treats a missing lag as "fewer than 12 rows     */
/* read so far", which assumes CPI itself has no missing values -     */
/* confirm.                                                           */
if lag11( cpi ) ^= . then ave12 = sum12 / 12;
/* Compare the moving average with its value 12 observations earlier. */
pctchng = dif12( ave12 ) / lag12( ave12 ) * 100;
label pctchng = "Percent Change in 12 Month Moving Ave.";
run;
/* Leads (future values) of CPI, one and two periods ahead, through */
/* PROC EXPAND's LEAD transformation.                               */
proc expand data=uscpi
            out=uscpi
            method=none;
   id date;
   convert cpi=cpilead1 / transform=( lead 1 );
   convert cpi=cpilead2 / transform=( lead 2 );
run;
/* Lead series without PROC EXPAND, step 1: copy DATE and CPI, but   */
/* stamp each CPI with the PREVIOUS row's date (lag of DATE). The    */
/* first row has no previous date and is dropped. CPI is renamed to  */
/* CPILEAD on output.                                                */
data temp;
   set uscpi;
   keep date cpi;
   rename cpi = cpilead;
   date = lag( date );
   if date ne .;
run;

/* Step 2: merge back by DATE so each row gains the next period's CPI. */
data uscpi;
   merge uscpi temp;
   by date;
run;
/* Two leads in one pass. The first LAG call shifts DATE back by one */
/* row (written to TEMP1). The second LAG call has its own queue and */
/* is fed the already-lagged DATE, so it gives the date two rows     */
/* back (written to TEMP2). CPI is renamed on output in each set.    */
data temp1( rename=( cpi=cpilead1 ) )
     temp2( rename=( cpi=cpilead2 ) );
   set uscpi;
   keep date cpi;
   date = lag( date );
   if date ne . then output temp1;
   date = lag( date );
   if date ne . then output temp2;
run;

/* Attach both lead columns to the original rows by DATE. */
data uscpi;
   merge uscpi temp1 temp2;
   by date;
run;
title1 "International Airline Travel";
title2 "( in Thousands )";

/* Plot the AIR series from SASHELP.AIR in its original scale. */
proc sgplot data=sashelp.air;
   series x=date y=air / markers;
   yaxis label='Miles';
run;

/* Add the natural log of AIR as LOGAIR. */
data lair;
   set sashelp.air;
   logair = log( air );
run;

title2 "Log( 1000 Miles )";

/* Plot the log-transformed series. */
proc sgplot data=lair;
   series x=date y=logair / markers;
   yaxis label='Log Miles';
run;
/* Split USPRICE into one data set per series; every input row goes */
/* to both outputs, each keeping DATE and its own value column.     */
data uscpi( keep=date cpi )
     usppi( keep=date ppi );
   set usprice;
run;

/* Same split, but each output gets its own date range: USCPI from */
/* aug1990 onward, USPPI up to and including jun1991.              */
data uscpi( keep=date cpi )
     usppi( keep=date ppi );
   set usprice;
   if date ge '1aug1990'd then output uscpi;
   if date le '1jun1991'd then output usppi;
run;
title "Original Data Set";

/* Interleaved layout: FOREOUT rows dated after 1 May 1991 and */
/* before 1 Oct 1991 (first ten matching rows).                */
proc print data=foreout( obs=10 );
   where date > '1may1991'd and date < '1oct1991'd;
run;

/* Turn the interleaved rows into one row per DATE, with one column */
/* per _TYPE_ value (ID _TYPE_ names the new columns).              */
proc transpose data=foreout
               out=trans( drop=_name_ );
   where date > '1may1991'd and date < '1oct1991'd;
   by date;
   id _type_;
   var cpi;
run;

title "Transposed Data Set";

proc print data=trans( obs=10 );
run;
title "Original Data Set";

proc print data=cpicity;
run;

/* PROC TRANSPOSE below runs BY DATE, so re-sort by date first */
/* (CPICITY itself is ordered by city).                        */
proc sort data=cpicity out=temp;
   by date city;
run;

/* One row per DATE, one CPI column per city (ID CITY). */
proc transpose data=temp
               out=citycpi( drop=_name_ );
   by date;
   id city;
   var cpi;
run;

title "Transposed Data Set";

proc print data=citycpi;
run;
/* Going back from one-column-per-city to one row per (city, date). */
/* A constant _NAME_ of 'CPI' is added to every row before          */
/* transposing.                                                     */
data temp;
   set citycpi;
   _name_ = 'CPI';
run;

/* NAME=CITY stores the former column names in a variable called CITY. */
proc transpose data=temp
               out=retrans
               name=city;
   by date;
run;

/* Restore the city-then-date ordering. */
proc sort data=retrans;
   by city date;
run;

title "Retransposed Data Set";

proc print data=retrans;
run;
/* PROC EXPAND with only an ID variable and no other options. */
proc expand data=usprice
            out=interpl;
   id date;
run;

/* Declare the input as monthly and both series as period averages. */
proc expand data=usprice
            out=interpl
            from=month;
   id date;
   convert cpi ppi / observed=average;
run;

/* Same input description, converted from monthly to weekly output. */
proc expand data=usprice
            out=interpl
            from=month
            to=week;
   id date;
   convert cpi ppi / observed=average;
run;

/* OBSERVED=(AVERAGE,MIDDLE): a pair of attributes, the first for  */
/* the input series and the second for the output series.          */
proc expand data=usprice
            out=midpoint
            from=month;
   id date;
   convert cpi ppi / observed=(average,middle);
run;
/* Several observations per data line: the trailing @@ holds the    */
/* line across iterations, so each iteration reads the next CPI/PPI */
/* pair. The date is generated from the observation number,         */
/* starting at jun1990.                                             */
data usprice;
   input cpi ppi @@;
   date = intnx( 'month', '1jun1990'd, _n_-1 );
   format date monyy7.;
   datalines;
129.9 114.3 130.4 114.5 131.6 116.5
132.7 118.4 133.5 120.8 133.8 120.1 133.8 118.7
134.6 119.0 134.8 117.2 135.0 116.2 135.2 116.0
135.6 116.5 136.0 116.3 136.2 116.0
;
/* Series stored one after another. Each series has a header line     */
/* (name, start month, start year, number of values, label) followed  */
/* by its values. The step writes one row per value, stepping the     */
/* date forward one month at a time.                                  */
data temp;
   length _name_ $8 _label_ $40;
   keep _name_ _label_ date value;
   format date monyy.;
   /* Header: the & modifier lets the label contain single blanks. */
   input _name_ month year nval _label_ &;
   date = mdy( month, 1, year );
   do k = 1 to nval;
      /* Trailing @ holds the line so the next pass continues after */
      /* this value.                                                */
      input value @;
      output;
      date = intnx( 'month', date, 1 );
   end;
   datalines;
cpi 8 90 12 Consumer Price Index
131.6 132.7 133.5 133.8 133.8 134.6 134.8 135.0
135.2 135.6 136.0 136.2
ppi 6 90 13 Producer Price Index
114.3 114.5 116.5 118.4 120.8 120.1 118.7 119.0
117.2 116.2 116.0 116.5 116.3
;

/* Order by date, then series name, for the BY DATE transpose below. */
proc sort data=temp;
   by date _name_;
run;

/* One row per DATE. No ID statement is given; TEMP carries _NAME_ */
/* and _LABEL_ columns for the transposed variables.               */
proc transpose data=temp
               out=usprice( drop=_name_ );
   by date;
   var value;
run;

proc contents data=usprice;
run;

proc print data=usprice;
run;