/* Managing a Large Number of Categories */
/****************************************************************/
/* S A S   S A M P L E   L I B R A R Y */
/* */
/* NAME: PAREX12 */
/* TITLE: Managing a Large Number of Categories */
/* PRODUCT: QC */
/* SYSTEM: ALL */
/* KEYS: Pareto Charts */
/* PROCS: PARETO */
/* DATA: */
/* */
/* SUPPORT: saswgr */
/* REF: PROC PARETO, Example 12 */
/* MISC: */
/* */
/****************************************************************/
/* National Vital Statistics Reports
* Volume 61, Number 4
* Deaths: Final Data for 2010
* May 8, 2013
* http://www.cdc.gov/nchs/data/nvsr/nvsr61/nvsr61_04.pdf
*/
/* Build CancerDeaths2010: one observation per cancer type (Type) with  */
/* the number of U.S. deaths recorded for 2010 (Deaths).                */
/*                                                                      */
/* The DATALINES records are column-sensitive:                          */
/*   - Type is read with the & modifier, so a value may contain single  */
/*     embedded blanks; it is stored in at most 45 characters.          */
/*   - Deaths is read with COMMA7. from the fixed field that starts in  */
/*     column 47, so every count is right-aligned to end in column 53.  */
/* Do not indent the records or collapse the blanks between the type    */
/* and the count; doing so makes the INPUT statement misread the data.  */
data CancerDeaths2010;
   length Type $ 45;
   input Type & @47 Deaths comma7.;
   datalines;
Lip, oral cavity and pharynx                    8,474
Esophagus                                      14,490
Stomach                                        11,390
Colon, rectum and anus                         52,622
Liver and intrahepatic bile ducts              20,305
Pancreas                                       36,888
Larynx                                          3,691
Trachea, bronchus and lung                    158,318
Skin                                            9,154
Breast                                         41,435
Cervix                                          3,939
Uterus                                          8,402
Ovary                                          14,572
Prostate                                       28,561
Kidney and renal pelvis                        13,219
Bladder                                        14,731
Meninges, brain, other central nervous system  14,164
Hodgkin's disease                               1,231
Non-Hodgkin's lymphoma                         20,294
Leukemia                                       22,569
Multiple myeloma and immunoproliferative       11,428
Other lymphoid, hematopoietic and related          68
All other and unspecified                      64,798
;
/* Basic Pareto chart of cancer type, using Deaths as the frequency */
/* (count) variable for each category.                              */
PROC PARETO DATA = CancerDeaths2010;
   VBAR Type / FREQ = Deaths;
RUN;
/* Request an 800-pixel-wide ODS Graphics output area, then draw the */
/* same chart again.                                                 */
ODS GRAPHICS / WIDTH = 800px;

PROC PARETO DATA = CancerDeaths2010;
   VBAR Type / FREQ = Deaths;
RUN;
/* Enhanced chart: bars are labeled with their values, the catch-all  */
/* category is placed last, the category axis label is suppressed and */
/* a category legend label is supplied instead, the TITLE statement   */
/* text is used as the graph title, and the chart summary is saved    */
/* in the CSummary data set for later steps.                          */
ODS GRAPHICS / WIDTH = 800px;
TITLE 'U.S. Cancer Deaths in 2010 by Type';

PROC PARETO DATA = CancerDeaths2010;
   VBAR Type /
      FREQ        = Deaths
      OUT         = CSummary
      LAST        = 'All other and unspecified'
      BARLABEL    = VALUE
      FREQAXIS    = 0 TO 100 BY 10
      NOCATLABEL
      CATLEGLABEL = 'Cancer Type'
      NLEGEND     = 'Total Cancer Deaths'
      ODSTITLE    = TITLE
   ;
RUN;
/* List the CSummary data set as saved by the OUT= option. */
PROC PRINT DATA = CSummary;
RUN;

/* Fold every category whose _PCT_ value is below 2.0 into the       */
/* catch-all category by overwriting its Type value. CSummary is     */
/* replaced in place.                                                */
DATA CSummary;
   SET CSummary;
   IF _PCT_ LT 2.0 THEN DO;
      Type = 'All other and unspecified';
   END;
RUN;

/* List CSummary again to show the relabeled categories. */
PROC PRINT DATA = CSummary;
RUN;
/* Final chart from the recoded CSummary data set. _COUNT_ supplies   */
/* the frequencies, the catch-all category is placed last, and an     */
/* unframed inset reports the total using the COMMA7. format.         */
PROC PARETO DATA = CSummary;
   VBAR Type /
      FREQ        = _COUNT_
      LAST        = 'All other and unspecified'
      BARLABEL    = VALUE
      FREQAXIS    = 0 TO 100 BY 10
      NOCATLABEL
      CATLEGLABEL = 'Cancer Type'
      ODSTITLE    = TITLE
   ;
   INSET N = 'Total Cancer Deaths:'(comma7.) / NOFRAME;
RUN;