/* Resources */

/* Managing a Large Number of Categories */

/****************************************************************/
/*          S A S   S A M P L E   L I B R A R Y                 */
/*                                                              */
/*    NAME: PAREX12                                             */
/*   TITLE: Managing a Large Number of Categories               */
/* PRODUCT: QC                                                  */
/*  SYSTEM: ALL                                                 */
/*    KEYS: Pareto Charts                                       */
/*   PROCS: PARETO                                              */
/*    DATA:                                                     */
/*                                                              */
/* SUPPORT: saswgr                                              */
/*     REF: PROC PARETO, Example 12                             */
/*    MISC:                                                     */
/*                                                              */
/****************************************************************/

/* National Vital Statistics Reports
 * Volume 61, Number 4
 * Deaths: Final Data for 2010
 * May 8, 2013
 * http://www.cdc.gov/nchs/data/nvsr/nvsr61/nvsr61_04.pdf
 */

/* Build the input data set: one observation per cancer type, holding */
/* the type name (Type) and the number of deaths (Deaths).            */
data CancerDeaths2010;
   /* Declare Type before INPUT so it is a 45-character variable. */
   length Type $ 45;
   /* The & modifier lets Type contain single embedded blanks (the    */
   /* value ends at two consecutive blanks). Deaths is then read from */
   /* column 47 with the COMMA7. informat, which removes the commas.  */
   input Type & @47 Deaths comma7.;
   datalines;
Lip, oral cavity and pharynx                    8,474
Esophagus                                      14,490
Stomach                                        11,390
Colon, rectum and anus                         52,622
Liver and intrahepatic bile ducts              20,305
Pancreas                                       36,888
Larynx                                          3,691
Trachea, bronchus and lung                    158,318
Skin                                            9,154
Breast                                         41,435
Cervix                                          3,939
Uterus                                          8,402
Ovary                                          14,572
Prostate                                       28,561
Kidney and renal pelvis                        13,219
Bladder                                        14,731
Meninges, brain, other central nervous system  14,164
Hodgkin's disease                               1,231
Non-Hodgkin's lymphoma                         20,294
Leukemia                                       22,569
Multiple myeloma and immunoproliferative       11,428
Other lymphoid, hematopoietic and related          68
All other and unspecified                      64,798
;

/* Baseline Pareto chart: one bar per Type, with Deaths supplying the */
/* frequency of each category. All other options are left at their    */
/* defaults.                                                          */
proc pareto data = CancerDeaths2010;
   vbar Type / freq=Deaths;
run;

/* Same chart as before, redrawn after setting the ODS Graphics */
/* output width to 800 pixels.                                  */
ods graphics / width = 800px;

proc pareto data = CancerDeaths2010;
   vbar Type / freq=Deaths;
run;

/* Enhanced Pareto chart of the raw categories.                        */
/*   freq=        Deaths supplies the frequency of each category       */
/*   barlabel=    label each bar with its value                        */
/*   last=        keep the catch-all category as the last bar          */
/*   nocatlabel / catleglabel=  category-axis and category-legend      */
/*                labeling ('Cancer Type')                             */
/*   freqaxis=    frequency-axis tick values 0 to 100 in steps of 10   */
/*   nlegend=     label for the sample-size legend                     */
/*   odstitle=    take the graph title from the TITLE statement        */
/*   out=         save the chart summary in CSummary; later steps read */
/*                its _PCT_ and _COUNT_ variables                      */
ods graphics / width=800px;
title 'U.S. Cancer Deaths in 2010 by Type';
proc pareto data=CancerDeaths2010;
   vbar Type / freq        = Deaths
               barlabel    = value
               last        = 'All other and unspecified'
               nocatlabel
               catleglabel = 'Cancer Type'
               freqaxis    = 0 to 100 by 10
               nlegend     = 'Total Cancer Deaths'
               odstitle    = title
               out         = CSummary;
run;

/* List the contents of the CSummary data set. */
proc print data = CSummary; run;

/* Rewrite CSummary in place: any category whose _PCT_ is below 2.0 */
/* is relabeled with the catch-all category name.                   */
data CSummary;
   set CSummary;
   if _PCT_ < 2.0 then do;
      Type = 'All other and unspecified';
   end;
run;

/* List CSummary again to show the relabeled categories. */
proc print data = CSummary; run;

/* Pareto chart of the relabeled summary data. _COUNT_ supplies the    */
/* frequencies, the catch-all category is kept as the last bar, and an */
/* unframed inset reports n, labeled 'Total Cancer Deaths:' and        */
/* formatted with COMMA7.                                              */
proc pareto data = CSummary;
   vbar Type / freq        = _COUNT_
               freqaxis    = 0 to 100 by 10
               barlabel    = value
               last        = 'All other and unspecified'
               catleglabel = 'Cancer Type'
               nocatlabel
               odstitle    = title;
   inset n='Total Cancer Deaths:'(comma7.) / noframe;
run;