Vaccine Adverse Event Reporting System Data Preprocessing

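The VAERS data for 2002-2006 is read into SAS data sets by a SAS program called Vaers_Import.sas. The program imports each yearly CSV file and then concatenates the yearly tables into a table called VAERALL (the yearly vaccine files are likewise combined into VAERVAXALL). Vaers_Import.sas is included in the Getting Started with Text Miner 4.2 zip file.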
/*
 * Import one yearly VAERS CSV file into the DMTM9 library.
 *
 *   dir    - folder that holds the CSV files
 *   year   - four-digit report year, e.g. 2006
 *   prefix - output table prefix; the table is dmtm9.<prefix><year>
 *   suffix - file-name suffix; the input file is <dir>\<year><suffix>.csv
 *
 * The first row of each file supplies the variable names (GETNAMES=YES)
 * and data starts on row 2. An existing table is overwritten (REPLACE).
 *
 * NOTE(review): the DMTM9 libref must already be assigned when this runs;
 * no LIBNAME statement is visible in this part of the program.
 */
%macro import_vaers(dir=, year=, prefix=, suffix=);
   proc import out= dmtm9.&prefix.&year
      datafile= "&dir\&year.&suffix..csv"
      dbms=csv replace;
      getnames=yes;
      datarow=2;
   run;
%mend import_vaers;

/*
 * Import every yearly report file (<year>vaersdata.csv -> dmtm9.vaers<year>)
 * and every yearly vaccine file (<year>vaersvax.csv -> dmtm9.vaervax<year>).
 * Years are processed from LAST down to FIRST, report files before vaccine
 * files, which is the order the original step-by-step program used.
 */
%macro import_all_vaers(dir=d:\vaers files, first=2002, last=2006);
   %local yr;

   %do yr = &last %to &first %by -1;
      %import_vaers(dir=&dir, year=&yr, prefix=vaers, suffix=vaersdata)
   %end;

   %do yr = &last %to &first %by -1;
      %import_vaers(dir=&dir, year=&yr, prefix=vaervax, suffix=vaersvax)
   %end;
%mend import_all_vaers;

%import_all_vaers()
/* Stack the five yearly report tables, oldest year first, into       */
/* DMTM9.VAERALL. DATEDIED and HOSPDAYS are dropped from every input  */
/* table as it is read, so they never reach the combined table.       */
data dmtm9.vaerall;
   set
      dmtm9.vaers2002 (drop=datedied hospdays)
      dmtm9.vaers2003 (drop=datedied hospdays)
      dmtm9.vaers2004 (drop=datedied hospdays)
      dmtm9.vaers2005 (drop=datedied hospdays)
      dmtm9.vaers2006 (drop=datedied hospdays)
   ;
run;
/* Stack the five yearly vaccine tables, oldest year first, into */
/* DMTM9.VAERVAXALL. All variables are kept.                     */
data dmtm9.vaervaxall;
   set
      dmtm9.vaervax2002
      dmtm9.vaervax2003
      dmtm9.vaervax2004
      dmtm9.vaervax2005
      dmtm9.vaervax2006
   ;
run;
The data is then further processed to produce the extract used in the example:
  • The separate COSTART terms are appended into a single COSTRING field for each adverse event.
  • Additional indicator variables are created for each of the vaccinations received. In the case of DTP, both the Pertussis and Diphtheria/Tetanus variables would be flagged.
The SAS code Vaersetup.sas used to generate the resulting table, VAEREXT, is in the Getting Started with Text Miner 4.2 zip file.
/* Assign the DMTM9 libref that the tables read and written below live in. */
libname dmtm9 'd:\emdata\dmtm9';
/*----  TJW Modification: within DATA step  ----*/
/*
 * FixJunk - normalize punctuation in a free-text character variable.
 *
 *   TextVar - name of the character variable to clean, modified in place.
 *
 * Must be invoked inside a DATA step: it expands to a series of
 * assignment statements. The rules run in the order written and the
 * order matters:
 *   - negative contractions are expanded before any apostrophe handling;
 *   - the possessive suffix is removed before the remaining apostrophes
 *     are split off, otherwise it could never match;
 *   - the two-character comparison operators are spelled out before the
 *     single-character ones.
 * Finally, runs of blanks are collapsed and the value is left-aligned.
 *
 * NOTE: the replacements lengthen the text; the result is truncated to
 * the declared length of TextVar.
 */
%macro FixJunk(TextVar=);
    /* Expand negative contractions, including a garbled apostrophe (_). */
    &TextVar = tranwrd(&TextVar,'n_t ', " not ");
    &TextVar = tranwrd(&TextVar,'N_T ', " NOT ");
    &TextVar = tranwrd(&TextVar,"n't ", " not ");
    &TextVar = tranwrd(&TextVar,"N'T ", " NOT ");

    /* Put blanks around brackets and separators so they become tokens. */
    &TextVar = tranwrd(&TextVar,';', "; ");
    &TextVar = tranwrd(&TextVar,')', " ) ");
    &TextVar = tranwrd(&TextVar,'(', " ( ");
    &TextVar = tranwrd(&TextVar,']', " ] ");
    &TextVar = tranwrd(&TextVar,'[', " [ ");
    &TextVar = tranwrd(&TextVar,'}', " } ");
    &TextVar = tranwrd(&TextVar,'{', " { ");
    &TextVar = tranwrd(&TextVar,'*', " * ");
    &TextVar = tranwrd(&TextVar,',', ", ");
    &TextVar = tranwrd(&TextVar,' w/', " with ");
    /* Disabled rule, kept for reference:
       &TextVar = tranwrd(&TextVar,'/', " / ");
    */
    &TextVar = tranwrd(&TextVar,'\', " \ ");
    &TextVar = tranwrd(&TextVar,'~', " ~ ");

    /* Possessive suffix first, then any apostrophe that is left over. */
    &TextVar = tranwrd(&TextVar,"'s", " ");
    &TextVar = tranwrd(&TextVar,"'", " ' ");

    &TextVar = tranwrd(&TextVar,'_', " _ ");
    &TextVar = tranwrd(&TextVar,'&', " and ");
    &TextVar = tranwrd(&TextVar,'.', ".  ");

    /* Two-character operators before their one-character prefixes. */
    &TextVar = tranwrd(&TextVar,'<=', " less than or equal ");
    &TextVar = tranwrd(&TextVar,'>=', " greater than or equal ");
    &TextVar = tranwrd(&TextVar,'<', " less than ");
    &TextVar = tranwrd(&TextVar,'>', " greater than ");
    &TextVar = tranwrd(&TextVar,'=', " equals ");

    /* Collapse repeated blanks and left-align the cleaned text. */
    &TextVar = trim(left(compbl(&TextVar)));
%mend FixJunk;

/*
 * Build DMTM9.VAEREXT, the text-mining extract, from DMTM9.VAERALL.
 *
 * For each report that passes the filters below the step
 *   - derives SERIOUS (Y/N) and PEDFLAG (Y/N),
 *   - thins out non-serious reports at random,
 *   - sets the 0/1 indicators VAX_1-VAX_16 from the vaccine codes in
 *     VAX1-VAX8 and counts the antigens given as IMMUN_CNT,
 *   - joins the terms in SYM01-SYM20 into one string, COSTRING,
 *   - normalizes punctuation in SYMPTOM_TEXT with the FixJunk macro,
 *     which must be defined before this step runs.
 * Only the variables named in KEEP= are written.
 */
data dmtm9.vaerext(keep=cage_yr sex symptom_text serious numdays pedflag sym_cnt
                  vax_1-vax_16 vax_cnt immun_cnt costring v_adminby v_fundby);
   length costring $255;
   array syms{20} $ 25 sym01-sym20;
   array vaxs{8} $ vax1-vax8;
   array nvax{16} vax_1-vax_16;

   set dmtm9.vaerall;

   /* Only include adverse events that occurred within 90 days of vaccination. */
   /* NOTE(review): a missing NUMDAYS compares lower than any number in SAS,   */
   /* so reports with an unknown interval also pass this test - confirm that   */
   /* this is intended.                                                        */
   if numdays <= 90;
   if cage_yr = . then cage_yr = 0;
   if cage_mo = . then cage_mo = 0;
   if vax_date ne .;

   /* Serious events are ones that required an overnight hospital stay or caused */
   /*  disability, death, or a life-threatening event                            */
   if l_threat='Y' or died='Y' or hospital='Y' or x_stay='Y' or disable='Y'
   then serious='Y';
   else serious='N';

   /* Determine age of vaccine recipient -- year + month, mark all those aged */
   /* 9 or under as pediatric                                                 */
   cage_yr = cage_yr+cage_mo;
   if cage_yr <=9 then pedflag='Y'; else pedflag='N';
   if died=' ' then died='N';
   if er_visit = ' ' then er_visit='N';
   if recovd = ' ' then recovd='U';

   /* Serious adverse events are rare (approx 8%), so randomly discard about   */
   /* 70% of the non-serious reports to raise the share of serious ones.       */
   /* A seed of 0 takes the seed from the clock, so each run keeps a different */
   /* random subset.                                                           */
   if serious='N' and uniform(0) < .7 then delete;

   /* Create flag variables for illnesses frequently inoculated against, also  */
   /* count up number of immunizations given at one time to a patient as       */
   /* immun_cnt                                                                */
   label vax_1='Anthrax'
      vax_2='Diphtheria/Tetanus'
      vax_3='Flu'
      vax_4='Hepatitis A'
      vax_5='Hepatitis B'
      vax_6='HIB (Haemophilus)'
      vax_7='Polio (IPV,OPV)'
      vax_8='Measles,Mumps,Rubella'
      vax_9='Meningococcal'
      vax_10='Pneumo (7-valent)'
      vax_11='Pneumo (23-valent)'
      vax_12='Rabies'
      vax_13='Smallpox'
      vax_14='Typhoid'
      vax_15='Pertussis'
      vax_16='Varicella'
      ;

   /* Start every indicator at 0; matching vaccine codes switch them to 1. */
   do i=1 to 16;
      nvax{i}=0;
      end;

   /* Counting convention used below: diphtheria and tetanus count as two   */
   /* immunizations although they share one flag (VAX_2); every code in the */
   /* measles/mumps/rubella group counts as three.                          */
   immun_cnt=0;
   do i=1 to min(vax_cnt,8);
      select (vaxs{i});
         /* Assumed to be DTaP + IPV + HepB + Hib: like DTAPHE plus Hib, so */
         /* the pertussis flag is set and six immunizations are counted.    */
         when ('6VAX-F') do; vax_2=1; vax_15=1; vax_5=1; vax_6=1; vax_7=1;
            immun_cnt=immun_cnt+6; end;
         when ('ANTH') do; vax_1=1; immun_cnt=immun_cnt+1; end;
         when ('DPP') do; vax_2=1; vax_15=1; vax_7=1; immun_cnt=immun_cnt+4; end;
         when ('DT','DTOX','TD','TTOX') do; vax_2=1; immun_cnt=immun_cnt+2; end;
         when ('DTAP','DTP','TDAP') do;
            vax_2=1; vax_15=1; immun_cnt=immun_cnt+3; end;
         when ('DTAPH','DTPHIB') do;
            vax_2=1; vax_15=1; vax_6=1; immun_cnt=immun_cnt+4; end;
         when ('DTAPHE') do;
            vax_2=1; vax_15=1; vax_5=1; vax_7=1; immun_cnt=immun_cnt+5; end;
         when ('FLU','FLUN') do; vax_3=1; immun_cnt=immun_cnt+1; end;
         when ('HBHEPB') do; vax_6=1; vax_5=1; immun_cnt=immun_cnt+2; end;
         when ('HBPV','HBVC','HIBV') do; vax_6=1; immun_cnt=immun_cnt+1; end;
         when ('HEP') do; vax_5=1; immun_cnt=immun_cnt+1; end;
         when ('HEPA') do; vax_4=1; immun_cnt=immun_cnt+1; end;
         when ('HEPAB') do; vax_4=1; vax_5=1; immun_cnt=immun_cnt+2; end;
         when ('IPV','OPV') do; vax_7=1; immun_cnt=immun_cnt+1; end;
         when ('MEA','MER','MM','MMR','MU','MUR','RUB') do;
            vax_8=1; immun_cnt=immun_cnt+3; end;
         /* MMR group (3) plus varicella (1). Without this increment a     */
         /* report listing only MMRV was removed by the IMMUN_CNT filter.  */
         when ('MMRV') do; vax_8=1; vax_16=1; immun_cnt=immun_cnt+4; end;
         /* Same fix: meningococcal-only reports were being filtered out.  */
         when ('MEN','MNC','MNQ') do; vax_9=1; immun_cnt=immun_cnt+1; end;
         when ('PNC') do; vax_10=1; immun_cnt=immun_cnt+1; end;
         when ('PPV') do; vax_11=1; immun_cnt=immun_cnt+1; end;
         when ('RAB','RABA') do; vax_12=1; immun_cnt=immun_cnt+1; end;
         when ('SMALL') do; vax_13=1; immun_cnt=immun_cnt+1; end;
         when ('TYP') do; vax_14=1; immun_cnt=immun_cnt+1; end;
         when ('VARCEL') do; vax_16=1; immun_cnt=immun_cnt+1; end;
         /* Codes not listed above set no flag and add nothing to the count. */
         otherwise;
         end;

      end;

   /* Keep only reports in which at least one recognized vaccine was found. */
   if immun_cnt > 0;

   /* Create a field, costring, with all the COSTART terms concatenated    */
   /* together in one string. CATX strips each term and puts one blank     */
   /* between terms, so the string has no leading blank. The result is     */
   /* truncated to the 255-character length of costring.                   */
   costring = '';

   do i=1 to min(sym_cnt,20);
      costring = catx(' ', costring, syms{i});
      end;

    /* Fix punctuation issues */
    %FixJunk(textvar=symptom_text);

   run;
      
/* One-way frequency tables for the derived flags and counts in the      */
/* extract. DATA= is named explicitly so the step keeps reporting on     */
/* DMTM9.VAEREXT even if another data set is created before it runs.     */
proc freq data=dmtm9.vaerext;
   tables pedflag immun_cnt vax_cnt v_adminby v_fundby sex serious vax_1-vax_16;
   run;