Category: Data management

Author resource: Unknown


To reduce the effect of outliers/leverage points, observations with extreme values need to be taken care of. This macro either deletes (type=delete) or winsorizes (type=winsorize) these observations. (Winsorizing means that the extreme values are 'reset' at for example the 1st and 99th percentiles.)
Trim or winsorize macro
* byvar = none for no byvar;
* type  = delete/winsor (delete will trim, winsor will winsorize;
*dsetin = dataset to winsorize/trim;
*dsetout = dataset to output with winsorized/trimmed values;
*byvar = subsetting variables to winsorize/trim on;
%macro winsor(dsetin=, dsetout=, byvar=none, vars=, type=winsor, pctl=1 99);
%if &dsetout = %then %let dsetout = &dsetin;
%let varL=;
%let varH=;
%let xn=1;
%do %until ( %scan(&vars,&xn)= );
    %let token = %scan(&vars,&xn);
    %let varL = &varL &token.L;
    %let varH = &varH &token.H;
    %let xn=%EVAL(&xn + 1);
%let xn=%eval(&xn-1);
data xtemp;
    set &dsetin;
%if &byvar = none %then %do;
    data xtemp;
        set xtemp;
        xbyvar = 1;
    %let byvar = xbyvar;
proc sort data = xtemp;
    by &byvar;
proc univariate data = xtemp noprint;
    by &byvar;
    var &vars;
    output out = xtemp_pctl PCTLPTS = &pctl PCTLPRE = &vars PCTLNAME = L H;
data &dsetout;
    merge xtemp xtemp_pctl;
    by &byvar;
    array trimvars{&xn} &vars;
    array trimvarl{&xn} &varL;
    array trimvarh{&xn} &varH;
    do xi = 1 to dim(trimvars);
        %if &type = winsor %then %do;
            if not missing(trimvars{xi}) then do;
              if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
              if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
        %else %do;
            if not missing(trimvars{xi}) then do;
              if (trimvars{xi} < trimvarl{xi}) then delete;
              if (trimvars{xi} > trimvarh{xi}) then delete;
    drop &varL &varH xbyvar xi;
%mend winsor;


Sample usage:

/* invoke macro to winsorize */
%winsor(dsetin=myLib2.q_finalSet, dsetout=myLib2.r_finalWinsorized, byvar=none, 
vars=car_vw_log r_3day_log , type=winsor, pctl=1 99);

Other Data management
Fama French industry classification codes (12 and 48 industries)


Latest forum posts
question about discretionary accrual models by Zenghui
problem in perl by KZ
Any SAS code to replicate an empirical paper? by Malvernguy
Some links with code examples by Zenghui
Question about Herfindahl index by Zenghui
Question about Dow-Loop SAS code by npap
SEC Filings on EDGAR SAS File by SamLe
sas code problems in Jones model by KZ
question about the Jones model code by Zenghui
Repository forum guidelines by Joost Impink

All rights reserved. © 2010-2014 [Copyright] [Privacy Statement] [Disclaimer] [About]