Category: Data management

Author resource: Unknown


To reduce the effect of outliers/leverage points, observations with extreme values need to be taken care of. This macro either deletes (type=delete) or winsorizes (type=winsorize) these observations. (Winsorizing means that the extreme values are 'reset' at for example the 1st and 99th percentiles.)
Trim or winsorize macro
* byvar = none for no byvar;
* type  = delete/winsor (delete will trim, winsor will winsorize;
*dsetin = dataset to winsorize/trim;
*dsetout = dataset to output with winsorized/trimmed values;
*byvar = subsetting variables to winsorize/trim on;
%macro winsor(dsetin=, dsetout=, byvar=none, vars=, type=winsor, pctl=1 99);
%if &dsetout = %then %let dsetout = &dsetin;
%let varL=;
%let varH=;
%let xn=1;
%do %until ( %scan(&vars,&xn)= );
    %let token = %scan(&vars,&xn);
    %let varL = &varL &token.L;
    %let varH = &varH &token.H;
    %let xn=%EVAL(&xn + 1);
%let xn=%eval(&xn-1);
data xtemp;
    set &dsetin;
%if &byvar = none %then %do;
    data xtemp;
        set xtemp;
        xbyvar = 1;
    %let byvar = xbyvar;
proc sort data = xtemp;
    by &byvar;
proc univariate data = xtemp noprint;
    by &byvar;
    var &vars;
    output out = xtemp_pctl PCTLPTS = &pctl PCTLPRE = &vars PCTLNAME = L H;
data &dsetout;
    merge xtemp xtemp_pctl;
    by &byvar;
    array trimvars{&xn} &vars;
    array trimvarl{&xn} &varL;
    array trimvarh{&xn} &varH;
    do xi = 1 to dim(trimvars);
        %if &type = winsor %then %do;
            if not missing(trimvars{xi}) then do;
              if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
              if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
        %else %do;
            if not missing(trimvars{xi}) then do;
              if (trimvars{xi} < trimvarl{xi}) then delete;
              if (trimvars{xi} > trimvarh{xi}) then delete;
    drop &varL &varH xbyvar xi;
%mend winsor;


Sample usage:

/* invoke macro to winsorize */
%winsor(dsetin=myLib2.q_finalSet, dsetout=myLib2.r_finalWinsorized, byvar=none, 
vars=car_vw_log r_3day_log , type=winsor, pctl=1 99);

Other Data management
Fama French industry classification


Latest forum posts
Fama French 49 YES 49 !!! by CA Miller
date format by jc625
Discretionary Accruals by jwhi121
question about discretionary accrual models by kerrida
Combining Global and North American data by CA Miller
File for Eventus or SAS Event Study by samme
quarterly accruals by lxt88
Financial Statement Comparability - De Franco, Kothari and Verdi (2011, JAR) Replication by rowing
question about discretionary accrual models by Zenghui
problem in perl by KZ

All rights reserved. © 2010-2014 [Copyright] [Privacy Statement] [Disclaimer] [About]