Category: Data management

Author resource: Unknown


To reduce the effect of outliers/leverage points, observations with extreme values need to be taken care of. This macro either deletes (type=delete) or winsorizes (type=winsorize) these observations. (Winsorizing means that the extreme values are 'reset' at for example the 1st and 99th percentiles.)
Trim or winsorize macro
* byvar = none for no byvar;
* type  = delete/winsor (delete will trim, winsor will winsorize;
*dsetin = dataset to winsorize/trim;
*dsetout = dataset to output with winsorized/trimmed values;
*byvar = subsetting variables to winsorize/trim on;
%macro winsor(dsetin=, dsetout=, byvar=none, vars=, type=winsor, pctl=1 99);
%if &dsetout = %then %let dsetout = &dsetin;
%let varL=;
%let varH=;
%let xn=1;
%do %until ( %scan(&vars,&xn)= );
    %let token = %scan(&vars,&xn);
    %let varL = &varL &token.L;
    %let varH = &varH &token.H;
    %let xn=%EVAL(&xn + 1);
%let xn=%eval(&xn-1);
data xtemp;
    set &dsetin;
%if &byvar = none %then %do;
    data xtemp;
        set xtemp;
        xbyvar = 1;
    %let byvar = xbyvar;
proc sort data = xtemp;
    by &byvar;
proc univariate data = xtemp noprint;
    by &byvar;
    var &vars;
    output out = xtemp_pctl PCTLPTS = &pctl PCTLPRE = &vars PCTLNAME = L H;
data &dsetout;
    merge xtemp xtemp_pctl;
    by &byvar;
    array trimvars{&xn} &vars;
    array trimvarl{&xn} &varL;
    array trimvarh{&xn} &varH;
    do xi = 1 to dim(trimvars);
        %if &type = winsor %then %do;
            if not missing(trimvars{xi}) then do;
              if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
              if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
        %else %do;
            if not missing(trimvars{xi}) then do;
              if (trimvars{xi} < trimvarl{xi}) then delete;
              if (trimvars{xi} > trimvarh{xi}) then delete;
    drop &varL &varH xbyvar xi;
%mend winsor;


Sample usage:

/* invoke macro to winsorize */
%winsor(dsetin=myLib2.q_finalSet, dsetout=myLib2.r_finalWinsorized, byvar=none, 
vars=car_vw_log r_3day_log , type=winsor, pctl=1 99);

Other Data management
Fama French industry classification codes (12 and 48 industries)


Latest forum posts
File for Eventus or SAS Event Study by samme
quarterly accruals by lxt88
Financial Statement Comparability - De Franco, Kothari and Verdi (2011, JAR) Replication by rowing
question about discretionary accrual models by Zenghui
problem in perl by KZ
Any SAS code to replicate an empirical paper? by Malvernguy
Some links with code examples by Zenghui
Question about Herfindahl index by Zenghui
Question about Dow-Loop SAS code by npap
SEC Filings on EDGAR SAS File by SamLe

All rights reserved. © 2010-2014 [Copyright] [Privacy Statement] [Disclaimer] [About]