Category: Statistical tests

Author resource: Joost Impink


This macro can be used to create a table that holds the mean, median, standard deviation, number of observations of a list of variables for two groups.

Additionally, the level of significance for the difference is computed using a t-test for the difference in means, and the Wilcoxon-Mann-Withney for the difference in medians. Three tables are computed (means, t-test significance, Wilcoxon) that can be exported for further processing (see sample usage).

Note: this macro has not been tested extensively: please verify accuracy when using it!

/*	Table with means, medians, standard deviations for two groups
	as well as the differences for these groups (with significance levels)

	To use this macro, you will need a dataset that holds a variable that 
	divides the sample in two. The sample included below splits Funda
	firm-years after 2009 in firm-years with a market cap smaller versus 
	larger than $750 million.

	The script will provide mean, median, standard deviation, #obs for
	the variables for the two groups.

	It also will generate the significance levels for the t-test in the
	difference in means and a Wilcoxon-Mann-Whitney test for the difference
	in medians.

	The actual differences in means and medians are not computed (can easily
	be computed by subtracting the means, medians).

	The macro generates three datasets (outp is passed to macro): 
		- outp			table with means, medians, std, #obs 
		- outp_test1	table with t-tests	(test for differences in means)
		- outp_test2	table with Wilcoxon-etc (test for differences in medians)
	These tables can be exported for further processing to create the actual table.
	This macro uses the %runquit macro
		%macro runquit;                
			; run; quit;                           
			%if &syserr. ne 0 %then %do;                  
			%abort cancel ;                   
		%mend runquit;   


%macro differenceMeansMedians(dset=, byvar=, vars=, outp=);

		Macro that creates three tables:
		- outp			table with means, medians, std, #obs
		- outp _test1	table with t-tests	(test for differences in means)
		- outp _test2	table with Wilcoxon-etc (test for differences in medians)

		- dset	input dataset
		- outp	output dataset (with statistics)
		- vars	variables
		- by	variable to group on (single variable)


	proc sort data = &dset; by &byvar;%runquit;

	proc means data=&dset NOPRINT ;
	OUTPUT OUT=_table1 mean= median= N= STD=/autoname;
	var &vars;
	by &byvar ;

	/*    Difference in means: t-test  */

	proc ttest H0=0 DATA=&dset ;
	   CLASS  &byvar ;
	   VAR &vars;
	   ods output TTests =work.t1_ttest_ttests Statistics =work.t1_ttest_stats  ;

	/* 	Create table with Variable, mean, Probt, tValue */

	proc sql;
	      create table _table1_test1 as
	      select a.Variable, a.tValue * -1 as tValue, a.Probt, b.Mean * -1 as Mean
	            work.t1_ttest_ttests a,
	            work.t1_ttest_stats b
	            a.Variable = b.Variable
	      and a.Method = "Satterthwaite"
	      and b.class = "Diff (1-2)"    ;

	/*	Formatting of variables */

	data _table1_test1;
	set _table1_test1;
	format tValue 8.2;
	format Probt 8.3;
	format Mean 8.3;

	/*    Difference in medians: Wilcoxon-Mann-Whitney test */

	proc npar1way data = &dset wilcoxon;
	  class &byvar;
	  var &vars;
	  ods output WilcoxonTest = _table1_test2  ;

	data _table1_test2 (keep = Variable pVal);
	set _table1_test2;
	if Name1 eq "P2_WIL";   /* 2-sided p-value;*/
	pVal = nValue1;
	format pVal 8.3;

	data &outp;			set _table1;
	data &outp._test1; 	set _table1_test1;
	data &outp._test2; 	set _table1_test2;%runquit;

	/* 	Clean up; */

	proc datasets library=work;
	   delete t1_ttest_ttests t1_ttest_stats _table1 _table1_test1 _table1_test2;



Sample usage:

/*	Variables to be included in the table */

%LET tableVars =  roa roe ros asset_turn lev size mtb;

/*  Sample set Funda */ 

data work.sample (keep = gvkey fyear &tableVars isLarge);
set comp.funda;

/*	Standard requirement when using US industrial firms*/
if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C' ;

/*	Require positive/non-missing values for assets, equity, sales,
end-of-year stock price and # common shares outstanding*/
if at > 0 and ceq > 0 and sale > 0 and prcc_f > 0 and csho > 0; 

/*	Compute variables */
roa = ni / at; 
roe = ni / ceq; 
ros = ni / sale;
asset_turn = sale/at;
lev = at/ceq;	
size = log(prcc_f * csho);	
mtb = prcc_f * csho / ceq; 

/* 	Restrict sample size */
if fyear > 2009; 		

/*	Create indicator variable, set to 1 if market cap > 750 million, 0 otherwise 
	Statement between parentheses - '(' and ')' - is evaluated to true (1) or false (0) */
isLarge = (prcc_f * csho > 750);

/*	Note: typically, the data would need winsorizing, which is omitted here */

/*	Invoke macro */

%differenceMeansMedians(dset=work.sample, byvar=isLarge, vars=&tableVars, outp=work.sampleTable);

/*	Export files */

/*	Helper macro: export dataset to csv file */

%macro myExport(dset=, file=);

	%let filename = "&file";


/*	Directory to export table (note: no quotes around directory) */

%LET exportDir = C:\temp\someTable\;

/*	Export (invoke export macro) for further processing */

%myExport(dset=work.sampleTable, 	   file=&exportDir\sampleTable.csv);
%myExport(dset=work.sampleTable_test1, file=&exportDir\sampleTable_test1.csv);
%myExport(dset=work.sampleTable_test2, file=&exportDir\sampleTable_test2.csv);

Other Statistical tests
No related posts


Latest forum posts
Fama French 49 YES 49 !!! by CA Miller
date format by Emily
Discretionary Accruals by jwhi121
question about discretionary accrual models by kerrida
Combining Global and North American data by CA Miller
File for Eventus or SAS Event Study by samme
quarterly accruals by lxt88
Financial Statement Comparability - De Franco, Kothari and Verdi (2011, JAR) Replication by rowing
question about discretionary accrual models by Zenghui
problem in perl by KZ

All rights reserved. © 2010-2014 [Copyright] [Privacy Statement] [Disclaimer] [About]