Category: Data management

Author resource: Joost Impink

Explanation

This macro matches Compustat Fundamental Annual with Audit Analytics Auditfees.

It first matches on current ('header') CIK, which is taken from Compustat Company table. In Audit Analytics this is the primary key (COMPANY_FKEY).

Firm-years that remain unmatched, are then matched on similarity (within 1%) of sales and assets, which are in both datasets. Firms that match for two years on this criterion are considered valid matches. Unmatched single firm-years that are matched on sales and assets also need to have the same ticker symbol, or similar company name. (Company name for Audit Analytics is taken from Auditfeesnames).

Note: in case there are multiple firms involved, the script selects the audit firm with the highest fee. A sum with group by needs to be included if the total audit fee is required.

/*

	Matching Compustat Funda with Audit fees (Audit Analytics)

	Matching procedure
	==================
	The initial match is on central index key (CIK), which is called company_fkey in 
	Audit Analytics. Both are in Funda, as in AA, the cik is the current CIK.

	For unmatched firm-years, a match is done on assets and sales.
	AA:
		Assets: MATCHQU_BALSH_ASSETS 
		Sales: MATCHQU_INCMST_REV_TTM 
	Funda
		Assets: AT
		Sales: SALE

	Both assets and sales need to be within 1% 

	For firms that have a single unmatched firm-year, the ticker symbol needs to be 
	the same (tic versus BEST_EDGAR_TICKER), or the 'distance' (spedis) of the 
	firmname needs to be lower than 10. In AA, firm name is in Namesauditfees 
	versus conm in Funda.

	For firms with 2 or more unmatched firm-years, the matched firm needs to match
	the sales/assets criterion for at least 2 years (no further restrictions on 
	ticker, company name).

	Relevant variables in Auditfees:
		COMPANY_FKEY	
		FISCAL_YEAR_ENDED => match with compustat.datadate +- 7 days
		MATCHQU_BALSH_ASSETS (in millions) => total assets
		MATCHQU_INCMST_REV_TTM => sales

	Potentially relevant variables (not used), could be used for matching on price 
	on crsp  (not prcc_f in Funda, which is based on datadate, not the actual end 
	of year date)
		MATCHQU_PRICE_DATE
		MATCHQU_PRICE_CLOSE => match with compustat.prcc_f

	Macro variables
	===============
		dsin		dataset in with variables: gvkey, fyear
		dsout		dataset out, with appended AUDITOR_FKEY, AUDIT_FEES, 
					NON_AUDIT_FEES, for matched records (missings for unmatched)

	Requirements
	============
		The macro assumes it has local access to:
		- comp.funda
		- comp.company
		- aa.auditfees
		- aa.Namesauditfees 
		This macro uses the %runquit macro (this macro triggers an abort
		when an error occurs)

	Sample usage
	============
		data work.a_funda;
		set comp.funda;
		if fyear >= 2010;
		if at > 50;
		if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C' ;
		run;

		%getAuditFees(dsin=a_funda, dsout=work.b_auditfees);


	
	* debugging: write macro code to disk (slows system, especially exports);

	options merror 	   mprint symbolgen;
	FILENAME mprint 'P:\research\sas_projects\tempSAScode.SAS';	
	options               mprint mfile;

*/

%macro getAuditFees(dsin=, dsout=);

/*	Unique firm-years */

data work.a_spec0 (keep = gvkey fyear);
set &dsin;
run;

proc sort data= work.a_spec0 nodupkey;by gvkey fyear;run;

/*	Get Compustat variables */

PROC SQL;
  create table work.a_spec1 as
  	select 
		a.*, b.conm, b.datadate, b.at, b.sale, b.tic
	from
		work.a_spec0 a, 
		comp.funda b
  	where 		
		a.gvkey = b.gvkey
	and a.fyear = b.fyear
	and b.indfmt='INDL' and b.datafmt='STD' and b.popsrc='D' and b.consol='C' ;
%runquit;

/* 	Keep first record in case of multiple records ; */

proc sort data = work.a_spec1 nodupkey; by gvkey fyear;%runquit;

/*	Add  current zip-code, sic, fic, cik */

proc sql;

	create table work.a_spec2 as

		select a.*, b.fic, b.sic, b.ADDZIP, b.cik
		from
			work.a_spec1 a
		LEFT JOIN
			comp.company b
		ON
			a.gvkey = b.gvkey;
%runquit;

data work.a_spec3 (drop = fic);
set work.a_spec2;
if FIC = 'USA';
zipcode_current = put(ADDZIP,$5.);
sic2 = substr(sic,1,2);
if at ne .;
if at > 0;
%runquit;

/*	append company name to aa.auditfees */

proc sql;

	create table work.b_aa1 as
	select a.*, b.name
	from
		aa.auditfees a
	LEFT JOIN
		aa.Namesauditfees b
	on
		a.COMPANY_FKEY = b.COMPANY_FKEY
	and a.FISCAL_YEAR_ENDED = b.FISCAL_YEAR_ENDED;
	
quit;

/*	Join Funda with AA.auditfees */

proc sql;

	create table work.b_aa2 as
	select a.*, b.AUDITOR_FKEY, b.AUDIT_FEES, b.NON_AUDIT_FEES, b.COMPANY_FKEY, 
		b.MATCHQU_BALSH_ASSETS, b.MATCHQU_INCMST_REV_TTM, b.name as NAME_AA, 
		b.BEST_EDGAR_TICKER
	from
		work.a_spec3 a
	LEFT JOIN
		work.b_aa1 b
	ON
		a.cik = b.COMPANY_FKEY
	and a.datadate - 15 <= b.FISCAL_YEAR_ENDED <= a.datadate + 15;

%runquit;

/*	Duplicate rows: keep the one with highest audit fees */

proc sort data = work.b_aa2; by gvkey fyear descending AUDIT_FEES;%runquit;
proc sort data = work.b_aa2 nodupkey; by gvkey fyear ;%runquit;


data work.c_match;
set work.b_aa2;
if AUDIT_FEES ne .;
run;

data work.d_nomatch (drop = AUDITOR_FKEY AUDIT_FEES NON_AUDIT_FEES NAME_AA);
set work.b_aa2;
if AUDIT_FEES eq .;
run;


proc sql;

	create table work.d_nomatch2  as
	select a.*, b.AUDITOR_FKEY, b.AUDIT_FEES, b.NON_AUDIT_FEES, 
		b.MATCHQU_BALSH_ASSETS, b.MATCHQU_INCMST_REV_TTM, b.NAME as NAME_AA, 
		b.BEST_EDGAR_TICKER
	from
		work.d_nomatch a,
		work.b_aa1 b
	where
		a.datadate - 15 <= b.FISCAL_YEAR_ENDED <= a.datadate + 15
	and a.at/1.01 <= b.MATCHQU_BALSH_ASSETS/1000000 <= a.at * 1.01
	and a.sale/1.01 <= b.MATCHQU_INCMST_REV_TTM/1000000 <= a.sale * 1.01
;
quit;

/*	Duplicate rows: keep the one with highest audit fees */

proc sort data = work.d_nomatch2; by gvkey fyear name_aa descending AUDIT_FEES;
proc sort data = work.d_nomatch2 nodupkey; by gvkey fyear name_aa;%runquit;

/*	Take firms that only need 1 year to be matched; these will be checked by 
	ticker or similarity company names */

proc sql;

	create table work.e_single1 as
	select 
		gvkey, count(*) as numObs from work.d_nomatch group by gvkey;
		
quit;

proc sql;

	create table work.e_single2 as
	select 
		* 
	from 
		work.d_nomatch2 
	where 
		gvkey in (select gvkey from work.e_single1 where numObs = 1); 
quit;

data work.e_single3 (drop = goodName goodTicker);
set work.e_single2;
goodName = (spedis(scan(conm,1),scan(name_aa,1))<10);
goodTicker = BEST_EDGAR_TICKER eq tic;
if goodName + goodTicker > 0;
run;


/*	Firms with at least 2 years need to be matched by the same company 
	on sales and assets within 1% range*/

proc sql;

	create table work.f_multiple1 as
	select 
		* 
	from 
		work.d_nomatch2 
	where gvkey in (select gvkey from work.e_single1 where numObs > 1); 
quit;


data work.f_multiple2;
set work.f_multiple1;
paired = gvkey || NAME_AA;
run;

proc sql;

	create table work.f_multiple3 as
	select 
		paired, count(*) as numObs from work.f_multiple2 group by paired;	
quit;

proc sql;

	create table work.f_multiple4 (drop = paired) as
	select 
		* 
	from 
		work.f_multiple2 
	where 
		paired in (select paired from work.f_multiple3 where numObs > 1); 
quit;

data work.g_matched;
set work.c_match work.e_single3 work.f_multiple4;
run;

/*	Create dataset to be returned */

proc sql;

	create table &dsout as
		
		select a.*, b.AUDITOR_FKEY, b.AUDIT_FEES, b.NON_AUDIT_FEES
		from
			 &dsin a
		LEFT JOIN
			work.g_matched b
		ON
			a.gvkey = b.gvkey
		and a.fyear = b.fyear;

quit;

data &dsout;
set &dsout;
if AUDIT_FEES eq 0 then AUDIT_FEES = .;
run;


/*	clean up */

proc datasets library=work;
   delete a_spec0 - a_spec3 b_aa1 b_aa2 c_match  d_nomatch d_nomatch2 
		e_single1 - e_single3 f_multiple1 - f_multiple4 g_matched;
%runquit;

%mend;

Usage

Sample usage:

/*	Sample set Funda */

data work.a_funda (keep = gvkey fyear);
set comp.funda;
if fyear >= 2010;
if at > 50;
if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C' ; * no doubles;
run;

/*	Invoke macro */

%getAuditFees(dsin=work.a_funda, dsout=work.b_auditfees);

Other Data management
SAS Winsorize macro

forum

Latest forum posts
Fama French 49 YES 49 !!! by CA Miller
date format by Emily
Discretionary Accruals by jwhi121
question about discretionary accrual models by kerrida
Combining Global and North American data by CA Miller
File for Eventus or SAS Event Study by samme
quarterly accruals by lxt88
Financial Statement Comparability - De Franco, Kothari and Verdi (2011, JAR) Replication by rowing
question about discretionary accrual models by Zenghui
problem in perl by KZ

All rights reserved. © 2010-2014 wrds.us [Copyright] [Privacy Statement] [Disclaimer] [About]