/*
SAS PROGRAMMING HANDOUT #23

This handout writes a SAS macro to check for linearity in linear regression
(like HANDOUT #11) by creating dummy CATi variables for the percentile groups
of an independent variable (CHECK). Then it performs linear regression using
the dependent variable (DEP) and the independent variables (CATi) together
with the other independent variables (IND).
*/

OPTIONS MACROGEN; *this will generate output helpful in finding errors;

%macro chklin(data=_last_, check=, n=5, dep=, ind=);
%******************************************************************************
 Macro Parameters:
   DATA  - dataset to be used, defaults to most recently created
   CHECK - variable to be checked for linearity
   N     - number of groups to break the CHECK variable into,
           default is 5 (quintiles)
   DEP   - the dependent variable
   IND   - the independent variables in the model, excluding CHECK

 Work datasets created (and overwritten if they already exist):
   _STATS  - one observation holding the N+1 percentile cut points of CHECK
   _TEMP1  - transposed _STATS, one observation per cut point
   _TEMP2  - DEP, IND and the indicator variables CAT2 through CATn

 The lowest percentile group is the reference group, so no CAT1 is created.
 Observations with a missing CHECK value get missing indicators.

 THANKS TO DAVID WALL OF NIOSH FOR HELPING WRITE THIS MACRO
*******************************************************************************;

    /* Keep all working macro variables out of the caller scope. */
    %local percents names categor i;

    /* CHECK and DEP have no usable default, so stop before generating any code. */
    %if %length(&check) = 0 or %length(&dep) = 0 %then %do;
        %put ERROR: CHKLIN requires both CHECK= and DEP= to be specified.;
        %goto endmacro;
    %end;

    %* This creates a macro variable that contains a list of all the percentiles;
    %let percents=;
    data _null_;
        do i=0 to &n;
            /* TRIM the running value so padding returned by SYMGET does not
               accumulate on every pass through the loop. */
            call symput('percents',
                        trim(symget('percents')) || ' ' ||
                        trim(left(put(round(i*100/&n, .01), best12.))));
        end;
    run;

    /* Resolve the _LAST_ default to a concrete dataset name before this macro
       creates any dataset of its own. Otherwise the later SET statement would
       read the transposed work dataset instead of the data of the caller.
       This is done after the step above so that a DATA step the caller left
       pending has already executed, and DATA _NULL_ does not change SYSLAST. */
    %if %qupcase(&data) = _LAST_ %then %let data = &syslast;

    %* Find the needed percentiles;
    proc univariate data=&data noprint;
        var &check;
        output out=_stats pctlpre=p_ pctlpts=&percents;
    run;

    %* Transpose the dataset output by PROC UNIVARIATE so we can define the dummy variables;
    proc transpose data=_stats out=_temp1;
    run;

    %* Find the names of the variables containing the percentiles and put them into a macro variable;
    %let names=;
    data _null_;
        set _temp1;
        call symput('names', trim(symget('names')) || " " || trim(_name_));
    run;

    %* Create the indicator variables;
    %let categor=;
    data _temp2;
        if _n_=1 then set _stats;
        set &data;
        if &check ne . then do;
            /* Group i covers the interval from cut point i (exclusive) to
               cut point i+1 (inclusive) in the list of percentile names. */
            %do i=2 %to &n;
                cat&i = ( %scan(&names, &i, %str( )) < &check <= %scan(&names, %eval(&i+1), %str( )) );
                label cat&i="%upcase(&check) group &i";
                %let categor=&categor cat&i;
            %end;
        end;
        keep &categor &dep &ind;
    run;

    %* Run a model with the indicator variables in place of the variable to be checked for linearity;
    proc REG data=_temp2;
        model &dep = &categor &ind;
        title "Checking %upcase(&check) for linearity";
        title2 "Dep Var = %upcase(&dep)";
        title3 "Covariates: %upcase(&ind)";
    run;

%endmacro:
%mend chklin;

*=================================================================================;
*the following is a sample application of the macro;
LIBNAME JIM2 'A:\';

DATA GNP4;
    SET JIM2.GNP;
RUN;

%CHKLIN(DATA=GNP4,CHECK=INVEST,DEP=GNP,IND=EXPORTS GOVT);
RUN;