******************************************************************; * PROJECT NAME: Coranova * INVESTIGATOR: Warren Bilker * * Written by: Warren Bilker, Colleen Brensinger ******************************************************************; /* The parameters to be input to the CORANOVA macro are described below. DATA is the name of the SAS dataset containing the data (default is the current SAS dataset). BETVAR is the name of the between (i.e. group) factor (1 or 2 in this case. The levels must always be numbered consecutively as 1,2,3,...). COMVAR is the name of the common variable which is correlated with variable representing each level of the within factor. WITHVARS are the within factor variables. PARTIAL are the names of the variables to adjust for in the correlation analysis (i.e. the variables that are partialled out). MBOOT is the number of bootstraps performed to estimate the covariance matrix of the correlated correlations. Recommend 300 as minimum, default is 300. MPERM is the number of permutations performed to estimate the tail area of distribution of the test statistic in order to estimate the p-value for each of the effects. Recommend 300 as minimum, default is 1000. TYPE indicates whether Pearson or Spearman correlation coefficients should be used. "P" indicates Pearson (the default) and "S" indicates Spearman. Here are two working examples: %CORANOVA(data=braindata, betvar=gender, comvar=verbalmem, withvars=frontal temporal subcortical); %CORANOVA(data=braindata, betvar=gender, comvar=verbalmem, withvars=frontal temporal subcortical, partial=age, mboot=300, mperm=1000, type=P); */ options nosyntaxcheck; %MACRO CORANOVA (DATA=_last_, BETVAR=g, COMVAR=c, WITHVARS=v1, PARTIAL=xyz__, MBOOT=500, MPERM=500, TYPE=P); %global nboot; %global nperm; %global f2levels; %global nwith; %global npart; %global partialvar; %global typecorr; %let nboot=&mboot; %let nperm=&mperm; %let dataname=%upcase(&data); %let betvar=%upcase(&betvar); %let comvar=%upcase(&comvar); %let withvars=%upcase(&withvars); %let withvars=%cmpres(&withvars); %let type=%upcase(&type); %let type=%cmpres(&type); %let typecorr=&type; %let partial=%upcase(&partial); %let partial=%cmpres(&partial); %let partialvar=1; %if &partial=XYZ__ %then %let partialvar=0; ** Create separate macro variables for each of the 'withvars' and count the number of within variables for purposes of removing any missing values before sending data to IML.; %let withtext=&withvars; %let nwith=0; %do %until (&withtext = STOPLOOP); %let nwith=%eval(&nwith+1); %global with&nwith; %let with&nwith=%scan(&withvars,&nwith,' '); %if (%length(&withtext) > %length(&&with&nwith)) %then %let withtext=%substr(&withtext,%eval(%length(&&with&nwith)+1)); %else %if (%length(&withtext) = %length(&&with&nwith)) %then %let withtext=STOPLOOP; %end; ** Create separate macro variables for each of the 'partial' and count the number of partial variables for purposes of removing any missing values before sending data to IML.; %let parttext=&partial; %let npart=0; %do %until (&parttext = STOPPART); %let npart=%eval(&npart+1); %let part&npart=%scan(&partial,&npart,' '); %if (%length(&parttext) > %length(&&part&npart)) %then %let parttext=%substr(&parttext,%eval(%length(&&part&npart)+1)); %else %if (%length(&parttext) = %length(&&part&npart)) %then %let parttext=STOPPART; %end; ** Count the total observations in the dataset; data getdata; set &data; xyz__=.; run; data getdata; set getdata(keep=&betvar &comvar &withvars &partial) nobs=last; call symput('TOTSAMP',left(last)); betvarc=&betvar || ''; run; proc sort data=getdata; by betvarc; run; ** count the observations with missing values and count the non-missing observations.; data getdata(drop=j); set getdata; missing=0; array allvar[&nwith] &withvars; array partvar[&npart] &partial; if betvarc='' then missing=1; if &comvar=. then missing=1; do j=1 to &nwith; if allvar[j]=. then missing=1; end; if &partialvar=1 then do; do k=1 to ∂̸ if partvar[k]=. then missing=1; end; end; run; proc freq data=getdata noprint; tables betvarc*missing / out=levels(drop=percent); run; data miss; set levels(where=(missing=1)); run; data notmiss; set levels(where=(missing=0)); run; data levels; merge miss(in=in1 drop=missing rename=(count=n_miss)) notmiss(in=in2 drop=missing rename=(count=n_used)); by betvarc; n_delete=0; if in1 and not in2 then n_used=0; if in2 and not in1 then n_miss=0; total_n=n_miss+n_used; if betvarc = '' then comment='** Removed from analysis (missing data)'; if n_used lt 3 then do; n_delete=n_used; n_used=0; if betvarc ne '' then comment='** Removed from analysis (n < 3)'; end; rename betvarc=&betvar; run; ** remove any observations with missing values; data getdata(drop=missing); set getdata(where=(missing=0)); run; ** Count the number of non-missing observations; data _null_; set getdata nobs=last; call symput('NONMISS',last); run; ** count the number of observations per level of the between variable; proc freq data=getdata noprint; tables betvarc / out=getnbet(keep=betvarc count); run; ** Remove any groups with less than 3 observations; data getdata(drop=count); merge getdata getnbet(keep=betvarc count); by betvarc; if count lt 3 then delete; run; ** count the number of levels of the between variable; proc freq data=getdata noprint; tables betvarc / out=getnbet(keep=betvarc); run; data _null_; set getnbet(where=(betvarc ne '')) nobs=last; call symput('F2LEVELS',last); run; *** Create the Between variable that is numeric and is ordered 1,2,3,...; proc sort data=getdata; by betvarc; run; data getdata; set getdata; by betvarc; if first.betvarc then between+1; mergevar=1; run; data _null_; set getdata nobs=last; call symput('USESAMP',last); run; %let misssamp=%sysevalf(&totsamp-&nonmiss); %let use_samp=%sysevalf(&usesamp-0); %let few_samp=%sysevalf(&nonmiss-&use_samp); %let partialname=&partial; %if &partial=XYZ__ %then %let partialname=; title1 "Between variable: &betvar"; title2 "Common variable: &comvar"; title3 "Within variables: &withvars"; title4 "Partial variables: &partialname"; title5 "The total sample size in &dataname is &totsamp"; title6 "There are &misssamp observations with missing values"; title7 "There are &few_samp observations deleted due to small within group sample size"; title8 "&use_samp observations used in the analysis"; %if &typecorr=P %then %do; title9 "PEARSON Correlation Coefficients"; %end; %else %if &typecorr=S %then %do; title9 "SPEARMAN Correlation Coefficients"; %end; %put _local_; %put _global_; proc print data=levels noobs; var &betvar total_n n_miss n_delete n_used comment; sum total_n n_miss n_delete n_used; run; *** Print out a Warning for groups having less than 5 observations; data warning(keep=warning); set levels(keep=n_used); if n_used > 0 and n_used < 5 then do; WARNING='There are less then 5 unique observations in at least one group.'; output; WARNING='This may not be sufficient for this program to run.'; output; WARNING=''; output; WARNING=''; output; end; run; proc print data=warning noobs; run; ** check that there is variability in the COMVAR and WITHVARS; proc means data=getdata mean var noprint; class &betvar; var &comvar &withvars; output out=checkvar var= &comvar &withvars; run; %let with=WITH; %let novariance=0; data _null_; set checkvar; if &comvar = 0 then do; novariance=1; call symput('NOVARIANCE', 1); output; end; %do i=1 %to &NWITH; if &&with&i = 0 then do; novariance=1; call symput('NOVARIANCE', 1); output; end; %end; run; %if &novariance=1 %then %goto novari; ** Print the correlation matrix that is being tested; %if &partialvar=1 %then %do; proc corr data=getdata out&typecorr=corrs(where=(_type_='CORR') rename=(_name_=withvar)) noprint; by &betvar; with &comvar; var &withvars; partial &partial; run; proc print data=corrs(drop=_type_) noobs; run; %end; %else %do; proc corr data=getdata out&typecorr=corrs(where=(_type_='CORR') rename=(_name_=withvar)) noprint; by &betvar; with &comvar; var &withvars; run; proc print data=corrs(drop=_type_) noobs; run; %end; *** Plot out the correlations by the levels of between variable; proc transpose data=corrs(drop=_type_) out=plotit(rename=(_name_=name)) prefix=corr; by &betvar; run; proc sort data=plotit; by name; run; data plotit; set plotit; by name; if first.name then withvar+1; retain withvar; betvarc=compress(&betvar || ''); run; proc sort data=plotit; by betvarc; run; data plotit; length legend $78; set plotit(rename=(corr1=corr)) nobs=last; by betvarc; if _n_=1 then legend='Legend: '; if first.betvarc and substr(betvarc,1,1)=plotpt then do; plotpt=substr(betvarc,2,1); legend=compress(legend) || trim(left(plotpt)) || "=" || trim(left(betvarc)) || ","; end; else if first.betvarc then do; plotpt=substr(betvarc,1,1); legend=compress(legend) || trim(left(plotpt)) || "=" || trim(left(betvarc)) || ","; end; retain plotpt legend; label name='Within Variable'; if _n_=last then call symput('LEG',legend); run; proc plot data=plotit; title10 "&leg"; plot corr*name = plotpt / vaxis=(-1 to 1 by 0.5) vref=0 hpos=70 vpos=40; run; ********* Center the Means *********; proc means data=getdata noprint; class &betvar; var &comvar; output out=means1 mean=commean; run; data means1; set means1; mergevar=1; run; data getdata; merge getdata means1(keep=_type_ mergevar commean where=(_type_=0) rename=(commean=ocommean)); by mergevar; run; proc sort data=getdata; by &betvar; run; data getdata; merge getdata means1(keep=_type_ &betvar commean where=(_type_=1) rename=(commean=bcommean)); by &betvar; run; data trans(keep=&betvar &comvar which withvar); set getdata; array with[&nwith] &withvars; do which=1 to &nwith; withvar=with[which]; output; end; run; proc means data=trans noprint; class &betvar which; var withvar; output out=means2 mean=withmean; run; data means2; set means2; mergevar=1; run; proc transpose data=means2(where=(_type_=3)) out=means2t prefix=withm; by &betvar; var withmean; run; data getdata; merge getdata means2(keep=_type_ mergevar withmean where=(_type_=0) rename=(withmean=owthmean)); by mergevar; run; data getdata; merge getdata means2t(drop=_name_); by &betvar; &comvar = &comvar + (ocommean - bcommean); array with[&nwith] &withvars; array withm[&nwith] withm1-withm&nwith; do i=1 to &nwith; with[i] = with[i] + (owthmean - withm[i]); end; run; ********* Centered Means *********; data getdata; ** reposition the variables in the SAS dataset so that between variable is first and common variable is second (keeping only the needed vars).; length between 8 &comvar 8 &withvars 8 &partial 8; set getdata(keep=between &comvar &withvars &partial); run; title10 "&mperm permutations and &mboot bootstrap iterations used"; %CORAOV; data testcomp; length message $75; set testcomp; if _n_=1 and col1=0 then message='P-value for between factor not calculated'; if _n_=2 and col1=0 then message='P-value for within factor not calculated'; if _n_=3 and col1=0 then message='P-value for within*between interaction not calculated'; output; if _n_=3 then do; message=''; output; end; if _n_=3 then do; message='could be due to either too small of a within group sample size,'; output; end; if _n_=3 then do; message='only one between group, only one within factor, or too little variability'; output; end; if _n_=3 then do; message=' '; output; end; if _n_=3 then do; message='ERRORs in log file are due to at least one of the tests not being computed'; output; end; if _n_=3 then do; message='and are not problematic for p-values that were computed.'; output; end; run; proc print data=testcomp noobs; where col1=0; var message; run; %if &f2levels=1 %then %do; %if &partialvar=1 %then %do; %do l=1 %to &nwith; proc reg data=getdata noprint; model &&with&l = &partial; output out=try&l(keep=resids&l) r=resids&l; run; data try&l; set try&l; mergevar=_n_; run; %if &l=1 %then %do; data all; set try&l; run; %end; %else %do; data all; merge all try&l; by mergevar; run; %end; %end; data OFdata; set all(drop=mergevar); run; %end; %else %do; data OFdata; set getdata(drop=between &partial); run; %end; %muecorna(in_data=OFdata, in_flag=1, in_cntr=getdata, out_r=out_r, out_cntr=out_cntr, out_kp=out_kp); title10 "Olkin & Finn p-value for Overall Within Effect"; * only print out the first record, this is the overall test for within effect; * the rest are individual p-values from the contrast matrix; proc print data=out_kp (obs=1) noobs; run; %end; %novari: %if &novariance=1 %then %do; title9; title10 'Execution Stopped: No Variability in either COMMON variable or WITHIN variables'; proc means data=getdata mean var; class &betvar; var &comvar &withvars; run; %end; %MEND CORANOVA; %MACRO CORAOV; proc iml; * procedure to generate correlations ; start; errstop=1; errcode = {"if errstop=1 then do;", "call push(errcode);", "resume;", "end;"}; call push(errcode); tests={0,0,0}; * print tests; create testcomp from tests; append from tests; start cor(x,y) global(novar); errstop=1; errcode = {"if errstop=1 then do;", "call push(errcode);", "resume;", "end;"}; novar=0; n=ncol(x); row = nrow(x); *** Replace Y and X with ranks for calculating Spearman correlation; %if &typecorr=S %then %do; y = ranktie(y); do loopx=1 to n; x[,loopx] = ranktie(x[,loopx]); end; %end; sumy = sum(y); tmp1=t(y)*y; tmp2=sumy*sumy/row; if tmp1 > tmp2 then sy2 = sqrt((t(y)*y)-sumy*sumy/row); if sy2=0 then novar=1; sumx = x[+,]; z=repeat(0,n,1); do loop=1 to n; z[loop]=t(x[,loop])*y-sumx[loop]*sumy/row; tmp3=t(x[,loop])*x[,loop]; tmp4=sumx[loop]*sumx[loop]/row; if tmp3 > tmp4 then chkvar=sqrt(t(x[,loop])*x[,loop]-sumx[loop]*sumx[loop]/row)*sy2; if chkvar=0 | tmp3 < tmp4 then novar=1; if novar ^= 1 then z[loop]=z[loop]/(sqrt(t(x[,loop])*x[,loop]-sumx[loop]*sumx[loop]/row)* sy2); end; return(z); finish cor; start corz(x,y) global(novar); errstop=1; errcode = {"if errstop=1 then do;", "call push(errcode);", "resume;", "end;"}; novar=0; n=ncol(x); row = nrow(x); *** Replace Y and X with ranks for calculating Spearman correlation; %if &typecorr=S %then %do; y = ranktie(y); do loopx=1 to n; x[,loopx] = ranktie(x[,loopx]); end; %end; sumy = sum(y); tmp1=t(y)*y; tmp2=sumy*sumy/row; if tmp1 > tmp2 then sy2 = sqrt((t(y)*y)-sumy*sumy/row); if sy2=0 then novar=1; sumx = x[+,]; z=repeat(0,n,1); do loop=1 to n; z[loop]=t(x[,loop])*y-sumx[loop]*sumy/row; tmp3=t(x[,loop])*x[,loop]; tmp4=sumx[loop]*sumx[loop]/row; if tmp3 > tmp4 then chkvar=sqrt(t(x[,loop])*x[,loop]-sumx[loop]*sumx[loop]/row)*sy2; if chkvar=0 | tmp3 < tmp4 then novar=1; if novar ^= 1 then z[loop]=z[loop]/(sqrt(t(x[,loop])*x[,loop]-sumx[loop]*sumx[loop]/row)* sy2); z[loop]=0.5*log((1+z[loop])/(1-z[loop])); end; return(z); finish corz; * procedure to generate variance/covariance structure ; start var(x); errstop=1; errcode = {"if errstop=1 then do;", "call push(errcode);", "resume;", "end;"}; n=nrow(x); sum=x[+,]; xpx=(t(x)*x-t(sum)*sum/n)/n; return(xpx); finish var; * read data; use getdata; read all into alldat; tvars = ncol(alldat); nvars = &nwith + 2; *print alldat; f1levels = &nwith; f2levels = &f2levels; nboot=&nboot; nperm=&nperm; ****** GENERATE CORRELATION AND VAR/COV STRUCTURE **; ****** FROM DATA TO GET THE 3 STATISTICS **; ****** USED TO OBTAIN THE PVALUES AFTER PERMUTING DATA **; *** Replace Common and Within variables with Residuals after partialling out the Partial variables.; if &partialvar=1 then do; do r=1 to f2levels; c = alldat[loc(alldat[,1] =r), 2]; w = alldat[loc(alldat[,1] =r), 3:nvars]; p = alldat[loc(alldat[,1] =r), 3+&nwith:tvars]; jvec=j(nrow(p), 1, 1); p = jvec || p; reg_p = p * inv(t(p)*p) * t(p); c_hat = reg_p * c; w_hat= reg_p * w; if r=1 then cordat = alldat[loc(alldat[,1] =r),1] || (c - c_hat) || (w - w_hat); else cordat=cordat // (alldat[loc(alldat[,1] =r),1] || (c - c_hat) || (w - w_hat)); end; end; else cordat=alldat[,1:nvars]; ***; do i=1 to f2levels; left = cordat[loc(cordat[,1] =i),3:nvars]; right = cordat[loc(cordat[,1]=i),2]; if i=1 then do; temp= cor(left,right); if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; end; else temp=temp//cor(left,right); if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; end; cvec_j =temp; cvec_jz=0.5*log((1+temp)/(1-temp)); bc= shape(0,nboot,(nvars-2)*f2levels); bcz=bc; do j=1 to nboot; do k=1 to f2levels; sub = cordat[loc(cordat[,1]=k), 1:nvars]; samsize = nrow(sub); NEXTSAMP: bs_sub=sub[int(uniform( repeat(0,1,samsize))*samsize)+1,]; ** check for at least 3 unique observations; if ncol(unique(bs_sub[,2])) < 3 then goto NEXTSAMP; if k=1 then bs=bs_sub; else bs = bs//bs_sub; end; do i=1 to f2levels; temp=cor(bs[loc(bs[,1]=i),3:nvars],bs[loc(bs[,1]=i),2]); if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; if i=1 then temp1=temp; else temp1 =temp1//temp; end; temp1z=0.5*log((1+temp1)/(1-temp1)); bc[j,] = t(temp1); bcz[j,] = t(temp1z); end; vmat_j = var(bc); vmat_jz = var(bcz); * between contrast ; one = t(repeat(1,f1levels)); onemore = t(repeat(-1,f1levels)); zero = t(repeat(0,f1levels)); do; skipped=0; if f2levels=1 then do; print "Only 1 Between Group -- cannot compute between or interaction contrast"; skipped=1; goto skipbetw; end; else do i=1 to (f2levels-1); row = one; do j=2 to f2levels; temp = zero; if j=i + 1 then temp = onemore; row = row||temp; end; if i=1 then cont1_j=row; else cont1_j = cont1_j // row; end; %if &typecorr=P %then %do; v = t( cont1_j*cvec_jz ); ** Fisher between; csquare1 = v * inv(cont1_j*vmat_jz*t(cont1_j)) * t(v); ** Fisher between; %end; %else %if &typecorr=S %then %do; v = t( cont1_j*cvec_j ); ** NO Fisher between; csquare1 = v * inv(cont1_j*vmat_j*t(cont1_j)) * t(v); ** NO Fisher between; %end; * within contrast ; skipbetw: do i=1 to f2levels; temp = shape(1,f1levels-1,1)||(-I((f1levels-1))); if i=1 then cont2_j=temp; else cont2_j = cont2_j||temp; end; %if &typecorr=P %then %do; v = t( cont2_j*cvec_jz ); ** Fisher within; csquare2 = v * inv(cont2_j*vmat_jz*t(cont2_j)) * t(v); ** Fisher within; %end; %else %if &typecorr=S %then %do; v = t( cont2_j*cvec_j ); ** NO Fisher within; csquare2 = v * inv(cont2_j*vmat_j*t(cont2_j)) * t(v); ** NO Fisher within; %end; * interaction contrast; if skipped=1 then goto theend; first = shape(1,f1levels-1,1)||(-I((f1levels-1))); onemore = -(shape(1,f1levels-1,1)||(-I((f1levels-1)))); zero = shape(0,f1levels-1,f1levels); do i=1 to f2levels - 1 ; row = first; do j= 2 to f2levels; temp = zero; if j= i + 1 then temp = onemore; row = row||temp; end; if i=1 then cont3_j=row; else cont3_j =cont3_j//row; end; v = t( cont3_j*cvec_j ); ** NO Fisher interaction; csquare3 = v * inv(cont3_j*vmat_j*t(cont3_j)) * t(v); ** NO Fisher interaction; * v = t( cont3_j*cvec_jz ); ** Fisher interaction; * csquare3 = v * inv(cont3_j*vmat_jz*t(cont3_j)) * t(v); ** Fisher interaction; theend: end; ****** PERMUTATION METHOD *******; DO; ******* BETWEEN ****** ; * permute no between effect, under null; if skipped=1 then goto permwith; do b=1 to nperm; between = alldat[,1]; ls=nrow(between); scram_s = uniform(repeat(0,ls,1)); position = rank(scram_s); between=between[position]; permute1t = between||alldat[,2:tvars]; *** Replace Common and Within variables with Residuals after partialling out the Partial variables.; if &partialvar=1 then do; do r=1 to f2levels; c = permute1t[loc(permute1t[,1] =r), 2]; w = permute1t[loc(permute1t[,1] =r), 3:nvars]; p = permute1t[loc(permute1t[,1] =r), 3+&nwith:tvars]; jvec=j(nrow(p), 1, 1); p = jvec || p; reg_p = p * inv(t(p)*p) * t(p); c_hat = reg_p * c; w_hat= reg_p * w; if r=1 then permute1 = permute1t[loc(permute1t[,1] =r),1] || (c - c_hat) || (w - w_hat); else permute1=permute1 // (permute1t[loc(permute1t[,1] =r),1] || (c - c_hat) || (w - w_hat)); end; end; else permute1=permute1t[,1:nvars]; ***; * create 'cvec_j' (correlation) - for between ; do i=1 to f2levels; left = permute1[loc(permute1[,1] =i),3:nvars]; right = permute1[loc(permute1[,1]=i),2]; if i=1 then do; %if &typecorr=P %then %do; temp= corz(left,right); ** Fisher between; %end; %else %if &typecorr=S %then %do; temp= cor(left,right); ** NO Fisher between; %end; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; end; %if &typecorr=P %then %do; else temp=temp//corz(left,right); ** Fisher between; %end; %else %if &typecorr=S %then %do; else temp=temp//cor(left,right); ** NO Fisher between; %end; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; end; cvec_j =temp; * bootstrap of permuted between; bc= shape(0,nboot,(nvars-2)*f2levels); do j=1 to nboot; do k=1 to f2levels; sub = permute1[loc(permute1[,1]=k),]; samsize = nrow(sub); NEXTSAMP1: bs_sub=sub[int(uniform( repeat(0,1,samsize))*samsize)+1,]; ** check for at least 3 unique observations; if ncol(unique(bs_sub[,2])) < 3 then goto NEXTSAMP1; if k=1 then bs=bs_sub; else bs = bs//bs_sub; end; do i=1 to f2levels; %if &typecorr=P %then %do; temp=corz(bs[loc(bs[,1]=i),3:nvars],bs[loc(bs[,1]=i),2]); ** Fisher between; %end; %else %if &typecorr=S %then %do; temp=cor(bs[loc(bs[,1]=i),3:nvars],bs[loc(bs[,1]=i),2]); ** NO Fisher between; %end; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; if i=1 then temp1=temp; else temp1 =temp1//temp; end; bc[j,] = t(temp1); end; * variance from bootsrapped permuted data; vmat_j = var(bc); v = t( cont1_j*cvec_j ); stat1 = v * inv(cont1_j*vmat_j*t(cont1_j)) * t(v); allstat1 = allstat1//stat1; end; ******* WITHIN ****** ; * permute no within effect, under null; permwith: do b=1 to nperm; permute2t=alldat; nalldat=nrow(alldat); do i=1 to nalldat; within = alldat[i,3:nvars]; scram_r = uniform(repeat(0,1,f1levels)); position = rank(scram_r); within=within[position]; temp=t(alldat[i,1:2])//within//t(alldat[i,3+&nwith:tvars]); permute2t[i,] = t(temp); end; *** Replace Common and Within variables with Residuals after partialling out the Partial variables.; if &partialvar=1 then do; do r=1 to f2levels; c = permute2t[loc(permute2t[,1] =r), 2]; w = permute2t[loc(permute2t[,1] =r), 3:nvars]; p = permute2t[loc(permute2t[,1] =r), 3+&nwith:tvars]; jvec=j(nrow(p), 1, 1); p = jvec || p; reg_p = p * inv(t(p)*p) * t(p); c_hat = reg_p * c; w_hat= reg_p * w; if r=1 then permute2 = permute2t[loc(permute2t[,1] =r),1] || (c - c_hat) || (w - w_hat); else permute2=permute2 // (permute2t[loc(permute2t[,1] =r),1] || (c - c_hat) || (w - w_hat)); end; end; else permute2=permute2t[,1:nvars]; ***; * create 'cvec_j' (correlation) - for within ; do i=1 to f2levels; left = permute2[loc(permute2[,1] =i),3:nvars]; right = permute2[loc(permute2[,1]=i),2]; if i=1 then do; %if &typecorr=P %then %do; temp= corz(left,right); ** Fisher within; %end; %else %if &typecorr=S %then %do; temp= cor(left,right); ** NO Fisher within; %end; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; end; %if &typecorr=P %then %do; else temp=temp//corz(left,right); ** Fisher within; %end; %else %if &typecorr=S %then %do; else temp=temp//cor(left,right); ** NO Fisher within; %end; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; end; cvec_j =temp; * bootstrap of permuted WITHIN; bc= shape(0,nboot,(nvars-2)*f2levels); do j=1 to nboot; do k=1 to f2levels; sub = permute2[loc(permute2[,1]=k),]; samsize = nrow(sub); NEXTSAMP2: bs_sub=sub[int(uniform( repeat(0,1,samsize))*samsize)+1,]; ** check for at least 3 unique observations; if ncol(unique(bs_sub[,2])) < 3 then goto NEXTSAMP2; if k=1 then bs=bs_sub; else bs = bs//bs_sub; end; do i=1 to f2levels; %if &typecorr=P %then %do; temp=corz(bs[loc(bs[,1]=i),3:nvars],bs[loc(bs[,1]=i),2]); ** Fisher within; %end; %else %if &typecorr=S %then %do; temp=cor(bs[loc(bs[,1]=i),3:nvars],bs[loc(bs[,1]=i),2]); ** NO Fisher within; %end; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; if i=1 then temp1=temp; else temp1 =temp1//temp; end; bc[j,] = t(temp1); end; * variance from bootsrapped permuted data; vmat_j = var(bc); v = t( cont2_j*cvec_j ); stat2 = v * inv(cont2_j*vmat_j*t(cont2_j)) * t(v); allstat2 = allstat2//stat2; end; ******* BETWEEN*WITHIN INTERACTION ****** ; * permute no interaction effect, under null; if skipped=1 then goto theend2; do b=1 to nperm; between = alldat[,1]; ls=nrow(between); scram_s = uniform(repeat(0,ls,1)); position = rank(scram_s); between=between[position]; permute3t = between||alldat[,2:tvars]; nalldat = nrow(alldat); do i=1 to nalldat; within = permute3t[i,3:nvars]; scram_r = uniform(repeat(0,1,f1levels)); position = rank(scram_r); within=within[position]; temp=t(permute3t[i,1:2])//within//t(permute3t[i,3+&nwith:tvars]); permute3t[i,] = t(temp); end; *** Replace Common and Within variables with Residuals after partialling out the Partial variables.; if &partialvar=1 then do; do r=1 to f2levels; c = permute3t[loc(permute3t[,1] =r), 2]; w = permute3t[loc(permute3t[,1] =r), 3:nvars]; p = permute3t[loc(permute3t[,1] =r), 3+&nwith:tvars]; jvec=j(nrow(p), 1, 1); p = jvec || p; reg_p = p * inv(t(p)*p) * t(p); c_hat = reg_p * c; w_hat= reg_p * w; if r=1 then permute3 = permute3t[loc(permute3t[,1] =r),1] || (c - c_hat) || (w - w_hat); else permute3=permute3 // (permute3t[loc(permute3t[,1] =r),1] || (c - c_hat) || (w - w_hat)) ; end; end; else permute3=permute3t[,1:nvars]; ***; * create 'cvec_j' (correlation) - for interaction ; do i=1 to f2levels; left = permute3[loc(permute3[,1] =i),3:nvars]; right = permute3[loc(permute3[,1]=i),2]; if i=1 then do; temp= cor(left,right); ** NO Fisher interaction; * temp= corz(left,right); ** Fisher interaction; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; end; else temp=temp//cor(left,right); ** NO Fisher interaction; * else temp=temp//corz(left,right); ** Fisher interaction; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; end; cvec_j =temp; * bootstrap of permuted interaction; bc= shape(0,nboot,(nvars-2)*f2levels); do j=1 to nboot; do k=1 to f2levels; sub = permute3[loc(permute3[,1]=k),]; samsize = nrow(sub); NEXTSAMP3: bs_sub=sub[int(uniform( repeat(0,1,samsize))*samsize)+1,]; ** check for at least 3 unique observations; if ncol(unique(bs_sub[,2])) < 3 then goto NEXTSAMP3; if k=1 then bs=bs_sub; else bs = bs//bs_sub; end; do i=1 to f2levels; temp=cor(bs[loc(bs[,1]=i),3:nvars],bs[loc(bs[,1]=i),2]); ** NO Fisher interaction; * temp=corz(bs[loc(bs[,1]=i),3:nvars],bs[loc(bs[,1]=i),2]); ** Fisher interaction; if novar=1 then do; print "Execution Stopped -- No Variability"; goto novary; end; if i=1 then temp1=temp; else temp1 =temp1//temp; end; bc[j,] = t(temp1); end; * variance from bootsrapped permuted data; vmat_j = var(bc); v = t( cont3_j*cvec_j ); stat3 = v * inv(cont3_j*vmat_j*t(cont3_j)) * t(v); allstat3 = allstat3//stat3; end; theend2: END; * CALCULATE TAIL AREA, UNDER NULL; p_b=.; p_w=.; p_bw=.; if f2levels > 1 then do; num1 = sum(allstat1>csquare1); p_b = num1/nperm; if p_b>0 then tests[1,1]=1; end; num2 = sum(allstat2>csquare2); p_w = num2/nperm; if f1levels=1 then p_w=.; ** if only one within variable, then set p_w to missing; if p_w>0 then tests[2,1]=1; if f2levels > 1 then do; num3 = sum(allstat3>csquare3); p_bw = num3/nperm; if p_bw>0 then tests[3,1]=1; end; pvals= p_b || p_w || p_bw; print 'P-value for between factor: ' p_b; print 'P-value for within factor: ' p_w; print 'P-value for within*between interaction: ' p_bw; edit testcomp; col1=tests[1,1]; replace point 1; edit testcomp; col1=tests[2,1]; replace point 2; edit testcomp; col1=tests[3,1]; replace point 3; novary: errstop=-1; finish; run; %MEND CORAOV; %************************************************************************; %* *; %* MACRO: muecorna *; %* *; %* USAGE: %muecorna(in_data, in_flag, in_cntr, out_r, out_cntr, out_kp)*; %* *; %* DESCRIPTION: *; %* has M cols and N rows as input. Col is variable. Row *; %* is observation. N is the sample size. *; %* a flag which assumes value *; %* 0 : use user supplied as contrast matrix*; %* 1 : use O&F default contrast matrix *; %* 2 : use BBL default contrast matrix *; %* is a contrast matrix. It must be (M-1)*M. It will be *; %* ignored if is not 0. *; %* is the calculated correlation matrix (M-1)*(M-1). *; %* is the contrast matrix used. *; %* is the calculated Chi2 and Pvalue which has 2 cols and *; %* M rows. The first row is overall (Chi2, Pvalue). The *; %* rest are individual (Chi2, Pvalue) in the order of rows *; %* of contrast matrix. *; %* *; %* HISTORY: *; %* WHO WHEN WHY *; %* *; %* S. Xu 4/8/96 Initial version *; %* *; %************************************************************************; %macro muecorna(in_data, in_flag, in_cntr, out_r, out_cntr, out_kp); proc iml; /************************************************************************/ /* _Pearson: */ /* Pearson formula for correlation */ /* */ /* Input: */ /* x : a n*2 matrix */ /* */ /* Output: */ /* r : the correlation value (return value) */ /* */ /* History: */ /* WHO WHEN WHY */ /* S. Xu 6/12/95 Initial version */ /************************************************************************/ start _pearson(x); n_row = nrow(x); n_col = ncol(x); if n_col ^= 2 then do; print ,"ERROR (in _pearson): x must have 2 cols!!!"; return(-10); end; x_ = sum(x[,1]) / n_row; y_ = sum(x[,2]) / n_row; xi = x[,1] - x_; yi = x[,2] - y_; r = sum(xi # yi) / sqrt(sum(xi##2) * sum(yi##2)); return(r); finish; /************************************************************************/ /* _mkcov1a: */ /* Make a co-variance matrix from a correlation matrix based on */ /* Olkin & Finn equation 1 model A. */ /* */ /* Input: */ /* r : a m*m matrix */ /* n : sample size */ /* */ /* Output: */ /* cov1 : the co-variance matrix (return value) */ /* */ /* History: */ /* WHO WHEN WHY */ /* S. Xu 4/8/95 Initial version */ /************************************************************************/ start _mkcov1a (r, n); nc = ncol(r); nr = nrow(r); if (nc ^= nr) then do; print ,"ERROR (in _mkcov1a): R must be a square matrix!!!"; return; end; cov1a = j(nr - 1, nc - 1); do i = 1 to nr - 1; do j = i to nc - 1; if (i = j) then do; cov1a[i,j] = ((1 - r[1, i+1]##2)##2) / n; end; else do; cov1a[i,j] = ((r[i+1,j+1] - 0.5*r[1,i+1]*r[1,j+1]) * (1 - r[1,i+1]##2 - r[1,j+1]##2) + 0.5*r[1,i+1]*r[1,j+1]*r[i+1,j+1]##2) / n; cov1a[j,i] = cov1a[i,j]; end; end; end; return (cov1a); finish; /************************************************************************/ /* _mca: */ /* Calculate Chi2 and PValue based on Olkin & Finn method */ /* */ /* Input: */ /* r : a m*m correlation matrix */ /* cov1 : a (m-1)*(m-1) co-variance matrix */ /* y : a x*(m-1) contrast matrix (x = 1...(m-2)) */ /* n : sample size */ /* */ /* Output: */ /* k2 : the chi-2 value (return value) */ /* pvalue : the pvalue */ /* */ /* History: */ /* WHO WHEN WHY */ /* S. Xu 4/8/95 Initial version */ /************************************************************************/ start _mca (cov1, pvalue, r, y, n); nr_r = nrow(r); nc_r = ncol(r); nr_y = nrow(y); nc_y = ncol(y); if (nc_r ^= nr_r) then do; print ,"ERROR (in _mca): R must be a square matrix!!!"; return; end; if (nc_y ^= nr_r-1) then do; print ,"ERROR (in _mca): dim(R) ^= dim(Y) + 1 !!!"; return; end; /* compute v from y and r */ r1 = j(1, nr_r-1); do i = 1 to nr_r-1; r1[1,i] = r[i+1,1]; end; v = j(nr_y, 1); df = nr_y; do i = 1 to nr_y; v[i,1] = sum(r1#y[i,]); end; cov2 = y * cov1 * t(y); k2 = t(v) * inv(cov2) * v; pvalue = 1.0 - probchi (k2, df); return (k2); finish; use &in_data; read all var _num_ into x; /* calculate correlation matrix */ nc_x = ncol(x); nr_x = nrow(x); m = j(nr_x, 2); r = j(nc_x, nc_x, 1); do i = 1 to nc_x - 1; m[,1] = x[,i]; do j = i+1 to nc_x; m[,2] = x[,j]; r[i,j] = _pearson(m); r[j,i] = r[i,j]; end; end; /* degree of freedom */ df = nc_x - 2; /* case 0: the user supplied contrast matrix */ if &in_flag = 0 then do; use &in_cntr; read all var _num_ into y; end; /* case 1: the O&F default contrast matrix */ else if &in_flag = 1 then do; y = j(df, df + 1, 0); do i = 1 to df; y[i,1] = -1; y[i,i+1] = 1; end; end; /* case 2: the BBL default contrast matrix */ else do; y = j(df, df + 1, -1/df); do i = 1 to df; y[i,i] = 1; end; end; create _out_r from r; append from r; create _out_cnt from y; append from y; pvalue = 0; size = nr_x; cov1 = _mkcov1a (r, size); kp = j(df+1, 2, 0.0); /* overall k2 and pvalue */ k2 = _mca (cov1, pvalue, r, y, size); kp[1,1] = k2; kp[1,2] = pvalue; /* individual k2 and pvalue */ do i = 1 to df; yi = y[i,]; k2 = _mca (cov1, pvalue, r, yi, size); kp[i+1,1] = k2; kp[i+1,2] = pvalue; end; create _out_kp from kp; append from kp; quit; data &out_r; set _out_r; data &out_cntr; set _out_cnt; data &out_kp (rename=(col1=Chi_2 col2=P_value)); set _out_kp; run; %mend muecorna; *** Note that the default method hardcoded is to use Fisher's z transformation for the Within and Between effect calculations and NOT to use it for the Interaction effect. To change this, one must change whether cvec_j or cvec_jz and whether vmat_j or vmat_jz is used in the calculation of csquare1, csquare1, and csquare3 (test statistics based on original data), and whether the 'cor' or 'corz' procedure is used for the permutation steps. ***;