* Bootstrap confidence interval for the variance of one variable.
* This file defines one macro and then runs it once on a small test data set.
* Macro arguments are SAMPLES, the number of bootstrap samples, default 1000,
* VAR, the variable to analyse, required,
* ALPHA1 and ALPHA2, the lower and upper probability levels, defaults 0.05 and 0.95,
* and DEBUG, which turns command echoing on when it is not N.
* Side effects: the active file is saved to C:\Temp at the start and read back at
* the end, and several scratch files are written to and erased from that folder.
* The folder is assumed to exist and to be writable.
PRESERVE.
SET printback=off.
DEFINE !BCNON( SAMPLES=!TOKENS(1) !DEFAULT(1000)
 /VAR =!TOKENS(1)
 /ALPHA1 =!TOKENS(1) !DEFAULT(0.05)
 /ALPHA2 =!TOKENS(1) !DEFAULT(0.95)
 /DEBUG = !CMDEND !DEFAULT('N') ).
****************************************************************************
* This macro produces a bootstrapped nonparametric confidence interval
* for the variance of a variable on the active file.
* The method is described in "An Introduction to the Bootstrap" by B Efron
* and R J Tibshirani, Chapman & Hall, 1993, pages 184-188, describing the
* BC method.
* I am grateful to SPSS Inc for the resampling code, (see
* http://www.spss.com/tech/answer/result.cfm?tech_tan_id=100000795 )
* and to Raynald Levesque for suggesting a method for getting the number
* of observations in a file into a loop counter.
*
* A typical use of this macro might be:-
* !BCNON SAMPLES=1000 VAR=hhinc ALPHA1=0.05 ALPHA2=0.95.
* where
* SAMPLES=1000 is the number of bootstrap samples to be drawn
* VAR is the name of the variable for which the confidence interval is required
* ALPHA1 and ALPHA2 specify the lower and upper limits required
*
* VAR is the only essential parameter; if the others are omitted
* they will be replaced by defaults, so
* !BCNON VAR=hhinc.
* is the minimum specification.
*
* David Hitchin, University of Sussex, 7th March 2001
* ccfg4@sussex.ac.uk
*
****************************************************************************.
PRESERVE.
!IF ( !DEBUG !EQ 'N') !THEN
SET printback=off mprint off.
!ELSE
SET printback on mprint on.
!IFEND .
SET SEED=random.
*---------------------------------------------------------------------------.
* Save original active file to give back after macro is done.
*---------------------------------------------------------------------------.
!IF (!DEBUG !EQ 'N') !THEN
SET RESULTS ON.
DO IF $CASENUM=1.
PRINT / "NOTE: ALL OUTPUT INCLUDING ERROR MESSAGES HAVE BEEN TEMPORARILY"
 / "SUPPRESSED. IF YOU EXPERIENCE UNUSUAL BEHAVIOR, RERUN THIS"
 / "MACRO WITH AN ADDITIONAL ARGUMENT /DEBUG='Y'."
 / "BEFORE DOING THIS YOU SHOULD RESTORE YOUR DATA FILE."
 / "THIS WILL FACILITATE FURTHER DIAGNOSIS OF ANY PROBLEMS.".
END IF.
!IFEND .
SAVE OUTFILE='C:\\Temp\\rr__tmp1.sav'.
* Keep only cases with a valid value and give each one a sequential id.
SELECT IF (NOT MISSING(!VAR)).
COMPUTE sample=1.
COMPUTE id=$casenum.
* A bit of trickery here to get number of cases into the input program loop.
* The case count N is written into a tiny generated macro which is then included.
RANK VARIABLES=!VAR (A) /N INTO N.
EXECUTE.
DO IF $casenum=1.
WRITE OUTFILE='C:\\Temp\\syntax.sps' /'DEFINE !NBCASES()'N '!ENDDEFINE.'.
END IF.
EXECUTE.
INCLUDE FILE='C:\\Temp\\syntax.sps' .
* DESCRIPTIVES VARIABLES=!VAR /STATISTICS=VARIANCE .
SAVE OUTFILE='C:\\Temp\\temp1.sav'/keep=!VAR,sample,id.
GET FILE='C:\\Temp\\temp1.sav'.
* Standard deviation of the observed data, stored as s in a one-row file.
AGGREGATE /OUTFILE='C:\\Temp\\sd.sav'/BREAK=sample/s = SD(!VAR).
* The bootstrapping begins here.
* Each sample draws as many random ids, with replacement, as there are cases.
INPUT PROGRAM .
LOOP SAMPLE=1 to !SAMPLES.
LOOP N = 1 to !NBCASES.
COMPUTE ID=TRUNC(UNIFORM(!NBCASES)) + 1.
END CASE.
LEAVE SAMPLE.
END LOOP.
END LOOP.
END FILE.
END INPUT PROGRAM .
FORMATS ID SAMPLE N (F6.0).
* Attach the data value to every drawn id by table lookup.
SORT CASES BY ID .
MATCH FILES / FILE = * / TABLE 'C:\\Temp\\temp1.sav' / BY ID.
* One row per bootstrap sample. z becomes the variance of that sample.
AGGREGATE /OUTFILE=*/BREAK=sample/z = SD(!VAR).
COMPUTE z=z*z.
* Reset sample to a constant so the one-row summary files can be matched to every row.
COMPUTE sample=1.
MATCH FILES / FILE = * / TABLE 'C:\\Temp\\sd.sav' / BY sample.
* theta is the variance of the observed data.
COMPUTE theta=s*s.
* thetals flags the replicates that fall below the observed variance.
COMPUTE thetals=(z < theta).
AGGREGATE /OUTFILE='C:\\Temp\\theta.sav'/ BREAK=sample / stless=SUM(thetals) / thetabar=MEAN(z) / N_BREAK=N.
MATCH FILES / FILE = * / TABLE 'C:\\Temp\\theta.sav' / BY sample.
* z0 is the proportion of replicates below theta and z0hat its normal quantile.
COMPUTE z0=stless/n_break.
COMPUTE z0hat=IDF.NORMAL(z0,0,1).
* zalph holds the normal quantile for ALPHA2 and z1malph the one for ALPHA1.
COMPUTE zalph=IDF.NORMAL(!ALPHA2,0,1).
COMPUTE z1malph=IDF.NORMAL(!ALPHA1,0,1).
COMPUTE t2=(thetabar-theta)*(thetabar-theta).
COMPUTE t3=t2*(thetabar-theta).
AGGREGATE /OUTFILE='C:\\Temp\\tt.sav'/ BREAK=sample / tt2=SUM(t2) / tt3=SUM(t3).
MATCH FILES / FILE = * / TABLE 'C:\\Temp\\tt.sav' / BY sample.
* Acceleration: sum of cubes divided by six times the 1.5 power of the sum of squares.
* The factor six must stay outside the power, hence the explicit grouping.
* NOTE(review): the deviations used are thetabar minus theta, which is the same for
* every replicate, so alphahat is close to zero. Confirm this is what is intended.
COMPUTE alphahat=tt3/(6*tt2**1.5).
* Adjusted lower and upper percentile levels a1 and a2.
COMPUTE a=z0hat+(z0hat+z1malph)/(1-alphahat*(z0hat+z1malph)).
COMPUTE a1=CDF.NORMAL(a,0,1).
COMPUTE a=z0hat+(z0hat+zalph)/(1-alphahat*(z0hat+zalph)).
COMPUTE a2=CDF.NORMAL(a,0,1).
COMPUTE pl=a1*100.
COMPUTE pu=a2*100.
* cl and cu are the positions of the limits among the sorted replicates.
COMPUTE cl=RND(n_break*a1).
COMPUTE cu=RND(n_break*a2).
* Keep both positions inside 1 to n_break so that a limit is always picked up.
* A missing position is left missing because these conditions are then not true.
IF (cl < 1) cl=1.
IF (cl > n_break) cl=n_break.
IF (cu < 1) cu=1.
IF (cu > n_break) cu=n_break.
PRINT FORMATS z to theta,thetabar,z0 to pu (f6.4)/ thetals,stless,cl,cu,n_break (f6.0)/ sample (f2.0).
* The filter below only serves the optional diagnostic listing, which is commented out.
COMPUTE filter_$=($casenum=1).
VARIABLE LABEL filter_$ '$casenum=1 (FILTER)'.
VALUE LABELS filter_$ 0 'Not Selected' 1 'Selected'.
FORMAT filter_$ (f1.0).
FILTER BY filter_$.
*DESCRIPTIVES VARIABLES=sample z s theta thetals stless thetabar n_break z0 z0hat zalph z1malph t2 t3 tt2 tt3 alphahat a a1 a2 pl pu cl cu /STATISTICS=MEAN STDDEV MIN MAX .
FILTER OFF.
USE ALL.
* Sort the replicates and read off the values at positions cl and cu.
SORT CASES BY z (A).
IF ($casenum=cl) lci=z.
IF ($casenum=cu) uci=z.
AGGREGATE /OUTFILE=* /BREAK=sample /lci_1 = MIN(lci) /uci_1 = MAX(uci).
RENAME VARIABLES (lci_1 =lci)(uci_1=uci).
* SUMMARIZE /TABLES=lci uci /FORMAT=VALIDLIST NOCASENUM TOTAL LIMIT=100
* /TITLE='Case Summaries' /MISSING=VARIABLE /CELLS=COUNT .
* Report the observed variance, the two limits and the requested levels.
MATRIX.
GET m /VARIABLES=lci,uci.
GET s /FILE='C:\\Temp\\sd.sav' /VARIABLES=s.
COMPUTE s=s*s.
PRINT s /FORMAT="F14.6"/TITLE="Variance"/CLABELS=" ".
PRINT m /FORMAT="F14.6"/TITLE="Confidence limits for variance"/CLABELS="Lower","Upper".
COMPUTE lower=!ALPHA1.
COMPUTE upper=!ALPHA2.
PRINT lower/FORMAT="F7.3"/TITLE="Alpha1"/CLABELS=" ".
PRINT upper/FORMAT="F7.3"/TITLE="Alpha2"/CLABELS=" ".
END MATRIX.
* Remove the scratch files and give the original active file back.
ERASE FILE='C:\\Temp\\temp1.sav'.
ERASE FILE='C:\\Temp\\sd.sav'.
ERASE FILE='C:\\Temp\\theta.sav'.
ERASE FILE='C:\\Temp\\tt.sav'.
ERASE FILE='C:\\Temp\\syntax.sps'.
GET FILE='C:\\Temp\\rr__tmp1.sav'.
RESTORE.
!ENDDEFINE.
RESTORE.

* Test data from Efron, page 180. Test runs will not exactly
* replicate the results in the book as different random samples
* will be drawn in different runs.
Data list free/zz.
Begin data.
48 36 20 29 42 42 20 42 22 41 45 14 6 0 33 28 34 4 32 24 47 41 24 26 30
end data.
!BCNON SAMPLES=2000 VAR=zz ALPHA1=0.05 ALPHA2=0.95.