* CI for the difference between two medians by Hodges-Lehmann method: Hodges JR and Lehmann EL (1963) 'Estimates of location based on rank tests' Annals of Mathematical Statistics 34, 598-611. * Improved sorting algorithm added 03/16/05 *. * Although the example provided has equal sample sizes, it will work with unequal sample sizes, too. * Code by Marta Garcia-Granero. * Example dataset *. DATA LIST FREE /group(F4.0) wghtgain(F8.0). BEGIN DATA 1 175 1 149 1 132 1 187 1 218 1 123 1 151 1 248 1 200 1 206 1 219 1 179 1 234 1 206 2 142 2 214 2 311 2 249 2 337 2 176 2 262 2 211 2 302 2 216 2 195 2 236 2 253 2 199 END DATA. VAR LABEL group 'Treatment group' /wghtgain 'Weight gain (pounds)'. VALUE LABELS group 1 'Control' 2 'Vit. A'. ************************************************ * CI CODE * ************************************************ * If n1*n2>10000, change MXLOOPS accordingly. * Data must be presorted by group and depvar. * A temporary listwise deletion of missing data is done before running MATRIX *. SET MXLOOPS=10000. SORT CASES BY group (A) wghtgain (A) . TEMPORARY. SELECT IF ((NOT MISSING(group)) AND (NOT MISSING(wghtgain))). MATRIX. PRINT /TITLE 'HODGES-LEHMANN CONFIDENCE INTERVAL FOR THE DIFFERENCE BETWEEN TWO MEDIANS'. PRINT /TITLE 'Underlying assumption: the distributions are similar in shape'. * Get data: replace variable names by your own *. GET group /VAR=group. GET depvar /VAR=wghtgain. * Get group values, count sample sizes & split data in two groups *. COMPUTE totaln=NROW(group). COMPUTE ngrp1=group(1). COMPUTE ngrp2=group(totaln). PRINT {ngrp1;ngrp2} /TITLE='Group values' /RLABELS='Group A=' 'Group B=' /FORMAT='f5.0'. COMPUTE n=CSUM(DESIGN(group)). PRINT {T(n)} /FORMAT='f5.0' /TITLE='Sample sizes' /RLABELS=' N(a)=' ' N(b)='. DO IF RMIN(n) GT 1. /* Don't compute CI if any ni=1. COMPUTE group1=MAKE(1,n(1),0). COMPUTE group2=MAKE(1,n(2),0). LOOP i=1 TO n(1). + COMPUTE group1(i)=depvar(i). END LOOP. LOOP i=1+n(1) TO totaln. + COMPUTE group2(i-n(1))=depvar(i). END LOOP. RELEASE ngrp1,ngrp2,group,depvar,totaln. * Five points summary for both samples *. COMPUTE min1=RMIN(group1). COMPUTE max1=RMAX(group1). COMPUTE pair=(TRUNC(n(1)/2) EQ n(1)/2). /* Check if n(1) is odd (0) or even (1) *. DO IF pair EQ 1. /* Tukey's hinges for even samples *. - COMPUTE lmedian=group1(n(1)/2). - COMPUTE umedian=group1(1+n(1)/2). - COMPUTE median1=(lmedian+umedian)/2. - RELEASE lmedian,umedian. - DO IF TRUNC(n(1)/4) NE (n(1)/4)). - COMPUTE q11=group1(1+(n(1)/2)/2). - COMPUTE q31=group1(1+(3*n(1)/2)/2). - ELSE. - COMPUTE lq11=group1(n(1)/4). - COMPUTE uq11=group1(1+n(1)/4). - COMPUTE lq31=group1(3*n(1)/4). - COMPUTE uq31=group1(1+3*n(1)/4). - COMPUTE q11=(lq11+uq11)/2. - COMPUTE q31=(lq31+uq31)/2. - RELEASE lq11,uq11,lq31,uq31. - END IF. ELSE. /* Tukey's hinges for odd samples *. - COMPUTE median1=group1((n(1)+1)/2). - DO IF TRUNC((1+n(1))/4) EQ (1+n(1))/4). - COMPUTE lq11=group1((n(1)+1)/4). - COMPUTE uq11=group1(1+(n(1)+1)/4). - COMPUTE lq31=group1(3*(n(1)+1)/4-1). - COMPUTE uq31=group1(3*(n(1)+1)/4). - COMPUTE q11=(lq11+uq11)/2. - COMPUTE q31=(lq31+uq31)/2. - RELEASE lq11,uq11,lq31,uq31. - ELSE. - COMPUTE q11=group1(((1+(n(1)+1)/2))/2). - COMPUTE q31=group1(((3*(n(1)+1)/2)-1)/2). - END IF. END IF. COMPUTE min2=RMIN(group2). COMPUTE max2=RMAX(group2). COMPUTE pair=(TRUNC(n(2)/2) EQ n(2)/2). /* Check if n(2) is odd (0) or even (1) *. DO IF pair EQ 1. /* Tukey's hinges for even samples *. - COMPUTE lmedian=group2(n(2)/2). - COMPUTE umedian=group2(1+n(2)/2). - COMPUTE median2=(lmedian+umedian)/2. - RELEASE lmedian,umedian. - DO IF TRUNC(n(2)/4) NE (n(2)/4)). - COMPUTE q12=group2(1+(n(2)/2)/2). - COMPUTE q32=group2(1+(3*n(2)/2)/2). - ELSE. - COMPUTE lq12=group2(n(2)/4). - COMPUTE uq12=group2(1+n(2)/4). - COMPUTE lq32=group2(3*n(2)/4). - COMPUTE uq32=group2(1+3*n(2)/4). - COMPUTE q12=(lq12+uq12)/2. - COMPUTE q32=(lq32+uq32)/2. - RELEASE lq12,uq12,lq32,uq32. - END IF. ELSE. /* Tukey's hinges for odd samples *. - COMPUTE median2=group2((n(2)+1)/2). - DO IF TRUNC((1+n(2))/4) EQ (1+n(2))/4). - COMPUTE lq12=group2((n(2)+1)/4). - COMPUTE uq12=group2(1+(n(2)+1)/4). - COMPUTE lq32=group2(3*(n(2)+1)/4-1). - COMPUTE uq32=group2(3*(n(2)+1)/4). - COMPUTE q12=(lq12+uq12)/2. - COMPUTE q32=(lq32+uq32)/2. - RELEASE lq12,uq12,lq32,uq32. - ELSE. - COMPUTE q12=group2(((1+(n(2)+1)/2))/2). - COMPUTE q32=group2(((3*(n(2)+1)/2)-1)/2). - END IF. END IF. PRINT {median1,q11,q31,min1,max1,(q31-q11),(max1-min1);median2,q12,q32,min2,max2,(q32-q12),(max2-min2)} /FORMAT='f8.1' /TITLE='Five point summaries & measures of spread' /CLABELS='Median' 'Q1' 'Q3' 'Min' 'Max' 'IQR' 'Range' /RLABELS='Group A' 'Group B'. RELEASE median1,q11,q31,min1,max1,median2,q12,q32,min2,max2. * Compute vector of all differences *. COMPUTE n1n2=n(1)&*n(2). COMPUTE diff=MAKE(1,n1n2,0). LOOP i=1 TO n(1). - LOOP j=1 TO n(2). - COMPUTE diff(n(2)*(i-1)+j)=group1(i)-group2(j). - END LOOP. END LOOP. * Sort differences (algorithm by R Ristow & J Peck) *. COMPUTE sdiff = diff. COMPUTE sdiff(grade(diff)) = diff. RELEASE diff,group1,group2. * Compute median of differences *. COMPUTE pair=(TRUNC(n1n2/2) EQ n1n2/2). /* Check if n1n2 is odd (0) or even (1) *. DO IF pair EQ 0. /* Median formula for odd samples. - COMPUTE median=sdiff((n1n2+1)/2). ELSE. /* Median formula for even samples *. - COMPUTE median=(sdiff(n1n2/2)+sdiff(1+(n1n2/2)))/2. END IF. RELEASE pair. PRINT median /TITLE='Difference between population medians * (A - B) ' /RLABELS='Point' /FORMAT='f8.1'. PRINT /TITLE='(*) Remember it is NOT the same as the difference between sample medians'. * Exact or asymptotic 95% & 99% CI *. DO IF ((n(1) LE 20) AND (n(2) LE 20)). /* Exact Mann-Whitney's critical values *. - COMPUTE d95={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0; 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,2,2,2,2; 0,0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8; 0,0,0,0,1,2,3,4,4,5,6,7,8,9,10,11,11,12,13,13; 0,0,0,1,2,3,5,6,7,8,9,11,12,13,14,15,17,18,19,20; 0,0,1,2,3,5,6,8,10,11,13,14,16,17,19,21,22,24,25,27; 0,0,1,3,5,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34; 0,0,2,4,6,8,10,13,15,17,19,22,24,26,29,31,34,36,38,41; 0,0,2,4,7,10,12,15,17,20,23,26,28,31,34,37,39,42,45,48; 0,0,3,5,8,11,14,17,20,23,26,29,33,36,39,42,45,48,52,55; 0,0,3,6,9,13,16,19,23,26,30,33,37,40,44,47,51,55,58,62; 0,1,4,7,11,14,18,22,26,29,33,37,41,45,49,53,57,61,65,69; 0,1,4,8,12,16,20,24,28,33,37,41,45,50,54,59,63,67,72,76; 0,1,5,9,13,17,22,26,31,36,40,45,50,55,59,64,69,74,78,83; 0,1,5,10,14,19,24,29,34,39,44,49,54,59,64,70,75,80,85,90; 0,1,6,11,15,21,26,31,37,42,47,53,59,64,70,75,81,86,92,98; 0,2,6,11,17,22,28,34,39,45,51,57,63,69,75,81,87,93,99,105; 0,2,7,12,18,24,30,36,42,48,55,61,67,74,80,86,93,99,106,112; 0,2,7,13,19,25,32,38,45,52,58,65,72,78,85,92,99,106,113,119; 0,2,8,13,20,27,34,41,48,55,62,69,76,83,90,98,105,112,119,127}. - COMPUTE d99={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0; 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0; 0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,2,2,2,3,3; 0,0,0,0,0,0,0,1,1,2,2,3,3,4,5,5,6,6,7,8; 0,0,0,0,0,1,1,2,3,4,5,6,7,7,8,9,10,11,12,13; 0,0,0,0,1,2,3,4,5,6,7,9,10,11,12,13,15,16,17,18; 0,0,0,0,1,3,4,6,7,9,10,12,13,15,16,18,19,21,22,24; 0,0,0,1,2,4,6,7,9,11,13,15,17,18,20,22,24,26,28,30; 0,0,0,1,3,5,7,9,11,13,16,18,20,22,24,27,29,31,33,36; 0,0,0,2,4,6,9,11,13,16,18,21,24,26,29,31,34,37,39,42; 0,0,0,2,5,7,10,13,16,18,21,24,27,30,33,36,39,42,45,48; 0,0,1,3,6,9,12,15,18,21,24,27,31,34,37,41,44,47,51,54; 0,0,1,3,7,10,13,17,20,24,27,31,34,38,42,45,49,53,57,60; 0,0,1,4,7,11,15,18,22,26,30,34,38,42,46,50,54,58,63,67; 0,0,2,5,8,12,16,20,24,29,33,37,42,46,51,55,60,64,69,73; 0,0,2,5,9,13,18,22,27,31,36,41,45,50,55,60,65,70,74,79; 0,0,2,6,10,15,19,24,29,34,39,44,49,54,60,65,70,75,81,86; 0,0,2,6,11,16,21,26,31,37,42,47,53,58,64,70,75,81,87,92; 0,0,3,7,12,17,22,28,33,39,45,51,57,63,69,74,81,87,93,99; 0,0,3,8,13,18,24,30,36,42,48,54,60,67,73,79,86,92,99,105}. - COMPUTE u95=d95(n(1),n(2)). - COMPUTE u99=d99(n(1),n(2)). - PRINT /TITLE='Exact confidence intervals calculated (N(a) and N(b) =< 20)'. - DO IF u95 EQ 0. - PRINT /TITLE'Warning: sample size is too low for accurate 95%CI. Discard it.'. - COMPUTE u95=1. /* Replace 0 by 1 to avoid a computing error. - END IF. - DO IF u99 EQ 0. - PRINT /TITLE='Warning: sample size is too low for accurate 99%CI. Discard it.'. - COMPUTE u99=1. /* Replace 0 by 1 to avoid a computing error. - END IF. - RELEASE d95,d99. ELSE. /* Asymptotic Mann-Whitney's critical values *. - COMPUTE u95=TRUNC(n1n2/2-0.5-1.960*sqrt(n1n2*(n(1)+n(2)+1)/12)). - COMPUTE u99=TRUNC(n1n2/2-0.5-2.576*sqrt(n1n2*(n(1)+n(2)+1)/12)). - PRINT /TITLE'Asymptotic confidence intervals calculated (N(a) and/or N(b) > 20)'. END IF. COMPUTE low95=sdiff(u95). COMPUTE high95=sdiff(n1n2-u95). COMPUTE low99=sdiff(u99). COMPUTE high99=sdiff(n1n2-u99). PRINT {low95,high95;low99,high99} /FORMAT='f8.1' /TITLE='CI for difference between medians' /RLABELS='95%Level' '99%Level' /CLABELS='Lower' 'Upper'. ELSE. PRINT /TITLE='At least one sample size = 1. No CI can be calculated.'. END IF. END MATRIX.