1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
* Replace outliers with the value mean + n*SD (high outliers) or mean - n*SD (low outliers).
* This assumes you know it is appropriate to do this replacement! In the next warning
* box, I reproduce recommendations made by Arthur J. Kendall.
 
*********************** WARNING! ****************************************.
* Changing the values of cases suspected to be outliers frequently leads to
  misleading results.
* This procedure should be used very sparingly. Be sure that suspected
  outliers are not data entry errors before using this syntax.
* Analyze your data with and without value replacement.
* It is advisable to consult with a statistician before changing the values
  of suspected outliers.
*************************************************************************.



* This macro is a generalization of a syntax posted by Mirko Wendland to the SPSSX-L list 
*	on 2002/09/02.

* Macro posted to SPSSX-L on 2002/09/02 by Raynald Levesque.
* SPSS site at http://pages.infinit.net/rlevesqu/index.htm.

*/////////////////.
DEFINE !mac1 (savfile=!TOKENS(1)
             /tmpdir=!TOKENS(1)
             /grp=!TOKENS(1)
             /nbsd=!TOKENS(1)
             /vars=!CMDEND)

/* ---------------------------------------------------------- */
/* First stage: sort the raw data, build one row of per-group */
/* statistics, count the groups and hand over to mac2.        */
/*                                                            */
/* ARGUMENTS                                                  */
/*   savfile  quoted path and name of the sav file to process */
/*   tmpdir   quoted directory receiving the temporary files  */
/*            (sort.sav and syntax.sps are written here)      */
/*   grp      categorical variable that defines the groups    */
/*   nbsd     number of standard deviations used as the limit */
/*   vars     variables whose outliers are to be replaced by  */
/*            mean +/- nbsd * SD. Names must be at most 7     */
/*            characters long (a one-character suffix is      */
/*            appended to build the derived names)            */
/*                                                            */
/* RESULT                                                     */
/*   New variables hold the values limited to the given       */
/*   number of SD from the group mean. Each new name is the   */
/*   old name followed by 2, eg the limited values of salary  */
/*   are stored in salary2.                                   */
/* ---------------------------------------------------------- */

GET FILE=!savfile.
SORT CASES BY !grp.
SAVE OUTFILE=!tmpdir + 'sort.sav'.

* one row per group holding the mean and the sd of each listed variable.

AGGREGATE OUTFILE=*
  /PRESORTED
  /BREAK=!grp
!DO !var !IN (!vars)
  /!CONCAT(!var,m)=MEAN(!var)
  /!CONCAT(!var,s)=SD(!var)
!DOEND.

* number the groups, count them and write that count into a one line macro.

AUTORECODE VARIABLES=!grp /INTO grpcode.
COMPUTE nobreak=1.
RANK VARIABLES=grpcode BY nobreak /N INTO n.
DO IF ($CASENUM EQ 1).
WRITE OUTFILE=!tmpdir + 'syntax.sps' /"DEFINE !n()"n"!ENDDEFINE.".
END IF.
EXECUTE.
INCLUDE !tmpdir + 'syntax.sps'.

!mac2 nbgrp=!EVAL(!n) tmpdir=!tmpdir grp=!grp nbsd=!nbsd vars=!vars.
!ENDDEFINE.
*----------------.


*/////////////////.
DEFINE !mac2(nbgrp=!TOKENS(1) /tmpdir=!TOKENS(1) /grp=!TOKENS(1) /nbsd=!TOKENS(1) /vars=!CMDEND)
/* Second stage, invoked by mac1 while the active file is the  */
/* aggregated file (one row per group with the mean, the sd    */
/* and grpcode).                                               */
/*                                                             */
/* ARGUMENTS                                                   */
/*   nbgrp   number of groups (grpcode runs from 1 to nbgrp)   */
/*   tmpdir  quoted directory holding sort.sav, also receives  */
/*           lohi.sav                                          */
/*   grp     grouping variable, used as the MATCH FILES key    */
/*   nbsd    number of standard deviations used as the limit   */
/*   vars    variables whose outliers are to be replaced       */
/*                                                             */
/* RESULT                                                      */
/*   For each listed variable the active file gains the bounds */
/*   (suffix l and h) and the limited copy (suffix 2).         */

/* compute the values for every border                         */
/* The group test uses grpcode, the AUTORECODE of the grouping */
/* variable, so the bounds are computed whatever the original  */
/* group values are (string, or numeric but not coded 1 to n). */

!DO !v !IN (!vars)
!DO !grpnb = 1 !TO !nbgrp
+  DO IF grpcode=!grpnb.
+	COMPUTE !CONCAT(!v,l) = !CONCAT(!v,m)  - (!nbsd * !CONCAT(!v,s)).
+	COMPUTE !CONCAT(!v,h) = !CONCAT(!v,m)  + (!nbsd * !CONCAT(!v,s)).
+  END IF.
!DOEND
!DOEND

SAVE OUTFILE=!tmpdir + 'lohi.sav'.

* match the rawdata with the mean- and sd-data.

MATCH FILES FILE=!tmpdir + 'sort.sav' 
        /TABLE=!tmpdir + 'lohi.sav'
        /BY=!grp.

* replace the outliers with the high or the low bound of their group.
* the scratch variable starts as the original value and is overwritten.
* only when that value falls outside the bounds.

DO REPEAT num=1 TO !nbgrp.
DO IF (grpcode = num).
  !DO !v !IN (!vars)
+	COMPUTE #tmp = !v.
+	IF (!v < !CONCAT(!v,l)) #tmp = !CONCAT(!v,l).
+	IF (!v > !CONCAT(!v,h)) #tmp = !CONCAT(!v,h).
+	COMPUTE !CONCAT(!v,2) = #tmp.
  !DOEND
END IF.
END REPEAT PRINT.

!ENDDEFINE.
*----------------.


**** Example run of the macro.
* MPRINT echoes the commands generated by the macro expansion.
* NOTE(review): the paths below contain doubled backslashes, confirm they resolve as intended.

SET MPRINT=ON.
!mac1 savfile='c:\\program files\\spss\\employee data.sav'
      tmpdir='c:\\temp\\'
      grp=jobcat
      nbsd=2
      vars=salary prevexp.
EXECUTE.