1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
'SPSS to STATA conversion and data documentation utility version 0.0. Alasdair Crockett, UK Data Archive, University of Essex, 2004-5

' For a description of what this script does read the accompanying documentation
' which is available at #URL here#

'The software has not been developed or tested to a commercial standard and therefore is made available
'strictly On the basis that you accept it as is, and that you are solely responsible for any use made of it.
'The University of Essex does give any warranties, including without limitation, as to the accuracy of the
'software and disclaims any liability to you or any third party anywhere in the world for any injury, damage,
'direct or indirect loss, consequential or economic loss or any other loss suffered as a result of the use of
'or reliance upon the software to the maximum extent permitted by law.

Sub Main
	Dim strPath As String, statapath As String, strFileMask As String, strDataOutputDir As String, posn As Integer, fName As String, FileName As String, strFname As String, warningdir As String
	'get user input variables and make directories
	strPath = InputBox$("Enter path to directory containing SPSS .sav files, e.g. C:\myarea\data")
	stataexepath = InputBox$("Enter path to wstata.exe, e.g. C:\Program files\stata, or if network version, e.g. Z:")
	strPath=strPath & "\"
	strFileMask	="*.sav"
	strDataOutputDir=strPath & "tab\"
	On Error GoTo direrror
	MkDir strDataOutputDir
	statapath= strPath & "stata\"
	MkDir statapath
	warningdir=strPath & "data_documentation\"
	MkDir warningdir
	'write introductory info to SPSS_to_Stata conversion warning file
	Open warningdir & "SPSS_to_STATA_conversion.rtf" For Output As #3
	Print #3, "{\rtf1\ansi\deff0\deftab1700{\fonttbl{\f0\fswiss MS Sans Serif;}{\f1\froman\fcharset2 Symbol;}{\f2\fswiss Arial;}{\f3\fswiss\fprq2 Arial;}{\f4\fmodern\fprq5 Courier New;}{\f5\fswiss Arial;}}"
	Print #3, 	"{\colortbl;\red0\green0\blue0;\red255\green0\blue0;\red0\green0\blue255;\red0\green0\blue255;\red10\green10\blue160;}"
	Print #3, 	"\deflang2057\pard\plain\f2\fs16\cf1"
	Print #3,  "\par{\fs28\b\ul UK Data Archive SPSS to STATA Conversion Information File" & "}{\fs24\b" & Chr(13) & Chr(10) & Chr(13) & Chr(10)
	Print #3,  "\par" & Chr(13) & Chr(10)
	Print #3,  "\par" & Chr(13) & Chr(10)
	Print #3,  "\par }{\f2\cf2\fs20 This is a log of actual or potential sources of data and metadata (label) loss upon conversion from SPSS to STATA. Though rare, such losses are unavoidable given the differential data handling limits of the two packages." & Chr(13) & Chr(10)
	Print #3,  "\par }{" & Chr(13) & Chr(10)
	Print #3,  "\par }{\f2\cf1\b\ul\fs20 The losses/potential losses reported are:" & Chr(13) & Chr(10)
	Print #3,  "\par }{"
	Print #3,  "\par \f2\cf1\fs20\ul Truncation and potential truncation of data and/or labels
	Print #3,  "\par }{" & Chr(13) & Chr(10)
	Print #3,  "\par \f2\cf1\fs20 1. String variables defined with a width of > 80 characters (the intercooled STATA limit) or 244 characters (the special edition limit) in the SPSS file" & Chr(13) & Chr(10)
	Print #3,  "\par 2. Variable labels that are > 80 characters (the STATA limit) in the SPSS file" & Chr(13) & Chr(10)
	Print #3,  "\par 3. Value labels that are > 32 characters (the STATA limit) in the SPSS file" & Chr(13) & Chr(10)
	Print #3,  "\par" & Chr(13) & Chr(10)
	Print #3,  "\par \f2\cf1\fs20\ul Outright loss of value labels" & Chr(13) & Chr(10)
	Print #3,  "\par }{" & Chr(13) & Chr(10)
	Print #3,  "\par \f2\cf1\fs20 4. String variables that have value labels in the SPSS file" & Chr(13) & Chr(10)
	Print #3,  "\par 5. Non-integer values that have value labels in the SPSS file" & Chr(13) & Chr(10)
	Print #3,  "\par  " & Chr(13) & Chr(10)
	Print #3,  "\par" & Chr(13) & Chr(10)
	Print #3,  "\par If any warnings are logged for any of the data files below please refer to the SPSS data dictionary, supplied as <data file name>_UKDA_Data_Dictionary.rtf, which shows the metadata as it was in SPSS." & Chr(13) & Chr(10)
	Print #3,  "\par " & Chr(13) & Chr(10)
	Print #3,  "\par Note: all SPSS discrete user missing values have been translated to STATA missing values (.a,.b and .c). The originating SPSS value has been appended to the start of the value label in the STATA data file." & Chr(13) & Chr(10)
	Print #3,  "\par " & Chr(13) & Chr(10)
	Print #3,  "\par }{\f2\cf1\b\ul\fs20 Start of conversion warning log:" & Chr(13) & Chr(10)
	'now call main subroutine to do everything else
	Call conversion (strPath, strFileMask, strDataOutputDir, posn, fName, FileName, strFname, statapath, stataexepath, warningdir)
	'now save rtf SPSS_to_Stata conversion log file
	Print #3,  "\par }}" & Chr(13) & Chr(10)
	Close #3
	'Produce popup box to indicate script has finished. Note dialog routine doesn't work if using SPSS ver 11.0
	'3 beeps To indicate script has finished - will not sound if you don't have admin rights to machine
	Shell "c:\winnt\system32\command.com /c echo "
    Wait 1
    Shell "c:\winnt\system32\command.com /c echo "
    Wait 1
    Shell "c:\winnt\system32\command.com /c echo "
	'dialog box to also indicate script has finished
    Begin Dialog UserDialog 200,120
    Text 10,10,180,15,"Script has finished running"
	TextBox 10,25,180,20,.Text$
    OKButton 80,90,40,20
    End Dialog
    Dim dlg As UserDialog
    dlg.Text$ = "about time too!"
    Dialog dlg
    DirError: 'error trapping for if a directory already exists
	Debug.Print "directory already exists so skipping creation"
    Resume Next
End Sub

Sub conversion (strPath As String, strFileMask As String, strDataOutputDir As String, posn As Integer, fName As String, FileName As String, strFname As String, statapath As String, stataexepath As String, warningdir As String)
'export data to tab-delimited file and remove spaces to create system missing values in Stata
Dim size As Variant, textqual As String, DateVar As Integer, Varformat As Long, VarType As Variant, VarWidth As Variant, VarFracts As Variant, objDataDoc As ISpssDataDoc, objDocuments As ISpssDocuments, count As Integer, MissingValues As Variant, MissingCounts As Variant, Missing As Long, flag As Integer, count2 As Integer, test As Variant, numvars As Variant, numvars2 As Variant, NumCases As Variant, VarName As Variant, VarName2 As Variant, VarLabel As Variant, ValName As Variant
strFname = Dir$(strPath & strFileMask)
While strFname <> ""
	posn = 0
	fName = Right(strFname, Len(strFname) - posn)
	'get filename without extension
	posn = InStr(fName, ".")
	If posn <> 0 Then
    	fName = Left(fName, posn - 1)
    End If
    FileName = fName
	strCmd = "get file='" & strPath & strFname & "'." & vbCr
	objSpssApp.ExecuteCommands strCmd , True
	size=FileLen(strPath & strFname) ' size used to determine amount of memory to give stata
	size=100+Int(1.2*(size/1024))
	'create tab-delimited format data file from .sav file
	strCmd = "save translate outfile='" & strDataOutputDir & FileName & ".tab'" & "/type=tab /map /fieldnames /replace." & vbCr
	objSpssApp.ExecuteCommands strCmd , True
	'get conversion log ready for data file
	Print #3,  "\par }{\f2\cf5\fs20" & Chr(13) & Chr(10)
	Print #3,  "\par Warnings for " & "\b " & FileName & Chr(13) & Chr(10)
	Print #3,  "\par }{" & Chr(13) & Chr(10)
	Print #3,  "\par }{\f2\cf1 i) Truncation and potential truncation of data and/or labels" & Chr(13) & Chr(10)
	Print #3,  "\par }{" & Chr(13) & Chr(10)
	'Create the .do file
	textqual="""
	Open statapath & FileName & ".do" For Output As #2
	quote=Chr(34)
	Print #2, "insheet using " & quote & strDataOutputDir & FileName & ".tab" & quote & ", clear double"
	Print #2, "destring, replace"
	Print #2, "compress"
	'define additional variables needed
	Set objDocuments = objSpssApp.Documents
	Set objDataDoc = objDocuments.GetDataDoc (0)
	Set objSPSSInfo = objSpssApp.SpssInfo
	numvars=objSPSSInfo.NumVariables-1
	flag=0
	Missing = objDataDoc.GetVariableMissingValues(MissingCounts, MissingValues)
	'get data dictionary file ready
	NumCases = objDataDoc.GetNumberOfCases
	numvars2=objSPSSInfo.NumVariables
	Open warningdir & FileName & "_Data_Dictionary.rtf" For Output As #1
	Print #1, "{\rtf1\ansi\deff0\deftab1200{\fonttbl{\f0\fswiss MS Sans Serif;}{\f1\froman\fcharset2 Symbol;}{\f2\fswiss Arial;}{\f3\fswiss\fprq2 Arial;}{\f4\fmodern\fprq5 Courier New;}{\f5\fswiss Arial;}}{\colortbl;\red0\green0\blue0;\red255\green0\blue0;\red100\green100\blue100;\red0\green0\blue255;\red10\green10\blue160;}\deflang2057\pard\plain\f2\fs20\cf1"
	Print #1, "\par {\fs28\b\ul UK Data Archive Data Dictionary"
	Print #1, "\par"
	Print #1, "\par }{\b\f2\fs20\cf1\ File-level information:"
	Print #1, "\par"
	Print #1, "\par }{\f2\fs20\cf1 File Name = " & Chr (9) & Chr (9) & "\f2\fs20\cf5" & FileName
	Print #1, "\par }{\f2\fs20\cf1 Number of variables = " & Chr (9) & "\f2\fs20\cf5 " & numvars2
	Print #1, "\par }{\f2\fs20\cf1 Number of cases = " & Chr (9) & "\f2\fs20\cf5 " & NumCases
	Print #1, "\par"
	Print #1, "\par"
	Print #1, "\par }{\f2\fs20\cf1\b Variable-level information:"
	'variable level loop to add variable labels and recreate date formats
	For I=0 To numvars
		count=0
		VarTy= objSPSSInfo.VarType(I)
		VarLength=objSPSSInfo.VarLength(I)
		VarName= objSPSSInfo.VariableAt(I)
		VarName2=LCase(VarName)
		VarLabel= objSPSSInfo.VariableLabelAt(I)
		VarLabel=Replace (VarLabel,textqual,"'")
		Varformat = objDataDoc.GetVariableFormats(VarType, VarWidth, VarFract)
		Print #2, "label variable " & VarName2 & " " & textqual & VarLabel & textqual
		'redefine date/time variables
		DateVar=0
		If VarType (I) = 20 Or VarType (I) = 22 Or VarType (I) = 23 Or VarType (I) = 24 Or VarType (I) = 28  Or VarType (I) = 38  Or VarType (I) = 39 Then DateVar=1
		If DateVar=1 Then Print #2, "generate tempvar = date(" & VarName2 & ", " & textqual & "mdy" & textqual & ")"
		If DateVar=1 Then Print #2, "destring " & VarName2 & ", replace force"
		If DateVar=1 Then Print #2, "replace " & VarName2 & "=tempvar"
		If DateVar=1 Then Print #2, "format " & VarName2 & " %d"
		If DateVar=1 Then Print #2, "drop tempvar"
		'redefine discrete user missing values in STATA
		If MissingCounts(I)>0 Then Print #2, "recode " & VarName2 & " (" & MissingValues(I,0) & "=.a)"
		If MissingCounts(I)>1 Then Print #2, "recode " & VarName2 & " (" & MissingValues(I,1) & "=.b)"
		If MissingCounts(I)>2 Then Print #2, "recode " & VarName2 & " (" & MissingValues(I,2) & "=.c)"
		If MissingCounts(I)=-3 Then Print #2, "recode " & VarName2 & " (" & MissingValues(I,2) & "=.a)"
		' write information to data dictionary file
		Dim info As Long, info1 As Variant, info2 As Variant, info3 As Variant, info4 As Variant, info5 As Variant, NV As Integer
		info = objDataDoc.GetVariableInfo(info1, info2, info3, info4, info5)
		NV= I+1
		Print #1, "\par }{\cf1\b"
		If Len(VarLabel)>1 Then Print #1, "\par Pos. = " & "}{\f2\fs20\cf4 " & NV & Chr (9) & "}{\b\cf1 Variable = " & "}{\f2\fs20\cf4 " & VarName & Chr (9) & "}{\b\cf1 Variable label = " & "}{\cf4 " & VarLabel
		If Len(VarLabel)<2 Then Print #1, "\par Pos. = " & "}{\f2\fs20\cf4 " & NV & Chr (9) & "}{\b\cf1 Variable = " & "}{\f2\fs20\cf4 " & VarName & Chr (9) & "}{\cf5 This variable has no label in SPSS"
		If VarTy=0 And info4(I)=1 Then Print #1, "\par }{\cf3 This variable is  }{\cf5\i numeric}{\cf3, the SPSS measurement level is }{\cf5\i nominal."
		If VarTy=0 And info4(I)=2 Then Print #1, "\par }{\cf3 This variable is  }{\cf5\i numeric}{\cf3, the SPSS measurement level is }{\cf5\i ordinal."
		If VarTy=0 And info4(I)=3 Then Print #1, "\par }{\cf3 This variable is  }{\cf5\i numeric}{\cf3, the SPSS measurement level is }{\cf5\i scale."
		If VarTy=1 Then Print #1, "\par }{\cf3 This variable is  }{\cf5\i string}{\cf3\, the SPSS measurement level is }{\cf5\i nominal."
		If VarTy=2 Then Print #1, "\par }{\cf3 This variable is  }{\cf5\i 'other' }{\cf3 (not numeric or string)."
		If MissingCounts(I)=3 Then Print #1, "\par }{\cf1 SPSS user missing values = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16 (.a in STATA) \cf1 and \cf4\fs20 " & MissingValues(I,1) & " \cf4\fs16 (.b in STATA)" & " \cf1\fs16 and \cf4\fs20 " & MissingValues(I,2) & " \fs16 (.c in STATA)"
		If MissingCounts(I)=-3 Then Print #1, "\par }{\cf1 SPSS user missing values = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16\cf1 thru \cf4\fs20 " & MissingValues(I,1) & " \cf1\fs16 and \cf4\fs20 " & MissingValues(I,2) & " \fs16 (.a in STATA)"
		If MissingCounts(I)=2 Then Print #1, "\par }{\cf1 SPSS user missing values = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16 (.a in STATA) \cf1 and \cf4\fs20 " & MissingValues(I,1) & " \fs16 (.b in STATA)"
		If MissingCounts(I)=-2 Then Print #1, "\par }{\cf1 SPSS user missing values = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16\cf1 thru \cf4\fs20 " & MissingValues(I,1)
		If MissingCounts(I)=1 Then Print #1, "\par }{\cf1 SPSS user missing value = " & "\cf4\fs20 " & MissingValues(I,0) & " \fs16 (.a in STATA)"
  		NumVals= objSPSSInfo.NumberOfValueLabels(I)-1
		'log potential truncation to SPSS_To_Stata rtf conversion file
	  	If VarTy =1 And VarLength>80 Then Print #3,  "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ " & Chr(9) & "String variable of width up to " & VarLength & " chars in SPSS. STATA max. is 80 chars (244 in SE)." & Chr(13) & Chr(10)
		varlablength= Len (VarLabel)
		If varlablength>80 Then Print #3,  "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ "& Chr(9) & "Variable label in SPSS is " & varlablength & " chars, this will be truncated to 80 chars in the STATA file." & Chr(13) & Chr(10)
		count2=0
		'value level loop to add commands to add value labels in STATA where appropriate, adding SPSS missing value codes to start of labels for user missing values
		For K=0 To NumVals
    		ValLabel= objSPSSInfo.ValueLabelAt(I,K)
    		ValLabel=Replace (ValLabel,textqual,"'")
			ValName=objSPSSInfo.ValueAt(I,K)
			If VarTy <> 1 Then test=CDbl(ValName)
			If VarTy <> 1 Then testint=CLng (test)
			If VarTy=1 Then test2=ValName
		Debug.Print VarName
		Debug.Print test
		Debug.Print MissingValues(I,0)
			If VarTy <>1 And (MissingValues(I,0)=test Or MissingValues(I,1)=test Or MissingValues(I,2)=test) Then flag=1 Else flag=0
			If Varty <>1 And test=testint Or (MissingValues(I,0)=test And MissingCounts(I)>0) Or (MissingValues(I,1)=test And MissingCounts(I)>0) Or (MissingValues(I,2)=test) Then count=count+1
			If count=1 Then Print #2, "label define " & VarName2 & " ";
			If Varty <>1 And test=testint And flag=0 Then Print #2, testint & " " & textqual & ValLabel & textqual & " ";
			If Varty <>1 And  MissingValues(I,0)=test And MissingCounts(I)>0  Then Print #2, ".a " & textqual & "(" & test & ") " & ValLabel & textqual & " ";
			If Varty <>1 And  MissingValues(I,0)=test And MissingCounts(I)<0  Then Print #2, test & " " & textqual & ValLabel & textqual & " ";
			If Varty <>1 And  MissingValues(I,1)=test And MissingCounts(I)>1  Then Print #2, ".b " & textqual & "(" & test & ") " & ValLabel & textqual & " ";
			If Varty <>1 And  MissingValues(I,1)=test And MissingCounts(I)<0  Then Print #2, test & " " & textqual & ValLabel & textqual & " ";
			If Varty <>1 And  MissingValues(I,2)=test And MissingCounts(I)>2  Then Print #2, ".c " & textqual & "(" & test & ") " & ValLabel & textqual & " ";
			If Varty <>1 And  MissingValues(I,2)=test And MissingCounts(I)=-3 Then Print #2, ".a " & textqual & "(" & test & ") " & ValLabel & textqual & " ";
			'log any value label truncation to SPSS_to_Stata rtf conversion file
			If Len (ValLabel)>32 And Varty<>1 And count2=0 Then Print #3,  "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ " & Chr(9) & "One or more long value labels in SPSS, which will be truncated at 32 chars in the STATA file." & Chr(13) & Chr(10)
			If Len (ValLabel)>32 Then count2=count2+1
			'write value-level info to data dictionary
			If K=0 Then Print #1, "\par " & Chr (9) & "}{\cf3\ul\fs16 Value label information for " & VarName
			If Varty<>1 Then Print #1, "\par }{\cf1\fs16" & Chr (9) & "Value = " & "}{\cf4\fs16 " & test & Chr (9) & "}{\cf1\fs16 Label = " & "}{\cf4\fs16 " & ValLabel
			If Varty=1 Then Print #1, "\par }{\cf1\fs16" & Chr (9) & "Value = " & "}{\cf4\fs16 " & test2 & Chr (9) & "}{\cf1\fs16 Label = " & "}{\cf4\fs16 " & ValLabel
		Next k
		If count>0 Then Print #2, vbCr & "label values " & VarName2 & " " & VarName2 & vbCr
		If count=0 Then Print #2, vbCr
	Next I
	Print #1, "\par }}"
	Close #1
	' now repeat loop to log outright loss of value labels in SPSS_to_Stata rtf conversion file
	Print #3,  "\par }{" & Chr(13) & Chr(10)
	Print #3,  "\par }{\f2\cf1\ ii) Outright loss of value labels" & Chr(13) & Chr(10)
	Print #3,  "\par }{" & Chr(13) & Chr(10)
	test=1
	testint=1
	For L=0 To numvars
		count3=0
		VarName= objSPSSInfo.VariableAt(L)
		VarName2=LCase(VarName)
		VarTy= objSPSSInfo.VarType(L)
		nvaluelabs=objSPSSInfo.NumberOfValueLabels(L)
		NumVals= objSPSSInfo.NumberOfValueLabels(L)-1
		' return STATA variable name case to that of SPSS
		If VarName <> VarName2 Then Print #2, "rename " & VarName2 & " " & VarName
		If VarTy = 1 And nvaluelabs>0 Then Print #3,  "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ " & Chr(9) & "There are value labels for string variables in SPSS. These will not exist in the STATA file." & Chr(13) & Chr(10)
		For M=0 To NumVals
			ValLabel= objSPSSInfo.ValueLabelAt(L,M)
			ValName=objSPSSInfo.ValueAt(L,M)
			If VarTy <> 1 Then test=CDbl(ValName)
    		If VarTy <> 1 Then testint=CLng (test)
			If VarTy <> 1 And testint <> test And Len (ValLabel)>0 And count3=0 Then Print #3,  "\par }{\f2\cf5 Variable = }{\cf4\b " & VarName & "}{\f2\cf1\ " & Chr(9) & "There are value labels for non-integer values in SPSS. These will not exist in the STATA file." & Chr(13) & Chr(10)
			If VarTy <> 1 And testint <> test And Len (ValLabel)>0 Then count3=1
		Next M
	Next L
	Set objSPSSInfo = Nothing
	'lastly, save .do file and execute using wstata.exe to create .dta file
	Print #2, "save " & quote & statapath & FileName & ".dta" & quote & ", replace" & vbCr
	Close #2
	ChDir statapath
	Set objOutputDoc = objSpssApp.GetDesignatedOutputDoc
	objOutputDoc.SelectAll
	objOutputDoc.Delete
	size=CStr(size)
	Shell stataexepath & "\wstata.exe /e /k" & size & " do " & quote & statapath & FileName & quote & ".do nolog"
	strFname = Dir$()
	Wend
End Sub