guidway
asked on
NetCdf VB Code, is there anyway to increase efficiency on this code?
This is a continuation of: https://www.experts-exchange.com/questions/20960898/parsing-large-data-in-VB-preferred-method.html (this is a separate issue than what I originally posted though)
Hopefully this is not too big, my apologies if so. Chatting with some of the experts in the above thread I've been told that I should be able to convert large data files in VB with little or no problems, however, I noticed there is a speed difference between running the code in VB and running it in Perl (about 2 minute difference on large datasets > 1GB). I'm wondering if anything in my VB code can be made more efficient to cut down on this speed issue and thought I'd post the code here and have some experts check it. I looked over it myself however this is the first time I read a binary file in VB so I may not be doing it the best way. I'm only posting the main parts of the code. The other subroutines that are not included seem to be pretty good although if you want me to post them also I can. Again, hopefully this is not too large and I would add more points if I could. thanks for any assistance!
'importClass.cls
Option Explicit
Private Type DimInfo
sName As String
lValue As Long
End Type
Private Type GattsInfo
sName As String
sType As Long
sValue As String
End Type
Private Type AttsInfo
sName As String
sType As Long
sValue As String
End Type
Private Type VarsInfo
sName As String
sValue As String
atts() As AttsInfo
dimsNum As Long
dimid() As Long
nc_type As Long
vsize As Long
begin As Long
fillValue As Long
End Type
Private Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (dest As Any, source As Any, ByVal numBytes As Long)
Private Const NC_BYTE As Integer = 1
Private Const NC_CHAR As Integer = 2
Private Const NC_SHORT As Integer = 3
Private Const NC_INT As Integer = 4
Private Const NC_FLOAT As Integer = 5
Private Const NC_DOUBLE As Integer = 6
Private Const NC_DIMENSION As Integer = 10
Private Const NC_VARIABLE As Integer = 11
Private Const NC_ATTRIBUTE As Integer = 12
Private salFile As Integer
Private outFile As Integer
Private dataType As Long
Private dims() As DimInfo
Private gatts() As GattsInfo
Private atts() As AttsInfo
Private vars() As VarsInfo
Sub extractNetCDFData()
Dim sheader As String
Dim lrecords As Long
Dim llength As Long
Dim lnext As Long
Dim mnext As Long
Dim strTemp As String
Dim tmpDouble As Double
Dim tmpShort As Integer
Dim i As Long, j As Long, k As Long
salFile = FreeFile
'testing a single file
Open "D:\dataFormats\NetCDF\tem p\test.nc" For Binary As #salFile
Dim tmp As String
outFile = FreeFile
Open "d:\dataFormats\NetCDF\tem p\test.txt " For Binary As #outFile
lnext = 1
mnext = 1
tmp = Getrec(lnext, 3)
sheader = tmp & Asc(Getrec(lnext, 1)) ' Read Header pos, length
lrecords = DecodeUSINT(Getrec(lnext, 4)) ' from position 5 for 4 decodes unsigned int
ReDim vars(1)
vars(0).begin = 0
Do
If Not vars(0).begin + 1 = lnext Then
dataType = DecodeUSINT(Getrec(lnext, 4)) ' also advances lNext
readDataType lnext, dataType, True
Else
For i = 0 To UBound(vars) - 1 Step 1
If vars(i).begin + 1 = lnext Then
If vars(i).nc_type = NC_DOUBLE Then
Put #outFile, , vbLf + vars(i).sName & ": "
For j = 0 To dims(i).lValue - 1 Step 1
tmp = Getrec(lnext, 8)
tmpDouble = Conversions.IEEEtoDouble(t mp)
If Not vars(i).fillValue = tmpDouble Or tmpDouble = 0 Then
Put #outFile, , tmpDouble & ","
'Debug.Print tmpDouble
Else
Put #outFile, , "_,"
End If
'Debug.Print "printed"
Next
ElseIf vars(i).nc_type = NC_SHORT Then
PBarForm.DataProgress.Min = 0
PBarForm.DataProgress.Max = vars(i).vsize
PBarForm.Label1.Caption = "Processing... test.nc"
Put #outFile, , vbLf + vars(i).sName & ": "
For j = 0 To vars(i).vsize - 1 Step 1
If j <> 0 Then
If vars(i).vsize Mod j = 0 Then
PBarForm.DataProgress.valu e = j
PBarForm.Refresh
DoEvents
End If
End If
tmp = Getrec(lnext, 2)
tmpShort = DecodeSHORT(tmp)
If Not vars(i).fillValue = tmpShort And Not tmpShort = 0 Then
Put #outFile, , tmpShort & ","
Else
Put #outFile, , "_,"
End If
Next
End If
Else
'Debug.Print "test"
End If
Next
End If
Loop While Not EOF(1)
Close #salFile
Close #outFile
End Sub
Private Function readDataType(curPos As Long, dataType As Long, globalAttr As Boolean) As Variant
Static curVarCount As Integer
Dim offset As Integer 'type offset
Dim i As Integer, j As Integer 'counter variables
Dim byteLength As Long, bytePaddedLength As Long
Dim dataValue As Variant
Dim iType As Integer
Dim lType As Long
Dim sType As String
Dim siType As Single
Dim dType As Double
Dim bType As Byte
Dim totalCnt As Long
Select Case dataType
Case NC_BYTE:
offset = 1 ' byte
bType = returnType(curPos, offset)
Case NC_CHAR:
offset = 1 ' char
sType = returnType(curPos, offset)
readDataType = sType
Case NC_SHORT:
offset = 2 ' short
sType = returnType(curPos, offset)
readDataType = sType
Case NC_INT:
offset = 4 ' signed int
lType = returnType(curPos, offset)
readDataType = lType
Case NC_FLOAT:
offset = 4 ' float(single)
siType = returnType(curPos, offset)
readDataType = siType
Case NC_DOUBLE:
offset = 8 ' double
dType = returnType(curPos, offset)
Case Else:
Select Case dataType
Case NC_DIMENSION:
totalCnt = DecodeUSINT(Getrec(curPos, 4))
ReDim dims(totalCnt)
For i = 0 To UBound(dims) - 1 Step 1
byteLength = DecodeUSINT(Getrec(curPos, 4))
bytePaddedLength = byteLength
While bytePaddedLength Mod 4 > 0
bytePaddedLength = bytePaddedLength + 1 ' make sure whole byte is read
Wend 'If lsize Mod 2 > 0 Then lsize = lsize + 1 ' make sure whole byte is read
dims(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
dims(i).lValue = DecodeUSINT(Getrec(curPos, 4))
'Debug.Print dims(i).sName + " >> " + str(dims(i).lValue)
Put #outFile, , dims(i).sName + " >> " + str(dims(i).lValue) + vbLf
Next
Case NC_VARIABLE:
totalCnt = DecodeUSINT(Getrec(curPos, 4))
ReDim vars(totalCnt)
For i = 0 To UBound(vars) - 1 Step 1
byteLength = DecodeUSINT(Getrec(curPos, 4))
bytePaddedLength = byteLength
While bytePaddedLength Mod 4 > 0
bytePaddedLength = bytePaddedLength + 1
Wend
vars(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
vars(i).dimsNum = DecodeUSINT(Getrec(curPos, 4))
ReDim vars(i).dimid(vars(i).dims Num)
For j = 0 To UBound(vars(i).dimid) - 1
vars(i).dimid(j) = DecodeUSINT(Getrec(curPos, 4))
Next
'Debug.Print vbLf + UCase(vars(i).sName)
Put #outFile, , vbLf + UCase(vars(i).sName)
dataType = DecodeUSINT(Getrec(curPos, 4))
curVarCount = i
dataValue = readDataType(curPos, dataType, False)
'Debug.Print dataValue
Next
Case NC_ATTRIBUTE:
If globalAttr = True Then
totalCnt = DecodeUSINT(Getrec(curPos, 4))
ReDim gatts(totalCnt)
For i = 0 To UBound(gatts) - 1 Step 1
byteLength = DecodeUSINT(Getrec(curPos, 4))
bytePaddedLength = byteLength
While bytePaddedLength Mod 4 > 0
bytePaddedLength = bytePaddedLength + 1 ' make sure whole byte is read
Wend
gatts(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
dataType = DecodeUSINT(Getrec(curPos, 4))
gatts(i).sType = dataType
dataValue = readDataType(curPos, dataType, True)
gatts(i).sValue = dataValue
'Debug.Print gatts(i).sName + " >> " + gatts(i).sValue
Put #outFile, , gatts(i).sName + " >> " + gatts(i).sValue + vbLf
Next
Else
totalCnt = DecodeUSINT(Getrec(curPos, 4))
ReDim vars(curVarCount).atts(tot alCnt)
For i = 0 To UBound(vars(curVarCount).a tts) - 1 Step 1
byteLength = DecodeUSINT(Getrec(curPos, 4))
bytePaddedLength = byteLength
While bytePaddedLength Mod 4 > 0
bytePaddedLength = bytePaddedLength + 1
Wend
vars(curVarCount).atts(i). sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
dataType = DecodeUSINT(Getrec(curPos, 4))
vars(curVarCount).atts(i). sType = dataType
If vars(curVarCount).atts(i). sName = "_FillValue" Then
dataValue = readDataType(curPos, dataType, True)
vars(curVarCount).fillValu e = dataValue
'Debug.Print vars(curVarCount).atts(i). sName + " >> " + str(vars(curVarCount).fill Value)
Put #outFile, , vars(curVarCount).atts(i). sName + " >> " + str(vars(curVarCount).fill Value) + vbLf
Else
dataValue = readDataType(curPos, dataType, True)
vars(curVarCount).atts(i). sValue = dataValue
'Debug.Print vars(curVarCount).atts(i). sName + " >> " + vars(curVarCount).atts(i). sValue
Put #outFile, , vars(curVarCount).atts(i). sName + " >> " + vars(curVarCount).atts(i). sValue + vbLf
End If
Next
dataType = DecodeUSINT(Getrec(curPos, 4))
vars(curVarCount).nc_type = dataType
'dataValue = readDataType(curPos, dataType, False)
vars(curVarCount).vsize = DecodeUSINT(Getrec(curPos, 4))
'Debug.Print "variable size: " + str(vars(curVarCount).vsiz e)
vars(curVarCount).begin = DecodeUSINT(Getrec(curPos, 4))
'Debug.Print "variable begin: " + str(vars(curVarCount).begi n)
'Debug.Print dataType
readDataType = dataType
End If
End Select
End Select
End Function
' here are some suggested functions:
Function Getrec(plPos As Long, plLen As Long) As String
' read part of file from pos for length
Dim sBuf As String
sBuf = Space(plLen)
Get #salFile, plPos, sBuf
Getrec = sBuf
plPos = plPos + plLen
End Function
Hopefully this is not too big, my apologies if so. Chatting with some of the experts in the above thread I've been told that I should be able to convert large data files in VB with little or no problems, however, I noticed there is a speed difference between running the code in VB and running it in Perl (about 2 minute difference on large datasets > 1GB). I'm wondering if anything in my VB code can be made more efficient to cut down on this speed issue and thought I'd post the code here and have some experts check it. I looked over it myself however this is the first time I read a binary file in VB so I may not be doing it the best way. I'm only posting the main parts of the code. The other subroutines that are not included seem to be pretty good although if you want me to post them also I can. Again, hopefully this is not too large and I would add more points if I could. thanks for any assistance!
'importClass.cls
Option Explicit
Private Type DimInfo
sName As String
lValue As Long
End Type
Private Type GattsInfo
sName As String
sType As Long
sValue As String
End Type
Private Type AttsInfo
sName As String
sType As Long
sValue As String
End Type
Private Type VarsInfo
sName As String
sValue As String
atts() As AttsInfo
dimsNum As Long
dimid() As Long
nc_type As Long
vsize As Long
begin As Long
fillValue As Long
End Type
Private Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (dest As Any, source As Any, ByVal numBytes As Long)
Private Const NC_BYTE As Integer = 1
Private Const NC_CHAR As Integer = 2
Private Const NC_SHORT As Integer = 3
Private Const NC_INT As Integer = 4
Private Const NC_FLOAT As Integer = 5
Private Const NC_DOUBLE As Integer = 6
Private Const NC_DIMENSION As Integer = 10
Private Const NC_VARIABLE As Integer = 11
Private Const NC_ATTRIBUTE As Integer = 12
Private salFile As Integer
Private outFile As Integer
Private dataType As Long
Private dims() As DimInfo
Private gatts() As GattsInfo
Private atts() As AttsInfo
Private vars() As VarsInfo
Sub extractNetCDFData()
Dim sheader As String
Dim lrecords As Long
Dim llength As Long
Dim lnext As Long
Dim mnext As Long
Dim strTemp As String
Dim tmpDouble As Double
Dim tmpShort As Integer
Dim i As Long, j As Long, k As Long
salFile = FreeFile
'testing a single file
Open "D:\dataFormats\NetCDF\tem
Dim tmp As String
outFile = FreeFile
Open "d:\dataFormats\NetCDF\tem
lnext = 1
mnext = 1
tmp = Getrec(lnext, 3)
sheader = tmp & Asc(Getrec(lnext, 1)) ' Read Header pos, length
lrecords = DecodeUSINT(Getrec(lnext, 4)) ' from position 5 for 4 decodes unsigned int
ReDim vars(1)
vars(0).begin = 0
Do
If Not vars(0).begin + 1 = lnext Then
dataType = DecodeUSINT(Getrec(lnext, 4)) ' also advances lNext
readDataType lnext, dataType, True
Else
For i = 0 To UBound(vars) - 1 Step 1
If vars(i).begin + 1 = lnext Then
If vars(i).nc_type = NC_DOUBLE Then
Put #outFile, , vbLf + vars(i).sName & ": "
For j = 0 To dims(i).lValue - 1 Step 1
tmp = Getrec(lnext, 8)
tmpDouble = Conversions.IEEEtoDouble(t
If Not vars(i).fillValue = tmpDouble Or tmpDouble = 0 Then
Put #outFile, , tmpDouble & ","
'Debug.Print tmpDouble
Else
Put #outFile, , "_,"
End If
'Debug.Print "printed"
Next
ElseIf vars(i).nc_type = NC_SHORT Then
PBarForm.DataProgress.Min = 0
PBarForm.DataProgress.Max = vars(i).vsize
PBarForm.Label1.Caption = "Processing... test.nc"
Put #outFile, , vbLf + vars(i).sName & ": "
For j = 0 To vars(i).vsize - 1 Step 1
If j <> 0 Then
If vars(i).vsize Mod j = 0 Then
PBarForm.DataProgress.valu
PBarForm.Refresh
DoEvents
End If
End If
tmp = Getrec(lnext, 2)
tmpShort = DecodeSHORT(tmp)
If Not vars(i).fillValue = tmpShort And Not tmpShort = 0 Then
Put #outFile, , tmpShort & ","
Else
Put #outFile, , "_,"
End If
Next
End If
Else
'Debug.Print "test"
End If
Next
End If
Loop While Not EOF(1)
Close #salFile
Close #outFile
End Sub
Private Function readDataType(curPos As Long, dataType As Long, globalAttr As Boolean) As Variant
Static curVarCount As Integer
Dim offset As Integer 'type offset
Dim i As Integer, j As Integer 'counter variables
Dim byteLength As Long, bytePaddedLength As Long
Dim dataValue As Variant
Dim iType As Integer
Dim lType As Long
Dim sType As String
Dim siType As Single
Dim dType As Double
Dim bType As Byte
Dim totalCnt As Long
Select Case dataType
Case NC_BYTE:
offset = 1 ' byte
bType = returnType(curPos, offset)
Case NC_CHAR:
offset = 1 ' char
sType = returnType(curPos, offset)
readDataType = sType
Case NC_SHORT:
offset = 2 ' short
sType = returnType(curPos, offset)
readDataType = sType
Case NC_INT:
offset = 4 ' signed int
lType = returnType(curPos, offset)
readDataType = lType
Case NC_FLOAT:
offset = 4 ' float(single)
siType = returnType(curPos, offset)
readDataType = siType
Case NC_DOUBLE:
offset = 8 ' double
dType = returnType(curPos, offset)
Case Else:
Select Case dataType
Case NC_DIMENSION:
totalCnt = DecodeUSINT(Getrec(curPos,
ReDim dims(totalCnt)
For i = 0 To UBound(dims) - 1 Step 1
byteLength = DecodeUSINT(Getrec(curPos,
bytePaddedLength = byteLength
While bytePaddedLength Mod 4 > 0
bytePaddedLength = bytePaddedLength + 1 ' make sure whole byte is read
Wend 'If lsize Mod 2 > 0 Then lsize = lsize + 1 ' make sure whole byte is read
dims(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
dims(i).lValue = DecodeUSINT(Getrec(curPos,
'Debug.Print dims(i).sName + " >> " + str(dims(i).lValue)
Put #outFile, , dims(i).sName + " >> " + str(dims(i).lValue) + vbLf
Next
Case NC_VARIABLE:
totalCnt = DecodeUSINT(Getrec(curPos,
ReDim vars(totalCnt)
For i = 0 To UBound(vars) - 1 Step 1
byteLength = DecodeUSINT(Getrec(curPos,
bytePaddedLength = byteLength
While bytePaddedLength Mod 4 > 0
bytePaddedLength = bytePaddedLength + 1
Wend
vars(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
vars(i).dimsNum = DecodeUSINT(Getrec(curPos,
ReDim vars(i).dimid(vars(i).dims
For j = 0 To UBound(vars(i).dimid) - 1
vars(i).dimid(j) = DecodeUSINT(Getrec(curPos,
Next
'Debug.Print vbLf + UCase(vars(i).sName)
Put #outFile, , vbLf + UCase(vars(i).sName)
dataType = DecodeUSINT(Getrec(curPos,
curVarCount = i
dataValue = readDataType(curPos, dataType, False)
'Debug.Print dataValue
Next
Case NC_ATTRIBUTE:
If globalAttr = True Then
totalCnt = DecodeUSINT(Getrec(curPos,
ReDim gatts(totalCnt)
For i = 0 To UBound(gatts) - 1 Step 1
byteLength = DecodeUSINT(Getrec(curPos,
bytePaddedLength = byteLength
While bytePaddedLength Mod 4 > 0
bytePaddedLength = bytePaddedLength + 1 ' make sure whole byte is read
Wend
gatts(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
dataType = DecodeUSINT(Getrec(curPos,
gatts(i).sType = dataType
dataValue = readDataType(curPos, dataType, True)
gatts(i).sValue = dataValue
'Debug.Print gatts(i).sName + " >> " + gatts(i).sValue
Put #outFile, , gatts(i).sName + " >> " + gatts(i).sValue + vbLf
Next
Else
totalCnt = DecodeUSINT(Getrec(curPos,
ReDim vars(curVarCount).atts(tot
For i = 0 To UBound(vars(curVarCount).a
byteLength = DecodeUSINT(Getrec(curPos,
bytePaddedLength = byteLength
While bytePaddedLength Mod 4 > 0
bytePaddedLength = bytePaddedLength + 1
Wend
vars(curVarCount).atts(i).
dataType = DecodeUSINT(Getrec(curPos,
vars(curVarCount).atts(i).
If vars(curVarCount).atts(i).
dataValue = readDataType(curPos, dataType, True)
vars(curVarCount).fillValu
'Debug.Print vars(curVarCount).atts(i).
Put #outFile, , vars(curVarCount).atts(i).
Else
dataValue = readDataType(curPos, dataType, True)
vars(curVarCount).atts(i).
'Debug.Print vars(curVarCount).atts(i).
Put #outFile, , vars(curVarCount).atts(i).
End If
Next
dataType = DecodeUSINT(Getrec(curPos,
vars(curVarCount).nc_type = dataType
'dataValue = readDataType(curPos, dataType, False)
vars(curVarCount).vsize = DecodeUSINT(Getrec(curPos,
'Debug.Print "variable size: " + str(vars(curVarCount).vsiz
vars(curVarCount).begin = DecodeUSINT(Getrec(curPos,
'Debug.Print "variable begin: " + str(vars(curVarCount).begi
'Debug.Print dataType
readDataType = dataType
End If
End Select
End Select
End Function
' here are some suggested functions:
Function Getrec(plPos As Long, plLen As Long) As String
' read part of file from pos for length
Dim sBuf As String
sBuf = Space(plLen)
Get #salFile, plPos, sBuf
Getrec = sBuf
plPos = plPos + plLen
End Function
SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
it would be great to read it all into a buffer but some of the data probably would come extremely close to maxing out my memory... thanks
SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
thanks for the advice. still implementing it right now, but wanted to go ahead and close the question. :)
IncData should be NewBuff...