Link to home
Start Free TrialLog in
Avatar of guidway
guidwayFlag for United States of America

asked on

NetCdf VB Code, is there anyway to increase efficiency on this code?

This is a continuation of: https://www.experts-exchange.com/questions/20960898/parsing-large-data-in-VB-preferred-method.html (this is a separate issue than what I originally posted though)

Hopefully this is not too big, my apologies if so. Chatting with some of the experts in the above thread I've been told that I should be able to convert large data files in VB with little or no problems, however, I noticed there is a speed difference between running the code in VB and running it in Perl (about 2 minute difference on large datasets > 1GB). I'm wondering if anything in my VB code can be made more efficient to cut down on this speed issue and thought I'd post the code here and have some experts check it. I looked over it myself however this is the first time I read a binary file in VB so I may not be doing it the best way. I'm only posting the main parts of the code. The other subroutines that are not included seem to be pretty good although if you want me to post them also I can. Again, hopefully this is not too large and I would add more points if I could. thanks for any assistance!

'importClass.cls
Option Explicit

Private Type DimInfo
   sName As String
   lValue As Long
End Type

Private Type GattsInfo
   sName As String
   sType As Long
   sValue As String
End Type

Private Type AttsInfo
   sName As String
   sType As Long
   sValue As String
End Type

Private Type VarsInfo
   sName As String
   sValue As String
   atts() As AttsInfo
   dimsNum As Long
   dimid() As Long
   nc_type As Long
   vsize As Long
   begin As Long
   fillValue As Long
End Type

Private Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (dest As Any, source As Any, ByVal numBytes As Long)

Private Const NC_BYTE As Integer = 1
Private Const NC_CHAR As Integer = 2
Private Const NC_SHORT As Integer = 3
Private Const NC_INT As Integer = 4
Private Const NC_FLOAT As Integer = 5
Private Const NC_DOUBLE As Integer = 6
Private Const NC_DIMENSION As Integer = 10
Private Const NC_VARIABLE As Integer = 11
Private Const NC_ATTRIBUTE As Integer = 12

Private salFile As Integer
Private outFile As Integer
Private dataType As Long

Private dims() As DimInfo
Private gatts() As GattsInfo
Private atts() As AttsInfo
Private vars() As VarsInfo

Sub extractNetCDFData()

    Dim sheader As String
    Dim lrecords As Long
    Dim llength As Long
    Dim lnext As Long
    Dim mnext As Long
    Dim strTemp As String
    Dim tmpDouble As Double
    Dim tmpShort As Integer
    Dim i As Long, j As Long, k As Long
   
    salFile = FreeFile
    'testing a single file
    Open "D:\dataFormats\NetCDF\temp\test.nc" For Binary As #salFile
   
    Dim tmp As String
    outFile = FreeFile
    Open "d:\dataFormats\NetCDF\temp\test.txt" For Binary As #outFile
    lnext = 1
    mnext = 1
    tmp = Getrec(lnext, 3)
    sheader = tmp & Asc(Getrec(lnext, 1))  ' Read Header pos, length
    lrecords = DecodeUSINT(Getrec(lnext, 4)) ' from position 5 for 4 decodes unsigned int
    ReDim vars(1)
    vars(0).begin = 0
    Do
       If Not vars(0).begin + 1 = lnext Then
            dataType = DecodeUSINT(Getrec(lnext, 4)) ' also advances lNext
            readDataType lnext, dataType, True
       Else
          For i = 0 To UBound(vars) - 1 Step 1
             If vars(i).begin + 1 = lnext Then
                 If vars(i).nc_type = NC_DOUBLE Then
                    Put #outFile, , vbLf + vars(i).sName & ": "
                    For j = 0 To dims(i).lValue - 1 Step 1
                 
                       tmp = Getrec(lnext, 8)
                       tmpDouble = Conversions.IEEEtoDouble(tmp)
                       If Not vars(i).fillValue = tmpDouble Or tmpDouble = 0 Then
                          Put #outFile, , tmpDouble & ","
                          'Debug.Print tmpDouble
                       Else
                          Put #outFile, , "_,"
                       End If
                          'Debug.Print "printed"
                       
                    Next
                 ElseIf vars(i).nc_type = NC_SHORT Then
                   
                    PBarForm.DataProgress.Min = 0
                    PBarForm.DataProgress.Max = vars(i).vsize
                    PBarForm.Label1.Caption = "Processing... test.nc"
                    Put #outFile, , vbLf + vars(i).sName & ": "
                    For j = 0 To vars(i).vsize - 1 Step 1
                       If j <> 0 Then
                          If vars(i).vsize Mod j = 0 Then
                             PBarForm.DataProgress.value = j
                             PBarForm.Refresh
                             DoEvents
                          End If
                       End If
                       tmp = Getrec(lnext, 2)
                       tmpShort = DecodeSHORT(tmp)
                       If Not vars(i).fillValue = tmpShort And Not tmpShort = 0 Then
                          Put #outFile, , tmpShort & ","
                       Else
                          Put #outFile, , "_,"
                       End If
                    Next
                 End If
             Else
                'Debug.Print "test"
             End If
          Next
       End If
    Loop While Not EOF(1)
    Close #salFile
    Close #outFile
   
End Sub

Private Function readDataType(curPos As Long, dataType As Long, globalAttr As Boolean) As Variant
    Static curVarCount As Integer
    Dim offset As Integer 'type offset
    Dim i As Integer, j As Integer 'counter variables
    Dim byteLength As Long, bytePaddedLength As Long
    Dim dataValue As Variant
    Dim iType As Integer
    Dim lType As Long
    Dim sType As String
    Dim siType As Single
    Dim dType As Double
    Dim bType As Byte
    Dim totalCnt As Long

    Select Case dataType
       Case NC_BYTE:
          offset = 1 ' byte
          bType = returnType(curPos, offset)
       Case NC_CHAR:
          offset = 1 ' char
          sType = returnType(curPos, offset)
          readDataType = sType
       Case NC_SHORT:
          offset = 2 ' short
          sType = returnType(curPos, offset)
          readDataType = sType
       Case NC_INT:
          offset = 4 ' signed int
          lType = returnType(curPos, offset)
          readDataType = lType
       Case NC_FLOAT:
          offset = 4 ' float(single)
          siType = returnType(curPos, offset)
          readDataType = siType
       Case NC_DOUBLE:
          offset = 8 ' double
          dType = returnType(curPos, offset)
       Case Else:
          Select Case dataType
             Case NC_DIMENSION:
                 totalCnt = DecodeUSINT(Getrec(curPos, 4))
                 ReDim dims(totalCnt)
                 For i = 0 To UBound(dims) - 1 Step 1
                   byteLength = DecodeUSINT(Getrec(curPos, 4))
                   bytePaddedLength = byteLength
                   While bytePaddedLength Mod 4 > 0
                      bytePaddedLength = bytePaddedLength + 1 ' make sure whole byte is read
                   Wend 'If lsize Mod 2 > 0 Then lsize = lsize + 1 ' make sure whole byte is read
                   dims(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
                   dims(i).lValue = DecodeUSINT(Getrec(curPos, 4))
                   'Debug.Print dims(i).sName + " >> " + str(dims(i).lValue)
                   Put #outFile, , dims(i).sName + " >> " + str(dims(i).lValue) + vbLf
                 Next
             Case NC_VARIABLE:
                 totalCnt = DecodeUSINT(Getrec(curPos, 4))
                 ReDim vars(totalCnt)
                 For i = 0 To UBound(vars) - 1 Step 1
                   byteLength = DecodeUSINT(Getrec(curPos, 4))
                   bytePaddedLength = byteLength
                   While bytePaddedLength Mod 4 > 0
                      bytePaddedLength = bytePaddedLength + 1
                   Wend
                   vars(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
                   vars(i).dimsNum = DecodeUSINT(Getrec(curPos, 4))
                   ReDim vars(i).dimid(vars(i).dimsNum)
                   For j = 0 To UBound(vars(i).dimid) - 1
                      vars(i).dimid(j) = DecodeUSINT(Getrec(curPos, 4))
                   Next
                   'Debug.Print vbLf + UCase(vars(i).sName)
                   Put #outFile, , vbLf + UCase(vars(i).sName)
                   dataType = DecodeUSINT(Getrec(curPos, 4))
                   curVarCount = i
                   dataValue = readDataType(curPos, dataType, False)
                   
                   'Debug.Print dataValue
                 Next
             Case NC_ATTRIBUTE:
                 If globalAttr = True Then
                    totalCnt = DecodeUSINT(Getrec(curPos, 4))
                    ReDim gatts(totalCnt)
                    For i = 0 To UBound(gatts) - 1 Step 1
                      byteLength = DecodeUSINT(Getrec(curPos, 4))
                      bytePaddedLength = byteLength
                      While bytePaddedLength Mod 4 > 0
                         bytePaddedLength = bytePaddedLength + 1 ' make sure whole byte is read
                      Wend
                      gatts(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
                      dataType = DecodeUSINT(Getrec(curPos, 4))
                      gatts(i).sType = dataType
                      dataValue = readDataType(curPos, dataType, True)
                      gatts(i).sValue = dataValue
                      'Debug.Print gatts(i).sName + " >> " + gatts(i).sValue
                      Put #outFile, , gatts(i).sName + " >> " + gatts(i).sValue + vbLf
                    Next
                  Else
                    totalCnt = DecodeUSINT(Getrec(curPos, 4))
                    ReDim vars(curVarCount).atts(totalCnt)
                    For i = 0 To UBound(vars(curVarCount).atts) - 1 Step 1
                       byteLength = DecodeUSINT(Getrec(curPos, 4))
                       bytePaddedLength = byteLength
                       While bytePaddedLength Mod 4 > 0
                          bytePaddedLength = bytePaddedLength + 1
                       Wend
                       vars(curVarCount).atts(i).sName = Left(Getrec(curPos, bytePaddedLength), byteLength)
                       dataType = DecodeUSINT(Getrec(curPos, 4))
                       vars(curVarCount).atts(i).sType = dataType
                       
                       If vars(curVarCount).atts(i).sName = "_FillValue" Then
                          dataValue = readDataType(curPos, dataType, True)
                          vars(curVarCount).fillValue = dataValue
                          'Debug.Print vars(curVarCount).atts(i).sName + " >> " + str(vars(curVarCount).fillValue)
                          Put #outFile, , vars(curVarCount).atts(i).sName + " >> " + str(vars(curVarCount).fillValue) + vbLf
                       Else
                          dataValue = readDataType(curPos, dataType, True)
                          vars(curVarCount).atts(i).sValue = dataValue
                          'Debug.Print vars(curVarCount).atts(i).sName + " >> " + vars(curVarCount).atts(i).sValue
                          Put #outFile, , vars(curVarCount).atts(i).sName + " >> " + vars(curVarCount).atts(i).sValue + vbLf
                       End If
                    Next
                    dataType = DecodeUSINT(Getrec(curPos, 4))
                    vars(curVarCount).nc_type = dataType
                    'dataValue = readDataType(curPos, dataType, False)
                    vars(curVarCount).vsize = DecodeUSINT(Getrec(curPos, 4))
                    'Debug.Print "variable size: " + str(vars(curVarCount).vsize)
                    vars(curVarCount).begin = DecodeUSINT(Getrec(curPos, 4))
                    'Debug.Print "variable begin: " + str(vars(curVarCount).begin)
                    'Debug.Print dataType
                    readDataType = dataType
                  End If
          End Select
    End Select
   
End Function

' here are some suggested functions:
Function Getrec(plPos As Long, plLen As Long) As String
' read part of file from pos for length
Dim sBuf As String
sBuf = Space(plLen)
Get #salFile, plPos, sBuf
Getrec = sBuf
plPos = plPos + plLen
End Function
SOLUTION
Avatar of LunaSkye
LunaSkye

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Avatar of LunaSkye
LunaSkye

Sorry.. my cut/paste blunders...

IncData should be NewBuff...

Avatar of guidway

ASKER

it would be great to read it all into a buffer but some of the data probably would come extremely close to maxing out my memory... thanks
SOLUTION
Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
ASKER CERTIFIED SOLUTION
Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Avatar of guidway

ASKER

thanks for the advice. still implementing it right now, but wanted to go ahead and close the question. :)