Link to home
Start Free TrialLog in
Avatar of wormboy__6
wormboy__6

asked on

finding multiple file instances

How can i find all the files on my computer that have the same filename and store these filenames in a variable with their full paths.

I need a new variable for each set of filenames.

Can someone supply some code for me please..
Avatar of Vbmaster
Vbmaster

What you want to do is

  1) Search the drive for ALL files, and store in a array with the filename at start then a Chr$(0) and the path

  2) Sort the array

  3) Iterate thru the array, get the filename (Left$(array(index), Instr(array(Index), Chr$(0))-1) and compare it with the filename before the item and after it, clear all items which filenames are not similar to the previous or next item

I don't know what kind of variable you would like after that, array variant or a long string for every filename or a string array for every filename??

Go to www.planet-source-code.com to get sourcecode for 1) and 2).
Avatar of wormboy__6

ASKER

Can you write some code for part 3 though....i understand the first two...but i need some help with the third step.


Private Sub Command1_Click()

Dim sfiles(100000, 1) As String
Dim lCount As Long
Dim r As Long
Dim c

Call file_search("c:\", sfiles(), lCount)

r = lCount

Call QuickSortNumbers(sfiles(), 1, r)

For c = 1 To lCount
    If c > 1 And sfiles(c - 1, 1) = sfiles(c, 1) Then
        Debug.Print sfiles(c, 0) & sfiles(c, 1)
    End If
Next



End Sub


Attribute VB_Name = "Module1"
Declare Function FindFirstFile Lib "kernel32.dll" Alias "FindFirstFileA" (ByVal lpFileName As _
String, lpFindFileData As WIN32_FIND_DATA) As Long

Declare Function FindNextFile Lib "kernel32.dll" Alias "FindNextFileA" (ByVal hFindFile As _
Long, lpFindFileData As WIN32_FIND_DATA) As Long


Declare Function FindClose Lib "kernel32.dll" (ByVal hFindFile As Long) As Long

Type FILETIME
  dwLowDateTime As Long
  dwHighDateTime As Long
End Type

Type WIN32_FIND_DATA
  dwFileAttributes As Long
  ftCreationTime As FILETIME
  ftLastAccessTime As FILETIME
  ftLastWriteTime As FILETIME
  nFileSizeHigh As Long
  nFileSizeLow As Long
  dwReserved0 As Long
  dwReserved1 As Long
  cFileName As String * 260
  cAlternate As String * 14
End Type


Private Const FILE_ATTRIBUTE_ARCHIVE = &H20
    '      An archive file (which most files are).
Private Const FILE_ATTRIBUTE_COMPRESSED = &H800
     '     A file residing in a compressed drive or directory.
Private Const FILE_ATTRIBUTE_DIRECTORY = &H10
      '    A directory instead of a file.
Private Const FILE_ATTRIBUTE_HIDDEN = &H2
       '   A hidden file, not normally visible to the user.
Private Const FILE_ATTRIBUTE_NORMAL = &H80
        '  An attribute-less file (cannot be combined with other attributes).
Private Const FILE_ATTRIBUTE_READONLY = &H1
         ' A read-only file.
Private Const FILE_ATTRIBUTE_SYSTEM = &H4


Public Function file_search(sPath As String, gFiles() As String, gCount As Long)

    Dim hsearch As Long  ' handle to the file search
    Dim findinfo As WIN32_FIND_DATA  ' receives info about matching files
    Dim success As Long  ' will be 1 if successive searches are successful, 0 if not
    Dim buffer As String  ' string buffer to use to process the filename(s)
    Dim retval As Long  ' generic return value
   
    Dim dirs() As String
   
    Dim lDir As Long
    Dim lSize As Long
   
    Dim c As Long
   
    lSize = 10
   
   
   
    ReDim dirs(lSize)
   
   

' Begin a file search:
    hsearch = FindFirstFile(sPath & "*.*", findinfo)
    If hsearch = -1 Then  ' no files match the search string
      Debug.Print "(no files matched search parameter)"
      End  ' abort program
    End If

    ' Display name of each file that matches the search.  Note that the name is displayed, the
    ' next file (if any) is found, and then the loop restarts.  This way the first file
    ' (found above) will also be displayed.
    Do  ' begin loop
      ' Extract the filename from the fixed-length string:
      buffer = Left(findinfo.cFileName, InStr(findinfo.cFileName, vbNullChar) - 1)
      If findinfo.dwFileAttributes = FILE_ATTRIBUTE_DIRECTORY Then
       
        If buffer <> "." And buffer <> ".." Then
          lDir = lDir + 1
          If lDir > lSize Then
            lSize = lSize + 10
            ReDim Preserve dirs(lSize)
          End If
                               
          dirs(lDir) = sPath & buffer & IIf(Right(sbuffer, 1) <> "\", "\", "")
         
        End If
         
      Else
     
        gCount = gCount + 1
        gFiles(gCount, 0) = sPath
        gFiles(gCount, 1) = buffer
               
      End If
 
       
      ' Get the next matching file and loop if it exists:
      success = FindNextFile(hsearch, findinfo)
    Loop Until success = 0  ' keep looping until no more matching files are found
    retval = FindClose(hsearch)
   
    For c = 1 To lDir
        Call file_search(dirs(c), gFiles(), gCount)
    Next

    ' Close the file search handle

End Function


Public Sub QuickSortNumbers(iArray() As String, l As Long, r As Long)

    Dim i As Long, j As Long
   
    Dim x
    Dim y
   
  '  Form1.text1.Text = Form1.text1.Text + 1
   ' Form1.Refresh
   
    i = l
    j = r
   
    x = iArray((l + r) / 2, 1)
   
    While i <= j
   
        While iArray(i, 1) < x And i < r
       
            i = i + 1
           
        Wend
       
        While x < iArray(j, 1) And j > l
       
            j = j - 1
           
        Wend
       
        If i <= j Then
       
            y = iArray(i, 0)
            iArray(i, 0) = iArray(j, 0)
            iArray(j, 0) = y
            y = iArray(i, 1)
            iArray(i, 1) = iArray(j, 1)
            iArray(j, 1) = y
           
            i = i + 1
            j = j - 1
           
     '       Form1.Text2.Text = Form1.Text2.Text + 1
           
        End If
           
    Wend
   
    If l < j Then Call QuickSortNumbers(iArray, l, j)
    If i < r Then Call QuickSortNumbers(iArray, i, r)

End Sub


found a bug in the .bas module, it ends in error sonmetimes

Attribute VB_Name = "Module1"
Declare Function FindFirstFile Lib "kernel32.dll" Alias "FindFirstFileA" (ByVal lpFileName As _
String, lpFindFileData As WIN32_FIND_DATA) As Long

Declare Function FindNextFile Lib "kernel32.dll" Alias "FindNextFileA" (ByVal hFindFile As _
Long, lpFindFileData As WIN32_FIND_DATA) As Long


Declare Function FindClose Lib "kernel32.dll" (ByVal hFindFile As Long) As Long

Type FILETIME
  dwLowDateTime As Long
  dwHighDateTime As Long
End Type

Type WIN32_FIND_DATA
  dwFileAttributes As Long
  ftCreationTime As FILETIME
  ftLastAccessTime As FILETIME
  ftLastWriteTime As FILETIME
  nFileSizeHigh As Long
  nFileSizeLow As Long
  dwReserved0 As Long
  dwReserved1 As Long
  cFileName As String * 260
  cAlternate As String * 14
End Type


Private Const FILE_ATTRIBUTE_ARCHIVE = &H20
    '      An archive file (which most files are).
Private Const FILE_ATTRIBUTE_COMPRESSED = &H800
     '     A file residing in a compressed drive or directory.
Private Const FILE_ATTRIBUTE_DIRECTORY = &H10
      '    A directory instead of a file.
Private Const FILE_ATTRIBUTE_HIDDEN = &H2
       '   A hidden file, not normally visible to the user.
Private Const FILE_ATTRIBUTE_NORMAL = &H80
        '  An attribute-less file (cannot be combined with other attributes).
Private Const FILE_ATTRIBUTE_READONLY = &H1
         ' A read-only file.
Private Const FILE_ATTRIBUTE_SYSTEM = &H4


Public Function file_search(sPath As String, gFiles() As String, gCount As Long)

    Dim hsearch As Long  ' handle to the file search
    Dim findinfo As WIN32_FIND_DATA  ' receives info about matching files
    Dim success As Long  ' will be 1 if successive searches are successful, 0 if not
    Dim buffer As String  ' string buffer to use to process the filename(s)
    Dim retval As Long  ' generic return value
   
    Dim dirs() As String
   
    Dim lDir As Long
    Dim lSize As Long
   
    Dim c As Long
   
    lSize = 10
   
   
   
    ReDim dirs(lSize)
   
   

' Begin a file search:
    hsearch = FindFirstFile(sPath & "*.*", findinfo)

    ' Display name of each file that matches the search.  Note that the name is displayed, the
    ' next file (if any) is found, and then the loop restarts.  This way the first file
    ' (found above) will also be displayed.
    Do  ' begin loop
      ' Extract the filename from the fixed-length string:
      buffer = Left(findinfo.cFileName, InStr(findinfo.cFileName, vbNullChar) - 1)
      If findinfo.dwFileAttributes = FILE_ATTRIBUTE_DIRECTORY Then
       
        If buffer <> "." And buffer <> ".." Then
          lDir = lDir + 1
          If lDir > lSize Then
            lSize = lSize + 10
            ReDim Preserve dirs(lSize)
          End If
                               
          dirs(lDir) = sPath & buffer & IIf(Right(sbuffer, 1) <> "\", "\", "")
         
        End If
         
      Else
     
        gCount = gCount + 1
        gFiles(gCount, 0) = sPath
        gFiles(gCount, 1) = buffer
               
      End If
 
       
      ' Get the next matching file and loop if it exists:
      success = FindNextFile(hsearch, findinfo)
    Loop Until success = 0  ' keep looping until no more matching files are found
    retval = FindClose(hsearch)
   
    For c = 1 To lDir
        Call file_search(dirs(c), gFiles(), gCount)
    Next

    ' Close the file search handle

End Function


Public Sub QuickSortNumbers(iArray() As String, l As Long, r As Long)

    Dim i As Long, j As Long
   
    Dim x
    Dim y
   
  '  Form1.text1.Text = Form1.text1.Text + 1
   ' Form1.Refresh
   
    i = l
    j = r
   
    x = iArray((l + r) / 2, 1)
   
    While i <= j
   
        While iArray(i, 1) < x And i < r
       
            i = i + 1
           
        Wend
       
        While x < iArray(j, 1) And j > l
       
            j = j - 1
           
        Wend
       
        If i <= j Then
       
            y = iArray(i, 0)
            iArray(i, 0) = iArray(j, 0)
            iArray(j, 0) = y
            y = iArray(i, 1)
            iArray(i, 1) = iArray(j, 1)
            iArray(j, 1) = y
           
            i = i + 1
            j = j - 1
           
     '       Form1.Text2.Text = Form1.Text2.Text + 1
           
        End If
           
    Wend
   
    If l < j Then Call QuickSortNumbers(iArray, l, j)
    If i < r Then Call QuickSortNumbers(iArray, i, r)

End Sub

Here's code for step 3 AND 4.. what it does it fills a new string array ( Dupe() ) with all the filenames that exist as ... duplicates. Here's the code, I saved all the testing code so you can see that it works. The array that contains all the filenames & Chr$(0) & directory is called x() here.


  Dim Dupe() As String
  Dim DupeCount As Long
  Dim IsSame As Boolean
  Dim OldFile As String
  Dim NewFile As String
  Dim x() As String
 
  ReDim x(4)
  x(1) = "adam" & Chr$(0) & "D:\1"
  x(2) = "adamd" & Chr$(0) & "D:\2"
  x(3) = "adam" & Chr$(0) & "D:\3"
  x(4) = "adam" & Chr$(0) & "D:\4"
 
  If (UBound(x) > 1) Then
    OldFile = Left$(x(1), InStr(x(1), Chr$(0)) - 1)
    For a = 2 To UBound(x)
      NewFile = Left$(x(a), InStr(x(a), Chr$(0)) - 1)
      If (OldFile = NewFile) Then
        DupeCount = DupeCount + 1
        ReDim Preserve Dupe(DupeCount)
        Dupe(DupeCount) = x(a - 1)
        IsSame = True
      ElseIf IsSame Then
        DupeCount = DupeCount + 1
        ReDim Preserve Dupe(DupeCount)
        Dupe(DupeCount) = x(a - 1)
        IsSame = False
        OldFile = NewFile
      Else
        OldFile = NewFile
      End If
    Next
    If IsSame Then
      DupeCount = DupeCount + 1
      ReDim Preserve Dupe(DupeCount)
      Dupe(DupeCount) = x(a - 1)
    End If
  End If
  Erase x

  Debug.Print Now
  For a = 1 To DupeCount
    Debug.Print Dupe(a)
  Next
vbmaster...this is a start to what i want to do..but there is one problem.

This code searches for the values that are the same as the first string in the array...i want to find all the duplicates in the array....not just the values matching the first value in the array.

It also needs to be fast, as there will be thousands of values in the array...please help
also, when a duplicate file is found could it be added to a string...and all other duplicates of that same file be added to that same string....say separated by a "," ie the filename and the filepath are separated by a character and then the other values in the string are separated by a ","...each value holding its filename and filepath.

Then when a duplicate of another file is found that is different to the previous duplicates..it is added to another string....etc etc


eg

If i searched directories which contained these files:

test.exe
hi.txt
me.txt
house.exe
test.exe
me.txt
bath.ini

At the end i would have 2 strings..one would contain:
"test.exe-C:\windows\test.exe,test.exe-C:\games\test.exe"
and the other would contain:
"me.txt-C:\games\me.txt,me.txt-C:\windows\me.txt"

Do you understand this?
I am not sure i am explaining it right.
sorry deighton but this doesnt work...it returns values that are not duplicates as well as duplicates
ASKER CERTIFIED SOLUTION
Avatar of deighton
deighton
Flag of United Kingdom of Great Britain and Northern Ireland image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
wormboy_6: did you actually try to run the code I gave you or did you just look over the code and draw your own conclusions how it worked? Because for me it works just fine, even if the first element of the array is NOT a duplicate.

Anyway here's some remade program to make it more like you want it, the result now is a array with first the filename and then the pathnames of all the files separated with a Chr$(0) - you suggested a "," but this is not a good character since filenames is allowed to have this character.


  Dim Dupe() As String
  Dim DupeCount As Long
  Dim IsSame As Boolean
  Dim OldFile As String
  Dim NewFile As String
  Dim x() As String
   
  ReDim x(5)                                          'Only for testing phase
  x(1) = "adame" & Chr$(0) & "D:\1"       'Only for testing phase
  x(2) = "adame" & Chr$(0) & "D:\2"       'Only for testing phase
  x(3) = "adam" & Chr$(0) & "D:\3"         'Only for testing phase
  x(4) = "adam" & Chr$(0) & "D:\4"         'Only for testing phase
  x(5) = "adam" & Chr$(0) & "D:\3"         'Only for testing phase
   
  If (UBound(x) > 1) Then
    OldFile = Left$(x(1), InStr(x(1), Chr$(0)) - 1)
    For a = 2 To UBound(x)
      NewFile = Left$(x(a), InStr(x(a), Chr$(0)) - 1)
      If (OldFile = NewFile) Then
        If Not IsSame Then
          DupeCount = DupeCount + 1
          ReDim Preserve Dupe(DupeCount)
          Dupe(DupeCount) = NewFile
        End If
        Dupe(DupeCount) = Dupe(DupeCount) & Chr$(0) & Mid$(x(a - 1), InStr(x(a - 1), Chr$(0)) + 1)
        IsSame = True
      ElseIf IsSame Then
        Dupe(DupeCount) = Dupe(DupeCount) & Chr$(0) & Mid$(x(a - 1), InStr(x(a - 1), Chr$(0)) + 1)
        IsSame = False
        OldFile = NewFile
      Else
        OldFile = NewFile
      End If
    Next
    If IsSame Then
      Dupe(DupeCount) = Dupe(DupeCount) & Chr$(0) & Mid$(x(a - 1), InStr(x(a - 1), Chr$(0)) + 1)
    End If
  End If
  Erase x

  Debug.Print Now
  For a = 1 To DupeCount
    Debug.Print Dupe(a)
  Next

deighton...can you make one more modification.

I would like to be able to specify the extension to search for...not have it as *.* all the time.

Can you change your code to allow this because i cant seem to get it to work..thanks
sorry..but also can you write a little piece of code to manipulate a string...like the one given...to return the filename and the path to the files.

"ltremove.exe-C:\windows\TEMP\~WZS296F.TMP\ltremove.exe,ltremove.exe-C:\windows\Options\Cabs\ltremove.exe,ltremove.exe-C:\windows\SYSTEM\ltremove.exe"

eg. it would return ltremove.exe in a string called Filename, and in an array it would return the different paths to this filename..eg
Path(1)= "C:\windows\TEMP\~WZS296F.TMP\ltremove.exe"
Path(2)= C:\windows\Options\Cabs\ltremove.exe,ltremove.exe
Path(3) = C:\windows\SYSTEM\ltremove.exe

it needs to be able to work with strings that have 1 or more paths in them.....

thanks
dont worry about that last comment...can you tell me how to specify a file extension to search for though, please
wormboy, have you looked at my example yet?
yeh ive seen it...but i need duplicate files..

If you could tell me how to modify deighton's code to find duplicates with a certain file extension..then it would be an answer im looking for