genespos genespos - 22 days ago 12
Vb.net Question

Speed up search file using Multithreading or Parallel.ForEach

I wrote a code to search for files and folders and (to check all possible combinations of inserted words) I have a sub that gives all permutations of the inserted strings.

My problem is that I'm repeating the code for every permutated string (for 4 words it means 24 times) and I'm trying to use

MultiThreading
to speed up the code.

I've read a lot of examples but I wasn't able to really understand the logic for many reasons (some examples were in C; any example was wrote with different logic)

I've tried with

Parallel.For
Parallel.ForEach
ThreadPool


but I wasn't able to wait all threads before setting the List (containig all results) as datasource of a listbox.

My code logic is:

Get words by splitting the search string

If search type is "all words in any order" then I get all permutations

I start searching for each of permutated strings

I don't like to add too much code to a question but I think it's necessary in this case to know how I'm working:

Private Sub Btn_Search_Click(sender As Object, e As EventArgs) Handles Btn_Search.Click
Select Case True
Case RBtn_Exact.Checked
StartSearch(Me.TB_Pattern.Text.Trim)
Case RBtn_AllInOrder.Checked
Dim Pattern As String = ""
For Each Word As String In Me.TB_Pattern.Text.Split(New Char() {" "c})
If Word.Trim <> "" Then Pattern &= "*" & Word.Trim
Next
Pattern &= "*"
StartSearch(Pattern)
endsearch()
Case RBtn_AllWithoutOrder.Checked
Dim WordHash As New HashSet(Of String)
For Each Word As String In Split(Me.TB_Pattern.Text, " ")
If Word.Trim <> "" Then WordHash.Add(Word.Trim)
Next
If WordHash.Count > 5 Then
MessageBox.Show("Max 5 words allowed for this kind of search", MessageBoxButtons.OK, MessageBoxIcon.Exclamation)
Exit Sub
End If
'Get permutations into an array
StringPermutations()
'I need to add "*" at the end of each permutated string
For S As Integer = 0 To PermutationsArr.Length - 1
PermutationsArr(S) &= "*"
Next
'This is for searching without MultiThreading
For Each Pattern As String In PermutationsArr
StartSearch(Pattern)
Next
'This is my last test
'Parallel.ForEach(PermutationsArr,
' Sub(Pattern)
' StartSearch(Pattern)
' End Sub
' )
'Task.WaitAll()
endsearch()
Case RBtn_AnyWord.Checked
Dim WordHash As New HashSet(Of String)
For Each Word As String In Split(Me.TB_Pattern.Text, " ")
If Word.Trim <> "" Then WordHash.Add(Word.Trim)
Next
If WordHash.Count > 5 Then
MessageBox.Show("Max 5 words allowed for this kind of search", MessageBoxButtons.OK, MessageBoxIcon.Exclamation)
Exit Sub
End If
For Each Word As String In WordHash
StartSearch(pattern:="*" & Word & "*")
Next
endsearch()
End Select
End Sub

Private Sub StartSearch(ByVal pattern As String)
'Search for files
If Me.CBox_Files.Checked Then
FileSearch(Me.TB_StartFolder.Text, pattern)
End If
'Search for folders
If Me.CBox_Folders.Checked Then
ProcessDir(Me.TB_StartFolder.Text, pattern)

DirSearch(Me.TB_StartFolder.Text, pattern)
End If
End Sub

Sub endsearch()
Me.Btn_Search.Text = "Start"
Me.Btn_Search.BackColor = Me.BackColor
If Me.LB_Files.Items.Count > 0 Then
Me.Lbl_FilesFound.Text = Me.LB_Files.Items.Count.ToString
Me.Lbl_FilesFound.Visible = True
End If
If Me.LB_Folders.Items.Count > 0 Then
Me.Lbl_DirFound.Text = Me.LB_Folders.Items.Count.ToString
Me.Lbl_DirFound.Visible = True
End If
End Sub

Sub DirSearch(ByVal sDir As String, ByVal Pattern As String)
Try
For Each Dir As String In Directory.GetDirectories(sDir)
Try
For Each D As String In Directory.GetDirectories(Dir, Pattern)
Try
If LimitReached(LB_Folders) Then
Me.Lbl_LimitReached.Visible = True
Exit Sub
Else
If Me.CBox_Folders.Checked AndAlso Not LB_Folders.Items.Contains(D) Then LB_Folders.Items.Add(D)
End If
Catch ex As Exception
Continue For
End Try
Next
DirSearch(Dir, Pattern)
Catch ex As Exception
Continue For
End Try
Next
Catch ex As Exception
End Try
End Sub
Sub FileSearch(ByVal sDir As String, ByVal Pattern As String)
Dim d As String = ""
Try
For Each f As String In Directory.GetFiles(sDir, Pattern)
Try
If LimitReached(LB_Files) Then
Me.Lbl_LimitReached.Visible = True
Exit Sub
Else
If Me.CBox_LastModRange.Checked Then
If Me.CBox_Files.Checked AndAlso IntoRangeDate(f) AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
Else
If Me.CBox_Files.Checked AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
End If
End If
Catch ex As Exception
Continue For
End Try
Next
'Search for subfolders
For Each d In Directory.GetDirectories(sDir)
Try
ProcessDir(d, Pattern)
Catch ex As Exception
End Try
Try
FileSearch(d, Pattern)
Catch ex As Exception
End Try
Next
Catch excpt As System.Exception
End Try
End Sub

Private Sub ProcessDir(d As String, ByVal Pattern As String)
Try
For Each f As String In Directory.GetFiles(d, Pattern)
Try
If LimitReached(LB_Files) Then
Me.Lbl_LimitReached.Visible = True
Exit Sub
Else
If Me.CBox_LastModRange.Checked Then
If Me.CBox_Files.Checked AndAlso IntoRangeDate(f) AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
Else
If Me.CBox_Files.Checked AndAlso Not LB_Files.Items.Contains(f) Then LB_Files.Items.Add(f)
End If
End If
Catch ex As Exception
Continue For
End Try
Next
Catch ex As System.Exception
End Try
Try
For Each d In Directory.GetDirectories(d, Pattern)
Try
If Me.CBox_Folders.Checked AndAlso Not LB_Folders.Items.Contains(d) Then LB_Folders.Items.Add(d)
Catch ex As Exception
Continue For
End Try
Next
Catch ex As Exception
End Try
End Sub


EDIT

Below my code for getting permutations (I know it has a particular logic but it works and it seems enough fast):

Private Sub StringPermutations()
Try
Dim WordHash As New HashSet(Of String)
For Each Word As String In Split(Me.TB_Pattern.Text, " ")
If Word.Trim <> "" Then WordHash.Add(Word.Trim)
Next
Dim WordList As List(Of String) = WordHash.ToList
ReDim PermutationsArr(Factorial(WordList.Count) - 1)
AddString(WordList, 0)
Catch ex As Exception
MsgBox(ex.ToString)
End Try
End Sub

Private Function Factorial(ByVal Num As Integer) As Integer
Try
If Num > 0 AndAlso Num < 12 Then
Dim Result As Int32 = 1
Do
Result *= Num
Num -= 1
Loop Until Num <= 1
Return Result
Else
Return 0
End If
Catch ex As Exception
Return Nothing
End Try
End Function

Private Sub AddString(ByVal WordList As List(Of String), ByVal StartId As Integer)
Try
Dim InsLoop As Integer = Factorial(WordList.Count - 1)
If InsLoop = 0 Then InsLoop = 1
For Each Word As String In WordList
For InsWord As Integer = 1 To InsLoop
PermutationsArr(StartId + InsWord - 1) &= "*" & Word
Next
If WordList.Count > 1 Then
Dim Remaining As New List(Of String)
For Each RemWord As String In WordList
If RemWord <> Word Then Remaining.Add(RemWord)
Next
AddString(Remaining, StartId)
End If
StartId += InsLoop
Next
Catch ex As Exception
MsgBox(ex.ToString)
End Try
End Sub

Answer

Here's my Form class, based on yours but substantially simplified. I used Tasks for multithreading, ConcurrentDictionarys to capture the results with capacity limits, concurrency levels, and without duplicates, and populate the Listboxes in one call at the end to minimize UI updates and the associated slowness. Concurrency level is the number of tasks that will be spawned to feed the ConcurrentDictionary.

Imports System.Text.RegularExpressions

Public Class SearchForm
    Private FoldersList As Concurrent.ConcurrentDictionary(Of String, Object)
    Private FilesList As Concurrent.ConcurrentDictionary(Of String, Object)

    Private Tasks As New List(Of Task)
    Private Words As New List(Of String)

    Private StopWatch As New Stopwatch

    ' Capacity of the ConcurrentDictionary objects
    ' Set this from user input on form to limit # of results returned
    Private Capacity As Int32 = 0

    Private PermutationsArr() As String = Nothing

    Private Sub Btn_Search_Click(sender As Object, e As EventArgs) Handles Btn_Search.Click
        Btn_Search.Text = "Wait"

        ' Capacity of the ConcurrentDictionary objects
        ' Set this from user input on form to limit # of results returned
        Capacity = 10000

        Tasks.Clear()
        Words.Clear()

        LB_Folders.DataSource = Nothing
        LB_Files.DataSource = Nothing

        Me.Refresh()

        StopWatch.Restart()

        Words.AddRange(Regex.Split(Regex.Replace(Me.TB_Pattern.Text.Trim, "\*", String.Empty), "\s+"))

        Select Case True
            Case String.IsNullOrWhiteSpace(Me.TB_Pattern.Text.Trim)
                MsgBox("Too few words", vbOKOnly, "Oops")
            Case Words.Count < 1
                MsgBox("Too few words", vbOKOnly, "Oops")
            Case Words.Count > 5
                MsgBox("Too many words", vbOKOnly, "Oops")

            Case Me.CBox_LastModRange.Checked AndAlso Me.DT_ModRangeEnd.Value < Me.DT_ModRangeStart.Value
                MsgBox("Range Start must precede Range End", vbOKOnly, "Oops")

            Case Me.RBtn_Exact.Checked
                FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity)
                FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity)

                With Join(Words.ToArray)
                    If Me.CBox_Folders.Checked Then
                        ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True
                        SearchFolders(Me.TB_StartFolder.Text, .ToString, True)
                    Else
                        ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders
                        If Me.CBox_Files.Checked Then
                            SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True)
                        End If
                    End If
                End With

            Case Me.RBtn_AllInOrder.Checked
                FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity)
                FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(1, Capacity)

                With String.Format("*{0}*", Join(Words.ToArray, "*"))
                    If Me.CBox_Folders.Checked Then
                        ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True
                        SearchFolders(Me.TB_StartFolder.Text, .ToString, True)
                    Else
                        ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders
                        If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True)
                    End If
                End With

            Case Me.RBtn_AllWithoutOrder.Checked
                StringPermutations()

                ' Math.Min caps the concurrency level at 40
                FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(Math.Min(40, PermutationsArr.Count), Capacity)
                FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(Math.Min(40, PermutationsArr.Count), Capacity)

                For Each Pattern As String In PermutationsArr
                    If Me.CBox_Folders.Checked Then
                        ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True
                        SearchFolders(Me.TB_StartFolder.Text, Pattern, True)
                        'Tasks.Add(Task.Run(Sub() SearchFolders(Me.TB_StartFolder.Text, Pattern)))
                    Else
                        ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders
                        If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, Pattern, True, True)
                    End If
                Next

            Case Me.RBtn_AnyWord.Checked
                FoldersList = New Concurrent.ConcurrentDictionary(Of String, Object)(Words.Count, Capacity)
                FilesList = New Concurrent.ConcurrentDictionary(Of String, Object)(Words.Count, Capacity)

                For Each Word In Words
                    With String.Format("*{0}*", Word)
                        If Me.CBox_Folders.Checked Then
                            ' NOTE: SearchFolders will evaluate CBox_Files.Checked and do SearchFiles if True
                            SearchFolders(Me.TB_StartFolder.Text, .ToString, True)
                        Else
                            ' NOTE: Only call SearchFiles from here if NOT doing SearchFolders
                            If Me.CBox_Files.Checked Then SearchFiles(Me.TB_StartFolder.Text, .ToString, True, True)
                        End If
                    End With
                Next
        End Select

        Task.WaitAll(Tasks.ToArray)

        Debug.Print("Tasks Completed in {0}", StopWatch.Elapsed.ToString)

        Debug.Print("Adding {0} Folders", FoldersList.Keys.Count.ToString)
        Me.LB_Folders.DataSource = FoldersList.Keys

        Debug.Print("Adding {0} Files", FilesList.Keys.Count.ToString)
        Me.LB_Files.DataSource = FilesList.Keys

        Btn_Search.Text = "Search"
    End Sub

    Private Sub SearchFolders(FolderPath As String, Pattern As String, Optional FirstCall As Boolean = False)
        Try
            Dim Folders() As String = IO.Directory.GetDirectories(FolderPath)

            For Each Folder As String In Folders
                Dim SubFolders() As String = IO.Directory.GetDirectories(Folder, Pattern)

                For Each SubFolder As String In SubFolders
                    Select Case True
                        Case Not FilesList.Count < Capacity
                            Exit For
                        Case Not Me.CBox_LastModRange.Checked
                            FoldersList.TryAdd(SubFolder, Nothing)
                        Case FolderInModRange(Folder)
                            FoldersList.TryAdd(SubFolder, Nothing)
                    End Select
                Next

                If Me.CBox_Files.Checked Then
                    ' Do NOT call this with Recursive = True from here!
                    SearchFiles(Folder, Pattern)
                End If

                If FirstCall Then
                    ' Perform multithreaded Recursion
                    Tasks.Add(Task.Run(Sub() SearchFolders(Folder, Pattern)))
                Else
                    ' Perform deep recursion within task thread...don't branch further
                    SearchFolders(Folder, Pattern)
                End If
            Next
        Catch ex As UnauthorizedAccessException
            ' Access Denied
        Catch ex As Exception
            Debug.Print("SearchFiles: {0}", ex.ToString)
        End Try
    End Sub

    Private Sub SearchFiles(FolderPath As String, Pattern As String, Optional Recursive As Boolean = False, Optional FirstCall As Boolean = False)
        ' Recursive and FirstCall should only be True if NOT doing SearchFolders
        ' Recursive should only be True if called from the main thread or this method to continue the deep dive
        ' FirstCall should only be True if called from the main thread

        Try
            For Each Filename As String In IO.Directory.GetFiles(FolderPath, Pattern)
                Select Case True
                    Case Not FilesList.Count < Capacity
                        Exit For
                    Case Not Me.CBox_LastModRange.Checked
                        FilesList.TryAdd(Filename, Nothing)
                    Case FileInModRange(Filename)
                        FilesList.TryAdd(Filename, Nothing)
                End Select
            Next

            If Recursive Then
                Try
                    Dim Folders() As String = IO.Directory.GetDirectories(FolderPath)
                    For Each Folder As String In Folders
                        If FirstCall Then
                            ' Perform multithreaded Recursion
                            Tasks.Add(Task.Run(Sub() SearchFiles(Folder, Pattern, Recursive)))
                        Else
                            ' Perform deep recursion within task thread...don't branch further
                            SearchFiles(Folder, Pattern, Recursive)
                        End If
                    Next
                Catch ex As Exception
                    ' Access Denied - Does this happen?
                    Debug.Print("Recursive FolderPath: {0}", ex.Message)
                End Try
            End If
        Catch ex As UnauthorizedAccessException
            ' Access Denied
        Catch ex As Exception
            Debug.Print("SearchFiles: {0}", ex.ToString)
        End Try
    End Sub

    Private Function FolderInModRange(Folder As String) As Boolean
        Try
            With New IO.DirectoryInfo(Folder)
                Select Case True
                    Case .LastWriteTime < Me.DT_ModRangeStart.Value
                        Return False
                    Case .LastWriteTime > Me.DT_ModRangeEnd.Value
                        Return False
                    Case Else
                        Return True
                End Select
            End With
        Catch ex As Exception
            Debug.Print("FolderInModRange: {0}{1}{2}", Folder, Environment.NewLine, ex.ToString)
        End Try

        ' Only if exception is thrown
        Return False
    End Function

    Private Function FileInModRange(Filename As String) As Boolean
        Try
            With New IO.FileInfo(Filename)
                Select Case True
                    Case .LastWriteTime < Me.DT_ModRangeStart.Value
                        Return False
                    Case .LastWriteTime > Me.DT_ModRangeEnd.Value
                        Return False
                    Case Else
                        Return True
                End Select
            End With
        Catch ex As IO.PathTooLongException
            ' Path Too Long
        Catch ex As Exception
            Debug.Print("FileInModRange: {0}{1}{2}", Filename, Environment.NewLine, ex.ToString)
        End Try

        ' Only if exception is thrown
        Return False
    End Function
End Class

Recursion avoids the UnauthorizedAccessException errors generated by .Net's GetDirectories and GetFiles methods when they run into folders that the user doesn't have access rights to.

References: