Mordecaidrake Mordecaidrake - 23 days ago 10
Python Question

Scanning files in directories and subdirectories

Essentially what I'm looking to do is search the files within a folder structure for a list of invoices that are provided and copy the desired data over to a new file. My below script works as described, however the script chokes on search folders that contain sub directories. I need to modify the script to scan the root folder and it's sub directories files. Any idea what to do, I've tried several different code updates but non seem to work:

import tkinter
import os
import fnmatch
from tkinter import *
from tkinter import messagebox as tkMessageBox
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import askdirectory
from pathlib import PureWindowsPath
from pathlib import Path


content = ''
BrowsePath = ''
SearchPath = ''

top = tkinter.Tk()

#Browse entry field
Browse1 = Label(text="Search List:").grid(row=0)

BrowsePath = StringVar()
BrowsePath.set("Select File Containing Invoice Numbers")
BrowseL = Label(bd=5,textvariable=BrowsePath, width=100,relief=SUNKEN).grid(row=0,column=1)

#Search Folder
Searce1 = Label( text="Search Folder:").grid(row=1)

SearchPath = StringVar()
SearchPath.set("Select Folder to Search")
SearchL = Label(bd=5,textvariable=SearchPath, width=100,relief=SUNKEN).grid(row=1,column=1)

OutputL1 = Label( text="Output File:").grid(row=2)

OutputPath = StringVar()
OutputPath.set("File to Save Results to")
OutputL2 = Label(bd=5,textvariable=OutputPath, width=100,relief=SUNKEN).grid(row=2,column=1)


#Process complete function
def GetCallBack():
tkMessageBox.showinfo( "Find Invoices", "Processing complete!")

#********************************************************FILE PICKERS****************************************************************************

#Select file containing list of invoices
def GetFile():
global content
global BrowsePath
filename = askopenfilename()
infile = open(filename,'r')
content =
return content

#Select directory containing invoice files
def SearchDir():
global content
global SearchPath
pathname = askdirectory()
return content

#Creates the save file with isolated invoices
def SaveFile():

filename = os.path.abspath(os.path.join(SearchPath.get(),"Results.txt"))

OutputPath.set(filename) #update label with location of file

#********************************************************READING invoice LIST FILE****************************************************************************

def GetPOCount():
PO = [line.rstrip('\n') for line in open(os.path.realpath(BrowsePath.get()))] #isolates list of invoices
ponum_count = sum(1 for line in open(os.path.realpath(BrowsePath.get()))) #gets count of invoice numbers
return PO, ponum_count #can be indexed

def GetFileNames():
files = os.listdir(SearchPath.get()) #gets list of files
return files #can be indexed

def GetFileLineCount():
files = GetFileNames()
file_count = len(fnmatch.filter(os.listdir(SearchPath.get()),'*.*'))
line_count = sum(1 for line in open(os.path.realpath(os.path.join(SearchPath.get(),files[file_count-1])))) #gets count of lines in invoice file
return line_count, file_count

def FindPOs():
po_number = GetPOCount()[0]
po_counter = GetPOCount()[1]


file_counter = GetFileLineCount()[1]
file_name = GetFileNames()


# For each file
for filename in file_name:
print("Searching " + filename)

with open(os.path.join(SearchPath.get(),filename),'r') as content_file:
line_count = sum(1 for line in content_file) #gets count of lines in invoice file
po_line = [line.rstrip('\n') for line in open(os.path.realpath(os.path.join(SearchPath.get(),filename)))] #isolates each line
result_filename = os.path.abspath(os.path.join(os.path.dirname(SearchPath.get()),"Results.txt"))
log = os.path.abspath(os.path.join(os.path.dirname(SearchPath.get()),"FoundInvoices.txt"))

# For each line in file
#TODO: make this for each po_line
for PONum in po_number:
print("looking for " + PONum)

for line in range (0,line_count):

#locate Header Record
if po_line[line][16:18] == "10" or po_line[line][15:17] == "10":
print("On a header record")

if PONum in po_line[line].strip():
print("Looking for " + PONum)
# Write the current line to the results file
with open(result_filename,'a+') as file:
file.write(po_line[line] + '\n')

# Write this PONum to the log file
with open(log,'a+') as logs:
logs.write(PONum + '\n')

# Loop from the next line to the end
with open(result_filename,'a+') as file:
for z in range (line+1,line_count):
if ((po_line[z][16:18] != "10") and (po_line[z] != '\n') and (po_line[z][15:17] != "10") and (po_line[z][16:18] != "05")):
file.write(po_line[z] + '\n')
# Once we've found a "10" or newline, stop printing this PO



# Search List Browse Button logic
BrowseButton = tkinter.Button(text ="Browse", command = GetFile).grid(row=0,column = 2)

# Search Directory Button logic
SearchButton = tkinter.Button(text ="Search", command = SearchDir).grid(row=1,column = 2)

# Find POs Button Logic
FindButton = tkinter.Button( text ="Get Invoices", command = FindPOs).grid(row=4,column = 1)


Answer Source

You're code is a bit overly complex. Perhaps this is only a portion of the total code. For instance GetFileLineCount() returns two variables, but one of them is never used in your code. GetFileNames() could produce the same result.

def GetFileNames():
    files = os.listdir(SearchPath.get()) #gets list of files
    file_count = len(fnmatch.filter(files),'*.*'))
    return files, file_count #can be indexed

Or better yet you could replace GetFileNames with an os.walk() function:

def GetFileNames():
    filepaths = []
    for root,dir,files in os.walk(SearchPath.get()):
        if len(files) > 0:
            for file in files:
    return filepaths

This will give you a list of all of the files in your SearchPath. Then you can use the same loop but not have to join your SearchPath with your filename each time:

for filename in filepaths:
        print("Searching " + filename)

        with open(filename,'r') as content_file:
            line_count = sum(1 for line in content_file)

... and so on.

Note - I haven't rewritten all of your code for you. You will likely need to make some modifications here and there to make this work, but this should provide a solution to your problem.