Python Selenium Document Lookup and Download Automation Error - Target Machine actively refuses connection

Hi, I am using the below code to match some data from an input csv to 2 drop downs from a website, then download both a csv and pdf... (the csv, I am extracting info from, then overwriting it with the next download since I do not need it -- the pdf, I am keeping, if the day of the month is the 1st or the 15th... but, as you can see here, for testing, I have it set to today, the 14th, instead of 15th)

My problem is that it will run Ok, for the first 10 lines or so (the csv list is between 1200-2500 lines, so i'm not even close..)
but then, it will throw the below error:

[Errno 10061] No connection could be made because the target machine actively refused it

Can anyone help me resolve this? I am new to python, scraping AND selenium so would appreciate any help -- or suggestions regarding making the code more efficient in general. Thank you!

# -*- coding: utf-8 -*-

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import unittest, time, os, shutil, csv
from fuzzywuzzy import process
import datetime

class Here(unittest.TestCase):
    def setUp(self):
        path_to_chromedriver = 'chromedriver.exe'  # change path as needed
        self.driver = webdriver.Chrome(executable_path=path_to_chromedriver)
        self.base_url = "https://mybaseurl/"
        self.verificationErrors = []
        self.accept_next_alert = True
        self.dlfolder = "C:/Users/bgutt/Downloads"
        self.assolist = []
        self.reportlist = []
        self.inputfile = 'C:\\Users\\bgutt\\Downloads\\Input.csv'

    def wait(self, by_selector, element):
        print "Inside wait"
        try:
            WebDriverWait(self.driver, 30).until(EC.visibility_of_element_located((by_selector, element)))


        except Exception as e:
            print e
            self.restart()

    def login(self):

        print "Inside login"
        driver = self.driver
        driver.get("https://mywebsite")
        # self.assertEqual("", driver.title)
        driver.find_element_by_id("MainContent_Login1_ErlLogin_UserName").clear()
        driver.find_element_by_id("MainContent_Login1_ErlLogin_UserName").send_keys("myusername")
        driver.find_element_by_id("MainContent_Login1_ErlLogin_Password").clear()
        driver.find_element_by_id("MainContent_Login1_ErlLogin_Password").send_keys("mypassword")
        driver.find_element_by_id("MainContent_Login1_ErlLogin_LoginButton").click()
        # self.driver.implicitly_wait(30)

    def test_here(self):

        driver = self.driver
        self.login()
        self.wait(By.CLASS_NAME, "k-input")
        print "Login Done"

        csvlist = []
        csvlist_log = []
        csvfile = "Account History Report.csv"
        pdffile = "Account History Report.pdf"

        # get dropdown lists
        print "Getting Dropdown lists"

        self.getlists(driver)

        rows = csv.reader(open(self.inputfile, "rb"))
        defaulter = {}
        next(rows)
        failedlist = []
        j = 0
        for c in rows:

            if j >= 200:
                break
            
            j += 1
            print j, c

            if (c[1].strip() in defaulter) or (c[1].strip() == "MYCOMPANY.ORG, LLC") or (c[1].strip() == '') or (c[2].strip() == ''):
                print "taking from defaulter list"
                csvlist.append((c[0],))
                csvlist_log.append(defaulter[c[1].strip()] if c[1].strip() in defaulter else (datetime.datetime.now(), c[0], c[1], c[2], '', '', 'Error'))
                continue

            # Pick list of available communities
            comm_match = self.get_community(driver, c)
            print " Got community ",comm_match

            # Choose correct item from search result

            add_match = self.get_address(driver, c)

            if add_match == '':
                defaulter[c[1]] = (datetime.datetime.now(), c[0], c[1], c[2], comm_match, '', 'Error')
                csvlist.append((c[0],))
                csvlist_log.append((datetime.datetime.now(), c[0], c[1], c[2], comm_match, '', 'Error'))
                continue

            csvlist_log.append((datetime.datetime.now(), c[0], c[1], c[2], comm_match, add_match, 'Success'))
            print " Got address ", add_match

            # Pick account history report
            res = self.get_ahr(driver)
            print " Chose ahr", res

            if res == False:
                failedlist.append(c)
                continue

            # Download reports
            self.clear_downloads(csvfile)
            pdf = False
            if datetime.date.today().day in [1, 14]:
                self.clear_downloads(pdffile)
                pdf = True

            report = self.get_report(driver, True, pdf)

            if report == False:
                failedlist.append(c)
                continue

            print " Got AHR, Need PDF? ", pdf

            with open(os.path.join(self.dlfolder, csvfile), "r") as f:
                data = csv.reader(f)
                next(data)
                row = next(data)

            if pdf:
                filename = "Account History Report_" + c[0] + ".pdf"
                i = 0
                while True:
                    if os.path.isfile(os.path.join(os.getcwd(), filename)):
                        i += 1
                        filename = "Account History Report_" + c[0] + "_" + str(i) + ".pdf"
                    else:
                        break

                dest = os.path.join(os.getcwd(), filename)
                src = os.path.join(self.dlfolder, pdffile)
                shutil.copy(src, dest)

            csvlist.append((c[0], row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8]))

        print len(defaulter), csvlist
        self.create_CSVFile(csvlist, csvlist_log)
        driver.quit()


    def getlists(self, driver):

        driver.find_element_by_css_selector("span.k-input").click()
        elements = driver.find_elements_by_xpath("//ul[@id='AssociationList_listbox']/li")
        self.assolist = [element.text for element in elements]

        driver.find_element_by_xpath(
            ".//*[@id='content-container']/div[4]/div/div[1]/div[2]/div[2]/span/span/span[1]").click()
        elements_report = driver.find_elements_by_xpath("//*[@id='TransactionDetail']/table/tbody//td[4]")
        self.reportlist = [element.text for element in elements_report]

    def get_community(self, driver, c):
        print "Inside get_community"
        try:

            self.wait(By.CLASS_NAME, "k-input")
            driver.find_element_by_css_selector("span.k-input").click()

            match = c[1].strip() if "," not in c[1] else c[1].strip().split(",")[0]
            check = process.extractOne(match, self.assolist)

            #print check, match

            element = driver.find_element_by_xpath("//ul[@id='AssociationList_listbox']/li[text()='%s']" % check[0])
            element.click()
            return check[0]

        except Exception as e:
            print "No community ",e
            return ''

    def get_address(self, driver, c):
        print "Inside get_address"
        try:

            driver.find_element_by_css_selector("a.gridmenuitem.refresh").click()
            self.wait(By.ID, "TransactionDetail")
            text = driver.find_element_by_css_selector("input#SearchTransactions")
            text.send_keys(c[2].split()[0])
            text.send_keys(Keys.RETURN)

            time.sleep(2)

            add = driver.find_elements_by_xpath("//*[@id='TransactionDetail']/table/tbody//td[4]")
            address = [ele.text for ele in add]

            check = process.extractOne(c[2].strip(), address)

            #print c[2].strip(), check, address

            checkbox = driver.find_element_by_xpath(
                "//*[@id='TransactionDetail']/table[.//td[.='%s']][1]//td[.='%s']/ancestor::tr[1]/td//input[@type='checkbox'][1]" % (
                check[0], check[0]))
            checkbox.click()

            return check[0]

        except Exception as e:
            return ''

    def get_ahr(self, driver):

        print "Inside get_ahr"
        try:
            driver.find_element_by_xpath(
                ".//*[@id='content-container']/div[4]/div/div[1]/div[2]/div[2]/span/span/span[1]").click()

            self.wait(By.ID, 'ReportList_listbox')
            element = driver.find_element_by_xpath("//ul[@id='ReportList_listbox']/li[text()='Account History Report']")

            if element.text != 'Account History Report':
                self.get_ahr(driver)
            else:
                element.click()
                return True

        except Exception as e:
            print "Got error in AHR ",e
            self.restart()
            return False

    def restart(self):

        print "Inside restart"
        self.driver.quit()
        time.sleep(60)
        self.login()

    def get_report(self, driver, csv, pdf):

        print "Inside get_report"
        try:
            if csv == True:
                self.wait(By.ID,'iFrameReport')
                driver.switch_to.frame('iFrameReport')
                self.wait(By.ID, "ReportViewer_ctl05_ctl04_ctl00_Button")

                driver.find_element_by_css_selector("table#ReportViewer_ctl05_ctl04_ctl00_Button").click()
                ele = driver.find_element_by_xpath('//a[@title="CSV (comma delimited)"]')
                ele.click()

                if not os.path.join(self.dlfolder, "Account History Report.csv"):
                    self.get_report(driver, csv, False)

            if pdf:
                # Putting sleep because script will open new tab to download CSV file
                # so waiting for it to come back to main reports page.
                time.sleep(3)
                driver.find_element_by_css_selector("table#ReportViewer_ctl05_ctl04_ctl00_Button").click()
                ele = driver.find_element_by_xpath('//a[@title="Acrobat (PDF) file"]')
                ele.click()
                if not os.path.join(self.dlfolder, "Account History Report.pdf"):
                    self.get_report(driver, False, pdf)

            # Intentionally wait for 2 seconds so that all the file downloads are complete
            time.sleep(2)

            driver.switch_to_default_content()
            return True

        except Exception as e:
            print "Got error in get_report ", e
            self.restart()
            return False

    def clear_downloads(self, filename):
        print "Inside clear_downloads"
        for fname in os.listdir(self.dlfolder):
            if fname.startswith("Account History Report"):
                os.remove(os.path.join(self.dlfolder, fname))

    def create_CSVFile(self, mylist, mylist_log):
        '''(list)->()

        List of tuples will be given, it will write to CSV file

        '''

        print "Inside create_CSVFile"
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H-%M-%S")
        myfile = open('outputfile_' + timestamp +'.csv', 'wb')
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        wr.writerow(("Ourfile", "CommunityAddress", "SettlementDate", "CommunityUnitCityStateZip", "UnitType",
                     "MailingAddress", "LastPaymentDate", "MailingUnitCityStateZip", "LastPaymentAmount",
                     "CurrentBalance"))
        wr.writerows(mylist)

        myfile_log = open('logfile_' + timestamp +'.csv', 'wb')
        wr = csv.writer(myfile_log, quoting=csv.QUOTE_ALL)
        wr.writerow(("Date&Time", "Ourfile", "Creditor", "propert_add", "matchedCreditor", "matched property address",
                     "successful or Error"))
        wr.writerows(mylist_log)


if __name__ == "__main__":
    unittest.main()

Open in new window

Brent GuttmannBusiness AnalystAsked:
Who is Participating?
I wear a lot of hats...

"The solutions and answers provided on Experts Exchange have been extremely helpful to me over the last few years. I wear a lot of hats - Developer, Database Administrator, Help Desk, etc., so I know a lot of things but not a lot about one thing. Experts Exchange gives me answers from people who do know a lot about one thing, in a easy to use platform." -Todd S.

Gerwin Jansen, EE MVETopic Advisor Commented:
Didn't look through the code but the site refuses the connection because it thinks you're are robot or not a real browser. You'd have to change the user agent settings to let the site think you are a real user.
Gerwin Jansen, EE MVETopic Advisor Commented:
Never got help? You didn't reply on my comment, did you try changing the user agent?
Brent GuttmannBusiness AnalystAuthor Commented:
Yes, i did, no dice... But you also said didnt read the code... I can post my updated script if you will take a look... Also now running into http 404 error
Gerwin Jansen, EE MVETopic Advisor Commented:
I didn't read your code because the error doesn't have to do with the code itself. As an expert, I would have appreciated a reply from you before you hit 'delete' ;)

404 means that you have a connection but what you are getting doesn't exist. Can you get that url manually?

Experts Exchange Solution brought to you by

Your issues matter to us.

Facing a tech roadblock? Get the help and guidance you need from experienced professionals who care. Ask your question anytime, anywhere, with no hassle.

Start your 7-day free trial
It's more than this solution.Get answers and train to solve all your tech problems - anytime, anywhere.Try it for free Edge Out The Competitionfor your dream job with proven skills and certifications.Get started today Stand Outas the employee with proven skills.Start learning today for free Move Your Career Forwardwith certification training in the latest technologies.Start your trial today
Selenium

From novice to tech pro — start learning today.