Link to home
Start Free TrialLog in
Avatar of patelbg2001
patelbg2001

asked on

Using Python to search & replace identified Excel cell values.

I have a working python script that scrapes tables. After the data is copied to an Excel file I would like to remove this particular value - '--' from all sheets in the Excel file and replaced with a zero value - '0'.
Whats the best way to do this using python?
Thanks

import urllib.request
import pandas as pd
import sys
import os
#import xlsxwriter
import openpyxl

sys.path.append('/Users/bhav\ 1/Documents/keepbak2/python')

#xlfile = 'countrydiedtest.xlsx'
xlfile = "/Users/bhav 1/Documents/dump/countrydiedtest.xlsx"

from html_table_parser import HTMLTableParser

Income = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=IncomeStatement'
Balance = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=BalanceSheet'
Cash = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=CashFlow'
Historical = 'https://markets.ft.com/data/equities/tearsheet/historical?s=CWD:LSE'

#get website config Income
reqIncome = urllib.request.Request(url=Income)
fIncome = urllib.request.urlopen(reqIncome)
xhtmlIncome = fIncome.read().decode('utf-8')

#get website config Cash
reqCash = urllib.request.Request(url=Cash)
fCash = urllib.request.urlopen(reqCash)
xhtmlCash = fCash.read().decode('utf-8')

#get website config Balance
reqBalance = urllib.request.Request(url=Balance)
fBalance = urllib.request.urlopen(reqBalance)
xhtmlBalance = fBalance.read().decode('utf-8')

#get website config Historical
reqHistorical = urllib.request.Request(url=Historical)
fHistorical = urllib.request.urlopen(reqHistorical)
xhtmlHistorical = fHistorical.read().decode('utf-8')

#instantiate he parser and feed it
p1 = HTMLTableParser()
p1.feed(xhtmlIncome)

#instantiate he parser and feed it
p2 = HTMLTableParser()
p2.feed(xhtmlCash)

#instantiate he parser and feed it
p3 = HTMLTableParser()
p3.feed(xhtmlBalance)

#instantiate he parser and feed it
p4 = HTMLTableParser()
p4.feed(xhtmlHistorical)

Table_ListIncome = p1.tables
Table_ListCash = p2.tables
Table_ListBalance = p3.tables
Table_ListHistorical = p4.tables

# Use the Pandas from_dict method to read the tables into
# a Pandas data frame

df1 = pd.DataFrame.from_dict(Table_ListIncome[0])

#ddash = "--"
#zero = "0"

#df1.replace(--, 0)

df2 = pd.DataFrame.from_dict(Table_ListCash[0])
df3 = pd.DataFrame.from_dict(Table_ListBalance[0])
df4 = pd.DataFrame.from_dict(Table_ListHistorical[0])

# Create a Pandas Excel writer using XlsxWriter as the engine.
xf = pd.ExcelWriter(xlfile, engine='xlsxwriter')
#
#Convert the dataframe to an XlsxWriter Excel object.
df1.to_excel(xf, sheet_name='Income')
df2.to_excel(xf, sheet_name='Cash')
df3.to_excel(xf, sheet_name='Balance')
df4.to_excel(xf, sheet_name='Historical ')

#dash = "--"
#df2.replace(dash, "0")
#df1.replace(dash, "0")
#df3.replace(dash, "0")




#Drop First Coloum
#df1.drop(df1.columns[[0]], axis=1, inplace=True)

# Close the Pandas Excel writer and output the Excel file.
xf.save()
xf.close()

Open in new window

Avatar of Norie
Norie

Does this work?
df1 = pd.DataFrame.from_dict(Table_ListIncome[0])

df1.replace(to_replace="--", value="0")

df2 = pd.DataFrame.from_dict(Table_ListCash[0])

df2.replace(to_replace="--", value="0")

df3 = pd.DataFrame.from_dict(Table_ListBalance[0])

df3.replace(to_replace="--", value="0")

df4 = pd.DataFrame.from_dict(Table_ListHistorical[0])

df4.replace(to_replace="--", value="0")

Open in new window

ASKER CERTIFIED SOLUTION
Avatar of Subodh Tiwari (Neeraj)
Subodh Tiwari (Neeraj)
Flag of India image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Avatar of patelbg2001

ASKER

Thanks, simple and effective.