patelbg2001
asked on
Using Python to search & replace identified Excel cell values.
I have a working python script that scrapes tables. After the data is copied to an Excel file I would like to remove this particular value - '--' from all sheets in the Excel file and replaced with a zero value - '0'.
Whats the best way to do this using python?
Thanks
Whats the best way to do this using python?
Thanks
import urllib.request
import pandas as pd
import sys
import os
#import xlsxwriter
import openpyxl
sys.path.append('/Users/bhav\ 1/Documents/keepbak2/python')
#xlfile = 'countrydiedtest.xlsx'
xlfile = "/Users/bhav 1/Documents/dump/countrydiedtest.xlsx"
from html_table_parser import HTMLTableParser
Income = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=IncomeStatement'
Balance = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=BalanceSheet'
Cash = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=CashFlow'
Historical = 'https://markets.ft.com/data/equities/tearsheet/historical?s=CWD:LSE'
#get website config Income
reqIncome = urllib.request.Request(url=Income)
fIncome = urllib.request.urlopen(reqIncome)
xhtmlIncome = fIncome.read().decode('utf-8')
#get website config Cash
reqCash = urllib.request.Request(url=Cash)
fCash = urllib.request.urlopen(reqCash)
xhtmlCash = fCash.read().decode('utf-8')
#get website config Balance
reqBalance = urllib.request.Request(url=Balance)
fBalance = urllib.request.urlopen(reqBalance)
xhtmlBalance = fBalance.read().decode('utf-8')
#get website config Historical
reqHistorical = urllib.request.Request(url=Historical)
fHistorical = urllib.request.urlopen(reqHistorical)
xhtmlHistorical = fHistorical.read().decode('utf-8')
#instantiate he parser and feed it
p1 = HTMLTableParser()
p1.feed(xhtmlIncome)
#instantiate he parser and feed it
p2 = HTMLTableParser()
p2.feed(xhtmlCash)
#instantiate he parser and feed it
p3 = HTMLTableParser()
p3.feed(xhtmlBalance)
#instantiate he parser and feed it
p4 = HTMLTableParser()
p4.feed(xhtmlHistorical)
Table_ListIncome = p1.tables
Table_ListCash = p2.tables
Table_ListBalance = p3.tables
Table_ListHistorical = p4.tables
# Use the Pandas from_dict method to read the tables into
# a Pandas data frame
df1 = pd.DataFrame.from_dict(Table_ListIncome[0])
#ddash = "--"
#zero = "0"
#df1.replace(--, 0)
df2 = pd.DataFrame.from_dict(Table_ListCash[0])
df3 = pd.DataFrame.from_dict(Table_ListBalance[0])
df4 = pd.DataFrame.from_dict(Table_ListHistorical[0])
# Create a Pandas Excel writer using XlsxWriter as the engine.
xf = pd.ExcelWriter(xlfile, engine='xlsxwriter')
#
#Convert the dataframe to an XlsxWriter Excel object.
df1.to_excel(xf, sheet_name='Income')
df2.to_excel(xf, sheet_name='Cash')
df3.to_excel(xf, sheet_name='Balance')
df4.to_excel(xf, sheet_name='Historical ')
#dash = "--"
#df2.replace(dash, "0")
#df1.replace(dash, "0")
#df3.replace(dash, "0")
#Drop First Coloum
#df1.drop(df1.columns[[0]], axis=1, inplace=True)
# Close the Pandas Excel writer and output the Excel file.
xf.save()
xf.close()
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
Thanks, simple and effective.
Welcome!
Open in new window