asked on

Using Python to search & replace identified Excel cell values.

I have a working python script that scrapes tables. After the data is copied to an Excel file I would like to remove this particular value - '--' from all sheets in the Excel file and replaced with a zero value - '0'.
Whats the best way to do this using python?
Thanks

import urllib.request
import pandas as pd
import sys
import os
#import xlsxwriter
import openpyxl

sys.path.append('/Users/bhav\ 1/Documents/keepbak2/python')

#xlfile = 'countrydiedtest.xlsx'
xlfile = "/Users/bhav 1/Documents/dump/countrydiedtest.xlsx"

from html_table_parser import HTMLTableParser

Income = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=IncomeStatement'
Balance = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=BalanceSheet'
Cash = 'https://markets.ft.com/data/equities/tearsheet/financials?s=CWD:LSE&subview=CashFlow'
Historical = 'https://markets.ft.com/data/equities/tearsheet/historical?s=CWD:LSE'

#get website config Income
reqIncome = urllib.request.Request(url=Income)
fIncome = urllib.request.urlopen(reqIncome)
xhtmlIncome = fIncome.read().decode('utf-8')

#get website config Cash
reqCash = urllib.request.Request(url=Cash)
fCash = urllib.request.urlopen(reqCash)
xhtmlCash = fCash.read().decode('utf-8')

#get website config Balance
reqBalance = urllib.request.Request(url=Balance)
fBalance = urllib.request.urlopen(reqBalance)
xhtmlBalance = fBalance.read().decode('utf-8')

#get website config Historical
reqHistorical = urllib.request.Request(url=Historical)
fHistorical = urllib.request.urlopen(reqHistorical)
xhtmlHistorical = fHistorical.read().decode('utf-8')

#instantiate he parser and feed it
p1 = HTMLTableParser()
p1.feed(xhtmlIncome)

#instantiate he parser and feed it
p2 = HTMLTableParser()
p2.feed(xhtmlCash)

#instantiate he parser and feed it
p3 = HTMLTableParser()
p3.feed(xhtmlBalance)

#instantiate he parser and feed it
p4 = HTMLTableParser()
p4.feed(xhtmlHistorical)

Table_ListIncome = p1.tables
Table_ListCash = p2.tables
Table_ListBalance = p3.tables
Table_ListHistorical = p4.tables

# Use the Pandas from_dict method to read the tables into
# a Pandas data frame

df1 = pd.DataFrame.from_dict(Table_ListIncome[0])

#ddash = "--"
#zero = "0"

#df1.replace(--, 0)

df2 = pd.DataFrame.from_dict(Table_ListCash[0])
df3 = pd.DataFrame.from_dict(Table_ListBalance[0])
df4 = pd.DataFrame.from_dict(Table_ListHistorical[0])

# Create a Pandas Excel writer using XlsxWriter as the engine.
xf = pd.ExcelWriter(xlfile, engine='xlsxwriter')
#
#Convert the dataframe to an XlsxWriter Excel object.
df1.to_excel(xf, sheet_name='Income')
df2.to_excel(xf, sheet_name='Cash')
df3.to_excel(xf, sheet_name='Balance')
df4.to_excel(xf, sheet_name='Historical ')

#dash = "--"
#df2.replace(dash, "0")
#df1.replace(dash, "0")
#df3.replace(dash, "0")




#Drop First Coloum
#df1.drop(df1.columns[[0]], axis=1, inplace=True)

# Close the Pandas Excel writer and output the Excel file.
xf.save()
xf.close()

Open in new window

Norie

Does this work?

df1 = pd.DataFrame.from_dict(Table_ListIncome[0])

df1.replace(to_replace="--", value="0")

df2 = pd.DataFrame.from_dict(Table_ListCash[0])

df2.replace(to_replace="--", value="0")

df3 = pd.DataFrame.from_dict(Table_ListBalance[0])

df3.replace(to_replace="--", value="0")

df4 = pd.DataFrame.from_dict(Table_ListHistorical[0])

df4.replace(to_replace="--", value="0")

Open in new window

ASKER CERTIFIED SOLUTION

Subodh Tiwari (Neeraj)

membership

This solution is only available to members.

To access this solution, you must be a member of Experts Exchange.

Start Free Trial

patelbg2001

ASKER

Thanks, simple and effective.

Subodh Tiwari (Neeraj)

Welcome!

Using Python to search &amp; replace identified Excel cell values.

Using Python to search & replace identified Excel cell values.