Help with python script to confront csv file to mysql and let user decide about fields to update

Hello there!

I've passed the last days fighting with this topic without great results since I'm a beginner.

I have a .csv files containing various fields: name, surname, city, etc

I want to prepare a python script that reads this file row by row and searches on the db for records with the same name, surname and city.

If I find such record I would like to make a comparison of all the other fields and allow the user to update or not the database with data contained (for the same record) in the .csv

Here's what I did but I can't get it to work.

Any kind of help is greatly appreciated: code correction, ideas, faux code, analysis...

I am ready to start from scratch (I did it more than once) but I can't understand how I should plan the program to let it work:

import os
import csv
import string
import datetime
from warnings import filterwarnings
import MySQLdb
import MySQLdb as Database
filterwarnings('ignore', category = Database.Warning)

os.system('cls' if os.name == 'nt' else 'clear')
now = datetime.datetime.now()
timestamp = "_"+str(now.day)+"-"+str(now.month)+"-"+str(now.year)+"_"+str(now.hour)+"-"+str(now.minute)+"-"+str(now.second)
#import_fields = {"cognome" : "", "nome" : "", "azienda" : "", "riferimento" : "", "carica" : "", "indirizzo" : "", "cellulare" : "", "cellulare2" : "", "cellulare3" : "", "telefono" : "", "telefono2" : "", "fax" : "", "email" : "", "email2" : "", "skyp" : "", "note" : ""}
import_fields = {}

db_fields = {}
csv_fields = {}

bold_start = "\033[1m"
bold_end = "\033[0;0m"

tot_non_doppioni = 0
tot_doppioni = 0
numero_riga = 0


def fields_check(csv_fields, db_fields):
    global tot_doppioni
    for k in csv_fields:
        csv_value = csv_fields[k]
        db_value = db_fields[k]
        if csv_value == db_value:
            pass
        elif csv_value != db_value:
            while True:
                print "The key is:", k
                print
                print "1) Field in csv:", csv_value
                print "2) Field in db: ", db_value
                print
                user_answer = raw_input("Which do you want to keep? [1 o 2]: ")
                print
                if user_answer == "1":
                    import_fields[k] = csv_value
                    print "You dedided to keep the field name:", k,
                    print "with value:", csv_value
                    break
                elif user_answer == "2":
                    import_fields[k] = db_value
                    print "You dedided to keep the field name:", k, "with value:", db_value
                    break
                else:
                    print "Error!"
    return dict(db_fields.items() + import_fields.items())


cognome_import = ""
nome_import = ""
riferimento_import = ""
carica_import = ""
note_import = ""
cellulare_import = ""
cellulare2_import = ""
telefono_import = ""
telefono2_import = ""
fax_import = ""
azienda_import = ""
email_import = ""
indirizzo_import = ""
residenza_import = ""




mydb = MySQLdb.connect(host='localhost',
    user='',
    passwd='',
    db='')
cursor = mydb.cursor()

csv_check_doppioni = csv.reader(file('doppioni.csv'))


for row in csv_check_doppioni:
    cognome_csv = row[0].lower().rstrip().lstrip()
    nome_csv = row[1].lower().rstrip().lstrip()
    riferimento_csv = row[2].lower().rstrip().lstrip()
    carica_csv = row[3].lower().rstrip().lstrip()
    note_csv = row[4].lower().rstrip().lstrip()
    cellulare_csv = row[5].lower().rstrip().lstrip()
    cellulare2_csv = row[6].lower().rstrip().lstrip()
    telefono_csv = row[7].lower().rstrip().lstrip()
    telefono2_csv = row[8].lower().rstrip().lstrip()
    fax_csv = row[9].lower().rstrip().lstrip()
    azienda_csv = row[10].lower().rstrip().lstrip()
    email_csv = row[11].lower().rstrip().lstrip()
    indirizzo_csv = row[12].lower().rstrip().lstrip()
    residenza_csv = row[13].lower().rstrip().lstrip()
    csv_fields = {"cognome" : cognome_csv, "nome" : nome_csv, "riferimento" : riferimento_csv, "carica" : carica_csv, "note" : note_csv, "cellulare" : cellulare_csv, "cellulare2" : cellulare2_csv, "telefono" : telefono_csv, "telefono2" : telefono2_csv, "fax" : fax_csv, "azienda" : azienda_csv, "email" : email_csv, "indirizzo" : indirizzo_csv}
    numero_riga += 1
    cursor.execute("SELECT * FROM contatti WHERE cognome like %s AND nome like %s", ("%"+cognome_csv+"%", "%"+nome_csv+"%"))
    result = cursor.fetchall()

    if result == ():
        tot_non_doppioni += 1
        with open("da_importare"+timestamp+".csv", "a") as text_file:
            text_file.write(', '.join(row))
            text_file.write('\r\n')
    else:
        id = result[0][0]
        id_comune = result[0][1]
        cognome_db = result[0][2].lower().rstrip().lstrip()
        nome_db = result[0][3].lower().rstrip().lstrip()
        azienda_db = result[0][4].lower().rstrip().lstrip()
        riferimento_db = result[0][5].lower().rstrip().lstrip()
        carica_db = result[0][6].lower().rstrip().lstrip()
        indirizzo_db = result[0][7].lower().rstrip().lstrip()
        cellulare_db = result[0][8].lower().rstrip().lstrip()
        cellulare2_db = result[0][9].lower().rstrip().lstrip()
        cellulare3_db = result[0][10].lower().rstrip().lstrip()
        telefono_db = result[0][11].lower().rstrip().lstrip()
        telefono2_db = result[0][12].lower().rstrip().lstrip()
        fax_db = result[0][13].lower().rstrip().lstrip()
        email_db = result[0][14].lower().rstrip().lstrip()
        email2_db = result[0][15].lower().rstrip().lstrip()
        skype_db = result[0][16].lower().rstrip().lstrip()
        note_db = result[0][17].lower().rstrip().lstrip()

        db_fields = {"id" : id, "id_comune" : id_comune, "cognome" : cognome_db, "nome" : nome_db, "riferimento" : riferimento_db, "carica" : carica_db, "note" : note_db, "cellulare" : cellulare_db, "cellulare2" : cellulare2_db, "telefono" : telefono_db, "telefono2" : telefono2_db, "fax" : fax_db, "azienda" : azienda_db, "email" : email_db, "indirizzo" : indirizzo_db}

        doppione_corretto = fields_check(csv_fields, db_fields)
        print doppione_corretto
        tot_doppioni += 1


cursor.close()

Open in new window

LVL 1
ltpittAsked:
Who is Participating?
I wear a lot of hats...

"The solutions and answers provided on Experts Exchange have been extremely helpful to me over the last few years. I wear a lot of hats - Developer, Database Administrator, Help Desk, etc., so I know a lot of things but not a lot about one thing. Experts Exchange gives me answers from people who do know a lot about one thing, in a easy to use platform." -Todd S.

peprCommented:
There are several things to fix.

For the timestamp. It may be the case that you cannot choose and you need to follow the string format given to you by someone else. However, it is better to follow the experience of others. When sorting the table by the timestamp column, then sorting by correctly constructed strings mean sorting by date. But it works only when you place year first, month as second, day, hour, minute, and second as the last. All timestamps also have to have the same number of characters. It is usually done by padding single digits by zero from the left. If it is acceptable, you can use the .strftime() method of your now. See below:
>>> import datetime
>>> now = datetime.datetime.now()

Open in new window

Your code produces
>>> timestamp = "_"+str(now.day)+"-"+str(now.month)+"-"+str(now.year)+"_"+str(no
w.hour)+"-"+str(now.minute)+"-"+str(now.second)
>>> timestamp
'_17-4-2014_10-48-23'

Open in new window

If you accept zero padding, you can use the .strftime
>>> now.strftime("_%d-%m-%Y_%H-%M-%S")
'_17-04-2014_10-48-23'

Open in new window

There even is a special function to get the ISO format of the timestamp (as a string). Notice the T is recommended to separate the date and time:
>>> now.isoformat()
'2014-04-17T10:48:23.456000'

Open in new window

But you may not want the fraction of seconds. You can use the strftime:
>>> now.strftime("%Y-%m-%dT%H:%M:%S")
'2014-04-17T10:48:23'

Open in new window

Sometimes more technical (dense) format is used:
>>> now.strftime("%Y%m%dT%H%M%S")
'20140417T104823'

Open in new window

Now for the following line:
csv_check_doppioni = csv.reader(file('doppioni.csv'))

Open in new window

It is tempting to use file as the file-object constructor (especially if you know say C++). However, the open function was always and is recommended. There is at least one serious reason. Python 3 does not have the file class at all.

Secondly, Python 2 documentation says that you have to open the file in binary mode when used by the csv module. Thirdly, the open file should always be closed. You know the with construct (as you show below) that ensures closing the object created using the with construct. Then the way to open CSV file should be:
with open('doppioni.csv', 'rb') as csv_input_file, \
        open("da_importare"+timestamp+".csv", "ab") as csv_output_file:
    reader = csv.reader(csv_input_file)
    writer = csv.writer(csv_output file)

    ...
    for row in reader:
        ...
        row = [element.strip() for element in row]   # strip all the elements (instead of .rstrip().lstrip()
        ...
        writer.writerow(row)

Open in new window

Now for construction of the csv_fields dictionary. You know the column names. I will simulate the row containing the row where elements are strings with leading and trailing spaces like this:
>>> row = ['  ' + str(e) + '  ' for e in range(14)]
>>> row
['  0  ', '  1  ', '  2  ', '  3  ', '  4  ', '  5  ', '  6  ', '  7  ', '  8  '
, '  9  ', '  10  ', '  11  ', '  12  ', '  13  ']

Open in new window

You can define outside the loop:
>>> columns = ["cognome", "nome", "riferimento", "carica", "note", "cellulare",
"cellulare2", "telefono", "telefono2", "fax", "azienda", "email", "indirizzo"]

Open in new window

You can put the two togeter using the standard zip function it returns a list of tuples where first element of the tuple is from the first list, the second element is from second list:
>>> zip(columns, row)
[('cognome', '  0  '), ('nome', '  1  '), ('riferimento', '  2  '), ('carica', '
  3  '), ('note', '  4  '), ('cellulare', '  5  '), ('cellulare2', '  6  '), ('t
elefono', '  7  '), ('telefono2', '  8  '), ('fax', '  9  '), ('azienda', '  10
 '), ('email', '  11  '), ('indirizzo', '  12  ')]

Open in new window

If you want to construct the dictionary with stripped values, you can use a dictionary comprehension (called also dictionary display):
>>> csv_fields = {t[0]:t[1].strip() for t in zip(columns, row)}
>>> csv_fields
{'fax': '9', 'indirizzo': '12', 'riferimento': '2', 'nome': '1', 'cognome': '0',
 'telefono2': '8', 'note': '4', 'azienda': '10', 'carica': '3', 'cellulare': '5'
, 'telefono': '7', 'email': '11', 'cellulare2': '6'}

Open in new window


The %s in the SQL after the LIKE should probably be wrapped in quotes.
0

Experts Exchange Solution brought to you by

Your issues matter to us.

Facing a tech roadblock? Get the help and guidance you need from experienced professionals who care. Ask your question anytime, anywhere, with no hassle.

Start your 7-day free trial
ltpittAuthor Commented:
Thank you for your precious time and priceless skill
0
It's more than this solution.Get answers and train to solve all your tech problems - anytime, anywhere.Try it for free Edge Out The Competitionfor your dream job with proven skills and certifications.Get started today Stand Outas the employee with proven skills.Start learning today for free Move Your Career Forwardwith certification training in the latest technologies.Start your trial today
Python

From novice to tech pro — start learning today.