Link to home
Create AccountLog in
Avatar of Gani tpt
Gani tpt

asked on

unable to read specific text from text file using python

I writing python program which read specific text from text file.

The program is working fine if it is static line index.

if it is dynamic search in the whole document, i am unable to read..

particularly, i want to get "Join Date","Relieve Date",etc. along with 3 main parameter(EmpNo,Name,Salary).

How to change the below code..?

Code attached in the below link

Sample TXT File https://1drv.ms/t/s!AiSRcgO5FUmNeTflVF53ZBkuv8Y
TEST_StackOverflow.txt
Avatar of HonorGod
HonorGod
Flag of United States of America image

import glob

def process_file( file_name ) :
    print 'process_file( "%s" )' % file_name
    items = {
              'Staff No'     : None
            , 'Name'         : None
            , 'Department'   : None
            , 'Join Date'    : None
            , 'Relieve Date' : None
            } 
    with open( file_name ) as f:
        data = f.readlines() 
    linenum = 0
    for line in data : 
        linenum += 1
        print '%2d: "%s"' % ( linenum, line.strip() )
        for item in items.keys() :
          pos = line.find( item )
          if not items[ item ] and pos > -1 :
              val = line[ pos + len( item ) + 2 : pos + len( item ) + 15 ].strip()
              tab = val.find( '\t' )
              if tab > -1 :
                val = val[ : tab ].strip()
              items[ item ] = val
    return items
              
pattern = "*.txt"
files = glob.glob( pattern ) 
              
print "files:", files

# all_data = [ process_file( file_name ) for file_name in files ]
for file_name in files :
    print process_file( file_name )

Open in new window




Sample output:
----------------------------------------------------------------------------------------------------
files: ['sample.txt']
process_file( "sample.txt" )
 1: "Please find below emp details,"
 2: ""
 3: "EMP DETAILS"
 4: "________________________________"
 5: ""
 6: "Emp No: 101       Name: RASUL K       SAL: 30000"
 7: "________________________________"
 8: ""
 9: "Join Date: 10 JAN 2010             From Native: First"
10: "Relieve Date: 25 APR 2018       To Native: Second"
11: "EmpRole: Manager"
12: "No.ofProjects handled: 10"
{'Department': None, 'Staff No': None, 'Relieve Date': '25 APR 2018', 'Name': 'RASUL K', 'Join Date': '10 JAN 2010'}
Avatar of Gani tpt
Gani tpt

ASKER

Thanks and this my exact solution.

Getting Error below.

File "<ipython-input-9-9b299f36369a>", line 4
    print 'process_file( "%s" )' % file_name
 ^
SyntaxError: invalid syntax


Also my output should come below and it will update into MS DB table.

EmpNo      Name      SAL         JoinDate               ReleiveDate
----------      --------      ------         -------------             ------------------
101             Rasul K   30000     10 JAN 2010        25 APR 2018   (First Text File)
102             Raj M      50000     20 FEB 2008        10 MAY 2015    (Second  Text File)


Finall y i composed below code. But, it's getting Error.


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-7017acad479c> in <module>()
     33 pattern = 'C:/Test/Python/Gan/*.txt'
     34 files = glob.glob(pattern)
---> 35 all_data = [process_file(file_name) for file_name in files]
     36 server = 'SIPSVA127'
     37 database = 'TEST'

<ipython-input-7-7017acad479c> in <listcomp>(.0)
     33 pattern = 'C:/Test/Python/Gan/*.txt'
     34 files = glob.glob(pattern)
---> 35 all_data = [process_file(file_name) for file_name in files]
     36 server = 'SIPSVA127'
     37 database = 'TEST'

<ipython-input-7-7017acad479c> in process_file(file_name)
     21         linenum += 1
     22         #print '%2d: "%s"' % ( linenum, line.strip() )
---> 23     return parse_line(line)
     24
     25 def write_db(data, server, database, username, password):

<ipython-input-7-7017acad479c> in parse_line(line)
      4 def parse_line(line):
      5     items = {'Staff No', 'Name', 'Department', 'Join Date', 'Relieve Date'}
----> 6     for item in items.keys() :
      7         pos = line.find( item )
      8         if not items[ item ] and pos > -1 :

AttributeError: 'set' object has no attribute 'keys'


My Code  :
---------------
import glob
import pyodbc
 
def parse_line(line):
    items = {'Staff No', 'Name', 'Department', 'Join Date', 'Relieve Date'}
    for item in items.keys() :
        pos = line.find( item )
        if not items[ item ] and pos > -1 :
                val = line[ pos + len( item ) + 2 : pos + len( item ) + 15 ].strip()
                tab = val.find( '\t' )
                if tab > -1 :
                    val = val[ : tab ].strip()
                    items[ item ] = val
    return items
 
def process_file(file_name):
    with open(file_name) as f: # No need to specify 'r': this is the default.
        data = f.readlines()
    linenum = 0
    for line in data :
        linenum += 1
        #print '%2d: "%s"' % ( linenum, line.strip() )
    return parse_line(line)
 
def write_db(data, server, database, username, password):
    connection_string = ('DRIVER={SQL Server Native Client 11.0};SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password)
    with pyodbc.connect(connection_string, autocommit=False) as cnxn:
        cur = cnxn.cursor()
        print(data)
        sql = "INSERT INTO Emp (Staffno, EmpName, Department) VALUES (?, ?, ?)"
        cur.executemany(sql, data)
         
pattern = 'D:/Test/Python/Proj/*.txt'     # ==>   TEST FILE  Link (https://1drv.ms/f/s!AiSRcgO5FUmNdbySD6gz4PwTbzk)
files = glob.glob(pattern)
all_data = [process_file(file_name) for file_name in files]
server = 'SERVERXYZ'
database = 'TEST'
username = 'TEST123'
password = 'TEST123'
write_db(data=all_data, server=server, database=database, username=username, password=password) # replace srever, database, password with actual values
anyone can help....?
----------------------------------------
File "<ipython-input-9-9b299f36369a>", line 4
    print 'process_file( "%s" )' % file_name
 ^  

Open in new window

SyntaxError: invalid syntax
----------------------------------------


This means that the "print" statement doesn't have the same indentation as the preceding line.


----------------------------------------
File "<ipython-input-9-9b299f36369a>", line 4
    print 'process_file( "%s" )' % file_name
 ^  
SyntaxError: invalid syntax
----------------------------------------
<ipython-input-7-7017acad479c> in parse_line(line)
      4 def parse_line(line):
      5     items = {'Staff No', 'Name', 'Department', 'Join Date', 'Relieve Date'}
----> 6     for item in items.keys() :
      7         pos = line.find( item )
      8         if not items[ item ] and pos > -1 :

Open in new window

----------------------------------------

AttributeError: 'set' object has no attribute 'keys'


A "set" is a datatype.  You changed the syntax from what was provided (which was a dictionary) to the syntax for a set.
Dictionaries have keys.  So the items.keys() will be valid if items is a dictionary.

Which version of Python are you using?

What is the output of the following command?

python -V


import glob
import pyodbc

def parse_line( line ):
    #---------------------------------------------------------------------------
    # items is a dictionary, each value of which is indexed by a key
    # The items dictionary is initialized so that each value is None.
    #---------------------------------------------------------------------------
    items = { 'Staff No'     : None
            , 'Name'         : None
            , 'Department'   : None
            , 'Join Date'    : None
            , 'Relieve Date' : None
            }
    #---------------------------------------------------------------------------
    # For each dictionary key, see if it exists in the current line
    #---------------------------------------------------------------------------
    for item in items.keys() :
        pos = line.find( item )
        #-----------------------------------------------------------------------
        # "not items[ item ]" is True if the specified dictionary value has not
        # yet been located and extracted from the line.
        #-----------------------------------------------------------------------
        if not items[ item ] and pos > -1 :
            #-------------------------------------------------------------------
            # if pos > -1, then the dictionary key (in the variable "item") was
            # found in the current line.  For example:
            #-------------------------------------------------------------------
            # line = "Emp No: 101       Name: RASUL K       SAL: 30000"
            #         -----+----1----+--^-2-- ^            ^
            #                                 |            |
            # The value we want is from here: +------------+ to here
            # item = "Name"
            # pos  = 18
            # len( item ) = 4
            # pos + len( item ) = 22    <<  This really should be 24!
            # pos + len( item ) + 15    <<  This really should be 37!
            #-------------------------------------------------------------------
#           val = line[ pos + len( item ) + 2 : pos + len( item ) + 15 ].strip()
            val = line[ pos + len( item ) + 4 : pos + len( item ) + 13 ].strip()
            #-------------------------------------------------------------------
            #  This expects the located value to end just before the tab char.
            #-------------------------------------------------------------------
            tab = val.find( '\t' )
            if tab > -1 :
                val = val[ : tab ].strip()
                items[ item ] = val
    return items

def process_file( file_name ):
    #---------------------------------------------
    # The default open type is "read", so there is
    # no need to specify 'r' in the open call.
    #---------------------------------------------
    with open( file_name ) as f:
        data = f.readlines()
    #---------------------------------------------
    # If you really don't want to display the file
    # contents, comment out the whole loop.
    #---------------------------------------------
#   linenum = 0
#   for line in data :
#       linenum += 1
#       print '%2d: "%s"' % ( linenum, line.strip() )
    return parse_line(line)

def write_db(data, server, database, username, password):
    connection_string = ( 'DRIVER={SQL Server Native Client 11.0};SERVER='
                        + server
                        + ';DATABASE='
                        + database
                        + ';UID='
                        + username
                        + ';PWD='
                        + password
                        )
    with pyodbc.connect( connection_string, autocommit=False ) as cnxn:
        cur = cnxn.cursor()
        print( data )
        sql = "INSERT INTO Emp (Staffno, EmpName, Department) VALUES (?, ?, ?)"
        cur.executemany( sql, data )

# ==>   TEST FILE  Link (https://1drv.ms/f/s!AiSRcgO5FUmNdbySD6gz4PwTbzk)
pattern = 'D:/Test/Python/Proj/*.txt'
files = glob.glob( pattern )
all_data = [ process_file( file_name ) for file_name in files ]
server = 'SERVERXYZ'
database = 'TEST'
username = 'TEST123'
password = 'TEST123'
#----------------------------------------
# write_db( data=all_data
#         , server=server
#         , database=database
#         , username=username
#         , password=password
#         ) # replace srever, database, password with actual values
#----------------------------------------

Open in new window

Really you are rocking..and provide the accurate with good explanation.

Below is my answers and doubt

Which version of Python are you using?

3.7.0 . ( Currently I am using Jupyter notebook for development) ---> Anyohter alternative tool is there for easy debugging...?

Can you give some explanation about the below comments.

  # line = "Emp No: 101       Name: RASUL K       SAL: 30000"
            #         -----+----1----+--^-2-- ^            ^
            #                                 |            |
            # The value we want is from here: +------------+ to here
            # item = "Name"
            # pos  = 18
            # len( item ) = 4
            # pos + len( item ) = 22    <<  This really should be 24!
            # pos + len( item ) + 15    <<  This really should be 37

Doubts
----------
# pos  = 18  --> ?  How are we identify the exact item in each line. can you give for pos=18 for name..?

# len( item ) = 4 ---> is this for name ...?

     # pos + len( item ) = 22    <<  This really should be 24!  .....> Is this for which item...?
     # pos + len( item ) + 15    <<  This really should be 37  ....> Is this for which item...?
Formatted and Execute the below code. But getting error.

Output and Error
-------------------------
1 Please find below emp details,
2
3
4 EMP DETAILS
5 ________________________________
6
7 Staff No: 101             Name: RASUL L    SAL: 30000
8 ________________________________
9
10 Join Date:       10 JAN 2010      From Native:  First
11 Releave Date:       25 APR 2018      To Native:  Second
12 Emp Designation: Manager
13 No. of Projects handled:      10
14
15
1 Please find below emp details,
2
3
4 EMP DETAILS
5 ________________________________
6
7 Staff No: 102             Name: KUMAR L    SAL: 50000
8 ________________________________
9
10 Join Date:       10 JAN 2010      From Native:  First
11 Releave Date:       25 APR 2018      To Native:  Second
12 Emp Designation: Manager
13 No. of Projects handled:      10
14
15
[{'Staff No': None, 'Name': None, 'SAL': None, 'Join Date': None, 'Relieve Date': None}, {'Staff No': None, 'Name': None, 'SAL': None, 'Join Date': None, 'Relieve Date': None}]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-13-2f8f19bdc257> in <module>()
     88 username = 'TEST123'
     89 password = 'TEST123'
---> 90 write_db(data=all_data, server=server, database=database, username=username, password=password) # replace srever, database, password with actual values

<ipython-input-13-2f8f19bdc257> in write_db(data, server, database, username, password)
     79         print( data )
     80         sql = "INSERT INTO test1 (Staffno, EmpName, SAL, JoinDate, RelieveDate) VALUES (?, ?, ?, ?, ?)"
---> 81         cur.executemany( sql, data )
     82
     83 pattern = 'D:/Test/Python/Proj/*.txt'

TypeError: ('Params must be in a list, tuple, or Row', 'HY000')


Source Code
-------------------
import glob
import pyodbc

def parse_line( line ):
    #---------------------------------------------------------------------------
    # items is a dictionary, each value of which is indexed by a key
    # The items dictionary is initialized so that each value is None.
    #---------------------------------------------------------------------------
    items = { 'Staff No'     : None
            , 'Name'         : None
            , 'SAL'          : None
            , 'Join Date'    : None
            , 'Relieve Date' : None
            }
    #---------------------------------------------------------------------------
    # For each dictionary key, see if it exists in the current line
    #---------------------------------------------------------------------------
    for item in items.keys() :
        pos = line.find( item )
        #-----------------------------------------------------------------------
        # "not items[ item ]" is True if the specified dictionary value has not
        # yet been located and extracted from the line.
        #-----------------------------------------------------------------------
        if not items[ item ] and pos > -1 :
            #-------------------------------------------------------------------
            # if pos > -1, then the dictionary key (in the variable "item") was
            # found in the current line.  For example:
            #-------------------------------------------------------------------
            # line = "Emp No: 101       Name: RASUL K       SAL: 30000"
            #         -----+----1----+--^-2-- ^            ^
            #                                 |            |
            # The value we want is from here: +------------+ to here
            # item = "Name"
            # pos  = 18
            # len( item ) = 4
            # pos + len( item ) = 22    <<  This really should be 24!
            # pos + len( item ) + 15    <<  This really should be 37!
            #-------------------------------------------------------------------
            #val = line[ pos + len( item ) + 2 : pos + len( item ) + 15 ].strip()
            val = line[ pos + len( item ) + 4 : pos + len( item ) + 13 ].strip()
            #-------------------------------------------------------------------
            #  This expects the located value to end just before the tab char.
            #-------------------------------------------------------------------
            tab = val.find( '\t' )
            if tab > -1 :
                val = val[ : tab ].strip()
                items[ item ] = val
    return items

def process_file( file_name ):
    #---------------------------------------------
    # The default open type is "read", so there is
    # no need to specify 'r' in the open call.
    #---------------------------------------------
    with open( file_name ) as f:
        data = f.readlines()
    #---------------------------------------------
    # If you really don't want to display the file
    # contents, comment out the whole loop.
    #---------------------------------------------
    linenum = 0
    for line in data :
        linenum += 1
        print( linenum, line.strip() )
    return parse_line(line)

def write_db(data, server, database, username, password):
    connection_string = ( 'DRIVER={SQL Server Native Client 11.0};SERVER='
                        + server
                        + ';DATABASE='
                        + database
                        + ';UID='
                        + username
                        + ';PWD='
                        + password
                        )
    with pyodbc.connect( connection_string, autocommit=False ) as cnxn:
        cur = cnxn.cursor()
        print( data )
        sql = "INSERT INTO test1 (Staffno, EmpName, SAL, JoinDate, RelieveDate) VALUES (?, ?, ?, ?, ?)"
        cur.executemany( sql, data )

pattern = 'C:/Test/Python/Proj/*.txt'
files = glob.glob( pattern )
all_data = [ process_file( file_name ) for file_name in files ]
server = 'SERVERXYZ'
database = 'TEST'
username = 'TEST123'
password = 'TEST123'
write_db(data=all_data, server=server, database=database, username=username, password=password) # replace srever, database, password with actual values

Open in new window

Part of the reason for the error is that I wrote it using Python 2.7 syntax, not Python 3.0 syntax.
# line = "Emp No: 101       Name: RASUL K       SAL: 30000"
#         -----+----1----+--^-2-- ^            ^   
#                                 |            |   
# The value we want is from here: +------------+ to here
# item = "Name"
# pos  = 18
# len( item ) = 4 
# pos + len( item ) = 22    <<  This really should be 24! 
# pos + len( item ) + 15    <<  This really should be 37

Open in new window


  Strings have a collection of methods, one of which (i.e., find() )
can be used to locate the position (pos) in the string of a specified
sequence of characters.  The nice thing about this method is that if
the specified sequence of characters isn't present, the method returns
-1,

  So, the value of item is one of the "words" for which we are searching.
As shown in the example, the current value is "Name".  The dashed line
below the literal string value is used to display the offsets of each
character in the string.  The first character is at offset, or position,
zero.  The "1" is used to help easily identify offset 10.  The "2" is
used to easily identify offset 20.  The "^" before the "2" is used to
identify the reason why the value of pos is 18.  The substring "Name"
occurs at offset 18, and ends 3 characters later.

  Here is an interactive Python session to show this more clearly.

$ python
Python 2.7.15 (v2.7.15:ca079a3ea3, Apr 29 2018, 20:59:26)
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> line = "Emp No: 101       Name: RASUL K       SAL: 30000"
>>> line.find( "Name" )
18
>>> line[ 18:22 ]
'Name'
>>> line[ 24:37 ]
'RASUL K      '
>>> line[ 24:37 ].strip()
'RASUL K'
>>>

TypeError                                 Traceback (most recent call last)
<ipython-input-13-2f8f19bdc257> in <module>()
     88 username = 'TEST123'
     89 password = 'TEST123'
---> 90 write_db(data=all_data, server=server, database=database, username=username, password=password) # replace srever, database, password with actual values

Indicates that the call to write_db() isn't correct.  Check the documentation for this routine, specifically the parameter, or argument list, to determine what is wrong with your statement.

<ipython-input-13-2f8f19bdc257> in write_db(data, server, database, username, password)
     79         print( data )
     80         sql = "INSERT INTO test1 (Staffno, EmpName, SAL, JoinDate, RelieveDate) VALUES (?, ?, ?, ?, ?)"
---> 81         cur.executemany( sql, data )
     82
     83 pattern = 'D:/Test/Python/Proj/*.txt'

TypeError: ('Params must be in a list, tuple, or Row', 'HY000')

The same can be said about this error as well.  Check the documentation for the executemany( ) method.
Can you pls. give the modified complete code with working condition.. Because i am getting all values output are none..?
I don't have the the database module.  What input files are you using?
It is text file.

pls. refer attached sample docs with this or the below link for sample text documents.

(https://1drv.ms/f/s!AiSRcgO5FUmNdbySD6gz4PwTbzk)

My MS-SQL DB table columns as below

Staff No
Name
SAL
Join Date
Relieve Date


final output should come below in db table (As per text file)

EmpNo            Name             SAL               JoinDate          RelieveDate
101                      RASUL K             30000      10 JAN 2010          25 APR 2018
102                       RAJ M             50000      20 FEB 2008          10 MAY 2015
Sample-File1.txt
Sample-File2-.txt
Part of your problem is the fact that your data (i.e., Sample Files)
are not well formed.

1. They include tab characters.  For example:

Sample File1.txt - with tab characters (i.e., '\t') changed to "^I"
--------------------------------------------------------------------------------
Please find below emp details,


EMP DETAILS^I
________________________________

 Emp No: 101  Name:RASUL K    SAL: 30000^I
________________________________

Join Date:^I 10 JAN 2010      From Native:  First ^I
Releave Date:^I 25 APR 2018      To Native:  Second ^I
Emp Designation: Manager ^I
No. of Projects handled:^I10


--------------------------------------------------------------------------------

2. They include typographical errors.  For example:

   "Relieve" is not spelled "Releave".

3. They aren't consistent with the way that values are separated
   from the associated field names.

   For example, the value of "Name" isn't separated from "Name:" with either a
   space, or a tab character.
   Both the value of "Join Date:" and "Relieve Date:" have both a tab and a
   space before the value

4. The fields (e.g., "SAL") aren't always in the same positions (column)

If we correct these errors in the input files, they become:

Sample File1.txt -
--------------------------------------------------------------------------------
Please find below emp details,


EMP DETAILS
________________________________

 Emp No: 101  Name: RASUL K    SAL: 30000
________________________________

Join Date: 10 JAN 2010       From Native:  First
Relieve Date: 25 APR 2018      To Native:  Second
Emp Designation: Manager
No. of Projects handled: 10


--------------------------------------------------------------------------------


Sample File2.txt -
--------------------------------------------------------------------------------
Please find below emp details,


EMP DETAILS
________________________________

 Emp No: 102  Name: RAJ M      SAL: 50000
________________________________

Join Date: 20 FEB 2008       From Native:  First
Relieve Date: 10 MAY 2015      To Native:  Second
Emp Designation: Manager
No. of Projects handled: 15


--------------------------------------------------------------------------------

Sample output:
--------------------------------------------------------------------------------
Processing: Sample File1.txt
 1: "Please find below emp details,"
 2: ""
 3: ""
 4: "EMP DETAILS"
 5: "________________________________"
 6: ""
 7: "Emp No: 101  Name: RASUL K    SAL: 30000"
item = "Name"  pos = 14  line[ 19:31 ] = " RASUL K    "
 8: "________________________________"
 9: ""
10: "Join Date: 10 JAN 2010       From Native:  First"
item = "Join Date"  pos = 0  line[ 10:22 ] = " 10 JAN 2010"
11: "Relieve Date: 25 APR 2018      To Native:  Second"
item = "Relieve Date"  pos = 0  line[ 13:25 ] = " 25 APR 2018"
12: "Emp Designation: Manager"
13: "No. of Projects handled: 10"
14: ""
15: ""
results: {'Name': 'RASUL K', 'Join Date': '10 JAN 2010', 'Relieve Date': '25 APR 2018'}
Processing: Sample File2.txt
 1: "Please find below emp details,"
 2: ""
 3: ""
 4: "EMP DETAILS"
 5: "________________________________"
 6: ""
 7: "Emp No: 102  Name: RAJ M      SAL: 50000"
item = "Name"  pos = 14  line[ 19:31 ] = " RAJ M      "
 8: "________________________________"
 9: ""
10: "Join Date: 20 FEB 2008       From Native:  First"
item = "Join Date"  pos = 0  line[ 10:22 ] = " 20 FEB 2008"
11: "Relieve Date: 10 MAY 2015      To Native:  Second"
item = "Relieve Date"  pos = 0  line[ 13:25 ] = " 10 MAY 2015"
12: "Emp Designation: Manager"
13: "No. of Projects handled: 15"
14: ""
15: ""
results: {'Name': 'RAJ M', 'Join Date': '20 FEB 2008', 'Relieve Date': '10 MAY 2015'}
all_data: [{'Name': 'RASUL K', 'Join Date': '10 JAN 2010', 'Relieve Date': '25 APR 2018'},
           {'Name': 'RAJ M', 'Join Date': '20 FEB 2008', 'Relieve Date': '10 MAY 2015'}]
--------------------------------------------------------------------------------

import glob
# import pyodbc

def parse_line( line ):
    #---------------------------------------------------------------------------
    # items is a dictionary, each value of which is indexed by a key
    # The items dictionary is initialized so that each value is None.
    #---------------------------------------------------------------------------
    items = { 'Staff No'     : None
            , 'Name'         : None
            , 'Department'   : None
            , 'Join Date'    : None
            , 'Relieve Date' : None
            }
    #---------------------------------------------------------------------------
    # For each dictionary key, see if it exists in the current line
    #---------------------------------------------------------------------------
    for item in items.keys() :
        pos, L = line.find( item ), len( item )
        #-----------------------------------------------------------------------
        # Does line contain the current item?
        #-----------------------------------------------------------------------
        if not items[ item ] and pos > -1 :
            start, finish = pos + L + 1, pos + L + 13
            print( 'item = "%s"  pos = %d  line[ %d:%d ] = "%s"' % (
                item
              , pos
              , start
              , finish
              , line[ start : finish ]
              ) 
            ) 
            val = line[ start : finish ].strip()
            #-------------------------------------------------------------------
            #  This expects the located value to end just before the tab char.
            #-------------------------------------------------------------------
            tab = val.find( '\t' )
            if tab > -1 :
                val = val[ : tab ].strip()
            items[ item ] = val
    return items
    
def process_file( file_name ):
    print( 'Processing:', file_name )
    #---------------------------------------------
    # The default open type is "read", so there is
    # no need to specify 'r' in the open call.
    #---------------------------------------------
    with open( file_name ) as f:
        data = f.readlines()
    #---------------------------------------------
    # If you really don't want to display the file
    # contents, comment out the whole loop.
    #---------------------------------------------
    linenum = 0
    results = {}
    for line in data :
        linenum += 1 
        print( '%2d: "%s"' % ( linenum, line.strip() ) )
        info = parse_line( line )
        for item in info.keys() :
            if info[ item ] :
                results[ item ] = info[ item ]
    print( 'results:', results )
    return results

# def write_db(data, server, database, username, password):
#     connection_string = ( 'DRIVER={SQL Server Native Client 11.0};SERVER='
#                         + server
#                         + ';DATABASE='
#                         + database
#                         + ';UID='
#                         + username
#                         + ';PWD='
#                         + password
#                         )
#     with pyodbc.connect( connection_string, autocommit=False ) as cnxn:
#         cur = cnxn.cursor()
#         print( data )
#         sql = "INSERT INTO Emp (Staffno, EmpName, Department) VALUES (?, ?, ?)"
#         cur.executemany( sql, data )

# ==>   TEST FILE  Link (https://1drv.ms/f/s!AiSRcgO5FUmNdbySD6gz4PwTbzk)
pattern = '*.txt'
files = glob.glob( pattern )
all_data = [ process_file( file_name ) for file_name in files ]
print( "all_data:", all_data )

# server = 'SERVERXYZ'
# database = 'TEST'
# username = 'TEST123'
# password = 'TEST123'
#----------------------------------------
# write_db( data=all_data
#         , server=server
#         , database=database
#         , username=username
#         , password=password
#         ) # replace srever, database, password with actual values
#----------------------------------------

Open in new window

Thanks a lot. i am trying to print and update only values  in a table like below.
{ '101', 'RASUL L', '30000','10 JAN 2010', '25 APR 2018'}
{'102','RASUL L','30000','25 MAY 2010', '10 JUNE 201'}

but, i am getting error.

Processing: C:/Test/Python/Proj\TEST1.txt
results: {'StaffNo': '101', 'Name': 'RASUL L', 'SAL': '30000', 'JoinDate': '10 JAN 2010', 'ReleaveDate': '25 APR 2018'}
Processing: C:/Test/Python/Gan\TEST2.txt
results: {'StaffNo': '102', 'Name': 'RASUL L', 'SAL': '30000', 'JoinDate': '25 MAY 2010', 'ReleaveDate': '10 JUNE 201'}
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-21-e24fc66c591f> in <module>()
     91 password = 'XXXX'
     92 #----------------------------------------
---> 93 write_db( data=all_data, server=server, database=database, username=username, password=password) # replace srever, database, password with actual values
     94 #----------------------------------------

<ipython-input-21-e24fc66c591f> in write_db(data, server, database, username, password)
     79             #print( data )
     80             sql = "INSERT INTO test1 (Staffno, EmpName, SAL, JoinDate, ReleaveDate) VALUES (?, ?, ?, ?, ?)"
---> 81             cur.executemany( sql, data )
     82
     83 pattern = 'C:/Test/Python/Proj/*.txt'

TypeError: ('Params must be in a list, tuple, or Row', 'HY000')
Sorry, I do not have the pyodbc module, or any type of database.
The error appears to indicate that the values passed to the write_db() routine are not what they are expected / required to be.

If you use the following statements just after "def write_db(...):", I think that you will see that the exception is correct.

    print( 'type( data ):', type( data ) )
    print( data )

Open in new window


  You may have to iterate over the individual values within data in order to do what you want to do...
For example, maybe something like this:

def write_db(data, server, database, username, password):
    print( 'type( data ():', type( data ) )
    print( data )
    connection_string = ( 'DRIVER={SQL Server Native Client 11.0};SERVER='
                        + server
                        + ';DATABASE='
                        + database
                        + ';UID='
                        + username
                        + ';PWD='
                        + password
                        )
    for entry in data :
        print( 'type( entry ):', type( entry ) )
        print( entry )
        with pyodbc.connect( connection_string, autocommit=False ) as cnxn:
            cur = cnxn.cursor()
            print( data ) 
            sql = "INSERT INTO Emp (Staffno, EmpName, Department) VALUES (?, ?, ?)"
            cur.executemany( sql, entry )

Open in new window

sorry for the late response.

i tried the above code. but, getting below error.

---------------------------------------------------------------------------
ProgrammingError                          Traceback (most recent call last)
<ipython-input-13-b678873a90b0> in <module>()
     96 password = 'xxxxxxxx'
     97 #----------------------------------------
---> 98 write_db( data=all_data, server=server, database=database, username=username, password=password) # replace srever, database, password with actual values
     99 #----------------------------------------

<ipython-input-13-b678873a90b0> in write_db(data, server, database, username, password)
     84             print( 'Entry-2 :', entry )
     85             sql = "INSERT INTO Emp (Staffno, EmpName, SAL) VALUES (?, ?, ?)"
---> 86           cur.executemany( sql, entry )
     87
     88 pattern = 'C:/Test/Python/Proj/*.txt'

ProgrammingError: The second parameter to executemany must be a sequence, iterator, or generator.


Sourcecode (DB Part)
-------------------------------
def write_db(data, server, database, username, password):
    #print( 'type( data ):', type( data ) )
    #print( data )
    connection_string = ( 'DRIVER={SQL Server Native Client 11.0};SERVER='
                         + server
                         + ';DATABASE='
                         + database
                         + ';UID='
                         + username
                         + ';PWD='
                         + password
                         )
    for entry in data :
        #print( 'type( entry ):', type( entry ) )
        #print( 'Entry-1 :', entry )
        with pyodbc.connect( connection_string, autocommit=False ) as cnxn:
            cur = cnxn.cursor()
            print( 'Entry-2 :', entry )
            sql = "INSERT INTO Emp (Staffno, EmpName, SAL) VALUES (?, ?, ?)"
            cur.executemany( sql, entry )


Note : the final entry value should come only values. But now, it will come key and values pair (ex : {StaffNo : 100 ; EmpName : Rasul L, SAL : 30000)

But, i want only values (Ex. { 101,Rasul,30000)
(102,Raj k, ,5000)
etc...
ASKER CERTIFIED SOLUTION
Avatar of HonorGod
HonorGod
Flag of United States of America image

Link to home
membership
Create a free account to see this answer
Signing up is free and takes 30 seconds. No credit card required.
See answer
Thanks for your extremely helpful...I really felt down your approaches and great solution..

We need some technically motivated people like you..appreciate you again...

Note : we have many documents like text, image, pdf,etc. which we want to read text / handwritten text from those documents...

pls. suggest and guide  how to  read hand written text/ normal text from those doc's
SOLUTION
Link to home
membership
Create a free account to see this answer
Signing up is free and takes 30 seconds. No credit card required.
great buddy....excellent...