Solved

last.c - function parseLine(char *line)

Posted on 1997-04-27
11
585 Views
Last Modified: 2012-06-21
I got this function, parseLine(char *line) from expert,
'LucHoltkamp' and I still do not understand it fully.
I am trying to filter out ONLY those words in a C source
code file which are user defined and print them out in a
report:

printf 43, 239, 332 /* occurances of printf on these line numbers. */

I SHALL be PURCHASING points this time around.

I start the offer at 100 points. If it works,
it gets an instant 'A' from me.

MY QUESTION IS:

HOW should this function WORK?

HERE'S the code. I've already run a file named:PARSE1.C
through a program to produce the file named:PARSE1.OUT,
with line numbers prefixed.

Please pay particular attention to the function:parseLine()
It's not working properly, I don't know why.

-----------------------------------------------------------
/* LAST.C */

/* Last updated or modified by Jim Nowlin: 4/27/97 */

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <malloc.h>

#define MAXARRAY 80

#define LT(x,y) (strcmp((x), (y)) < 0)
#define GT(x,y) (strcmp((x), (y)) > 0)

#define TRUE 0
#define FALSE 1


typedef struct tnode {
      char *key;
      struct tnode *left, *right;
} TREE_NODE;



static int inComment = 0;
char file_buffer[MAXARRAY], buffer[MAXARRAY];
TREE_NODE *root, *second_root;

/* FUNCTION PROTOTYPES */

void parseLine(char *line);
void free_node_mem(TREE_NODE *p);
TREE_NODE *addtree(TREE_NODE *p, char *w);
char *strdup(char *w);
TREE_NODE *talloc(void);
TREE_NODE* TreeSearch(TREE_NODE *root, char* word);
TREE_NODE *Newtree(TREE_NODE *p, char *w);
void visit_node(TREE_NODE *p);


int main(void)
{

FILE *infp, *check_file;


if ((infp = fopen("reserved.txt","r")) == NULL) {
     printf("Error in input file name.\n");
     exit(1);
     }

        while( fgets(file_buffer,MAXARRAY , infp)  != NULL ) {
     file_buffer[strlen(file_buffer) - 1] = '\0';
     printf("%s\n",file_buffer);
     root = addtree(root, file_buffer);
     }




     if ((check_file = fopen("parse1.out","r")) == NULL) {
     printf("Error in input file name.\n");
     exit(1);
     }

     while( fgets(buffer,MAXARRAY , check_file)  != NULL ) {
     parseLine(buffer);
     /* buffer[strlen(buffer) - 1] = '\0'; */
     /* printf("%s",file_buffer); */

     }
      free_node_mem(root);

return 0;
}  /* end main */


/* PARSE each and every line of the file, bypassing spaces, words within
      comments, escape character, '/', and others.
*/


void parseLine(char *line)
{
int i=0;

while (line[i])
   {
      if (inComment == TRUE) /* are we still in a comment ?? */ {


/* while we are at the beginning of a comment..... */

 while (line[i] && line[i] != '/' && line[i+1] != '*') ++i; /* start of comment */
 if (line[i]) { i+=2; inComment = 0; }
      } /* end if (inComment) */

/* when we reach, in a line, the ending comment symbols, '/*', .......
   inComment is assigned FALSE
*/
      else if (line[i] == '*' && line[i+1] == '/') /* end of comment ? */
      {
            inComment = FALSE; i+=2;
      }

      else if (isalpha(line[i]) || line[i] == '_') /* is this the start of word? */
      {

 char word[80]; int j = 0;

 /* create the user-defined word */
  while ( (line[i]) && ( isalpha(line[i]) || isdigit(line[i]) || line[i] == '_') )
 word[j++] = line[i++];
 word[j] = '\0';

 /* check to see if word is a C reserved word. If not, add word to Newtree */
 if (!TreeSearch(root,word)) {
 second_root =  Newtree(second_root, word);
 }

 else if (line[i] == '"') /* is this a string?  We don't WANT strings.  */
  ++i;


/* while line[i] is NOT a string, trap for the '\\' escape character sequence. */

 while (line[i] && line[i] != '"')
     {
      if (line[i] == '\\') ++i; /* Watch for special escape char HERE !! */
      ++i;
     }
 }

 else if (line[i] == '\'') /* start of tab, newline character, etc. ?? */
 {
 ++i;
      while (line[i] && line[i] != '\'')
        {
            if (line[i] == '\\')
            ++i; /* Watch special escape char !! */

            ++i;
        }  /* end while (line[i] && line[i] != '\'')  */

 } /* end else-if */


/* This next else-if is an attempt to save the line numbers which have
   been prefixed to each line of the file using another program.
*/

 else if ( isdigit(line[i]) /* line numbers 1-9 */
 || ( isdigit(line[i]) && isdigit(line[i++]))/*line numbers 10-99*/
 || ( isdigit(line[i]) && isdigit(line[i++]) && isdigit(line[i+2])/*line numbers 100-999*/)  ) /* start of number ?? */
{
char line_number[4];
while ( isdigit(line[i]) ) {
         /* SAVE THE LINE NUMBER */
strcpy (line_number, line);
line++;
}
 /* Left as a practice for you, remember you have to filter out all
possibilities
 like 0xA23F or 1e-3. (You must avoid the loop to recognize
 the x, e, A or F as a word) */
 }
 else ++i; /* skip everything else */
   }  /* end while */
} /* end parseLine(*w) */


/* This function, addtree(*p,*w), is designed to create a binary search
   tree and add all of the reserved words in C, all 32 of them, into
   the tree. It is called just after fgets reads a line from a file
   named: RESERVED.TXT. RESERVED.TXT contains, in Unsorted order, one
   reserved word on each line at a time, the 32 reserved words in C.
*/


TREE_NODE *addtree(TREE_NODE *p, char *w)
{
  int cond;

  if (p == NULL) {
      p= talloc();      /* Make a new node. */
      p->key = strdup(w);
      p->left = p->right = NULL;

/* Check to see if the reserved word is already in the tree. */

  } else if ((cond = strcmp(w, p->key)) == 0) {
      printf("Error: key \"%s\" already in tree structure", w);
      exit(-1);
    }

/* If the word is NOT already in the tree, go to the left OR right subtree. */

  else if (cond < 0) /* go into left subtree. */
      p->left = addtree(p->left, w);
  else               /* go into right subtree. */
      p->right = addtree(p->right, w);

  return p;
}  /* End addtree. */



/* The function strdup(*w) copies the string w to a safe place in memory, return the location
   pointer to caller. strdup allocates the EXACT memory needed, no more,
   no less. */


char *strdup(char *w)


{
  char *p;

  p = (char *) malloc(strlen(w)+1); /* 1 added for null terminator. */
  if (p != NULL)
      strcpy(p, w);
  return p;
}


/* The functon talloc(void) is as follows. It creates, dynamically,
   a struct of type TREE_NODE, as the number of nodes which NEED
   to be created warrants.
*/



TREE_NODE *talloc(void)

/* Cast pointer , return NULL if malloc fails. */

{

  return (TREE_NODE *) malloc(sizeof(TREE_NODE));
}


/* This function, free_node_mem(*p), makes a call to free. free takes
   the pointer to the type TREE_NODE as its argument.
*/


void free_node_mem(TREE_NODE *p)
{
  free(p);
}


/* This function, TreeSearch( root, word ), was copied from the class
   textbook, "Data Structures and Program Design in C", 2nd edition,
   by Robert Kruse, C.L. Tondo and Bruce Leung. It utilizes 2 macros
   to determine WHERE in the established BINARY SEARCH TREE a string
   passed to the function is. It is determined in main() whether
   the string passed is IN the tree or not.
*/

TREE_NODE* TreeSearch(TREE_NODE *root, char* word)
{

             if(root)
            if (LT(word, root->key))
                  root = TreeSearch(root->left, word);
            else if (GT(word, root->key))
                  root = TreeSearch(root->right, word);

            return (root);

} /* end TreeSearch */




/* Newtree(*p,*w) is designed to create a NEW binary tree for the user
   defined words AND the line numbers. This function is called from within
   the function parseLine(*s) after a test to see if 'word' is in fact one
   of the 32 reserved words in C.
*/


TREE_NODE *Newtree(TREE_NODE *p, char *w)
{
  int cond;

  if (p == NULL) {
      p= talloc();      /* Make a new node. */
      p->key = strdup(w);
      p->left = p->right = NULL;


  }

/* If the word is NOT already in the tree, go to the left OR right subtree. */

  if ( (cond = strcmp(w, p->key) <  0  ) ) /* go into left subtree. */
      p->left = Newtree(p->left, w);
  else if ( (cond = strcmp(w, p->key) >  0 ) )            /* go into right subtree. */
      p->right = Newtree(p->right, w);

      /* if they are equal, send it to the left subtree. */
      else
       p->left = Newtree(p->left, w);

  return p;
}  /* End Newtree. */


/* inorder_treeprint - use in-order traversal of tree p */
void inorder_treeprint(TREE_NODE *p)
{
  if (p != NULL) {
    inorder_treeprint(p->left);
    visit_node(p);
    inorder_treeprint(p->right);
  }
  free_node_mem(p);
}  /* End inorder_treeprint. */


/* Function visit_node(*p), takes a pointer to a TREE_NODE, p,
   and outputs the string of each of the nodes it 'visits'
   when it is called to do so.
*/


void visit_node(TREE_NODE *p)
{
 printf("%s\n", p->key);


----------------------------------------------------------
PARSE1.OUT

1 #include <stdio.h>
2
3 int integer;
4
5 char character;
6
7 double doubled;
8
9 /* Comment is added here. */
10
11 float check_sum_total( float one, int two);
12
13 int main(void)
14 {
15
16 float first = 0.0, second = 0.0;
17
18 check_sum_total( first, second);
19
20 return 0;
21
22 }
23
24 float check_sum_total( float one, int two)
25 {
26
27 float hold;
28
29 hold = one + two;
30
31 return (hold);
32 }


THANK YOU.

Jim
0
Comment
Question by:jnowlin
  • 3
  • 3
  • 2
  • +2
11 Comments
 

Expert Comment

by:gambito042797
ID: 1250133
Hi,

first of all you have to change the definitions for the constants TRUE to 1 and FALSE to 0, because in this way they lead to less confussion. After this you have to change the beginning two ifs in your ParseLine function as indicated below:

You have to put the TRUE, FALSE and inComment in its rigth positions, I include them where you'll see only for remember you to change its values.

I've only corrected the beginning two ifs because the rest seems to be OK, but if it fails, please let me know. Hope this helps you.


#define TRUE 1
#define FALSE 0
               
               
static int inComment = FALSE;
               

void parseLine(char *line)
{
int i=0;

while (line[i])
{

   if (inComment == TRUE) /* are we in a comment ?? */ {

      /* wE SKIP ALL THE COMMENT ..... */
      while (line[i] && (line[i] != '*' || line[i+1] != '/')) ++i; /* END of comment */      

      if (line[i]) /* if don't reached the end of file */
          { i+=2; inComment = FALSE; }
   } /* end if (inComment) */

   /* when we reach, in a line, the BEGINNING comment symbols, '/*', .......
   inComment is assigned TRUE */
   
   else if (line[i] == '/' && line[i+1] == '*') /* beginning of comment ? */
   {
      inComment = TRUE; i+=2;
   }

   else if (isalpha(line[i]) || line[i] == '_') /* is this the start of word? */
   {

      char word[80]; int j = 0;

      /* create the user-defined word */
      while ( (line[i]) && ( isalpha(line[i]) || isdigit(line[i]) || line[i] == '_') )
      word[j++] = line[i++];
      word[j] = '\0';

      /* check to see if word is a C reserved word. If not, add word to Newtree */
      if (!TreeSearch(root,word)) {
         second_root = Newtree(second_root, word);
      }

      else if (line[i] == '"') /* is this a string? We don't WANT strings. */
            ++i;


      /* while line[i] is NOT a string, trap for the '\\' escape character sequence. */

      while (line[i] && line[i] != '"')
      {
         if (line[i] == '\\') ++i; /* Watch for special escape char HERE !! */
            ++i;
      }
   }

   else if (line[i] == '\'') /* start of tab, newline character, etc. ?? */
   {
      ++i;
      while (line[i] && line[i] != '\'')
      {
         if (line[i] == '\\')
            ++i; /* Watch special escape char !! */

         ++i;
      } /* end while (line[i] && line[i] != '\'') */

   } /* end else-if */


   /* This next else-if is an attempt to save the line numbers which have
   been prefixed to each line of the file using another program.
   */

   else if ( isdigit(line[i]) /* line numbers 1-9 */
   || ( isdigit(line[i]) && isdigit(line[i++]))/*line numbers 10-99*/
   || ( isdigit(line[i]) && isdigit(line[i++]) && isdigit(line[i+2])/*line numbers 100-999*/) ) /* start of number ?? */
   {
      char line_number[4];
      while ( isdigit(line[i]) ) {
         /* SAVE THE LINE NUMBER */
         strcpy (line_number, line);
         line++;
      }
      /* Left as a practice for you, remember you have to filter out all
      possibilities
      like 0xA23F or 1e-3. (You must avoid the loop to recognize
      the x, e, A or F as a word) */
   }
   else ++i; /* skip everything else */
} /* end while */
} /* end parseLine(*w) */

 
0
 
LVL 3

Expert Comment

by:LucHoltkamp
ID: 1250134
Hi jnowlin,
I was browsing the questions and checked this one out because it was a familiar function (parseLine(char*)).
When I look at your code I agree with gambito, you should use 1 for TRUE and 0 for FALSE, and there is a problem with the first if statements.
But furthermore:
* you forgot (or placed wrongly) parenthesis somewhere. (You made
  an error copying the code I've written)
* you cannot test for your linenumberdigits IN the while-loop.
  The while loop can also encounter digits somewhere inside the
  line, and this would go wrong.
* I advice you to do the parsing first, and afterwards add the
  linenumbers

I can understand that you made an error copying my code, because all the indentions in the code are gone in experts-exchange.
If you send me an email with you emailadress I send you the function again. (Including a little exampleprogram) I've added a numbercheck too (the piece I left you as a practice) and I expanded it so that it skip's #define.

Luc Holtkamp (email: lholtkam@plex.nl)
0
 

Expert Comment

by:gambito042797
ID: 1250135
LucHoltkamp...Excuse me please if I've put my fingers in a previous initiated item. Simply I've seen the item and enjoied by solving it.
0
 

Expert Comment

by:gambito042797
ID: 1250136
jnowlin, please, reject the answer and give the points to LucHoltkamp, thanks, next time I'll look better at the history.
0
 

Author Comment

by:jnowlin
ID: 1250137
Hello Luc Holtkamp!
Hello gambito!

I don't know who this is going to, but I am rejecting
it from gambito in order to award the points to LucHoltkamp, since it was LucHoltkamp's code from the start.

I don't know why the code came messed up. I guess that when
it got placed into the HTML file at Experts Exchange, it
was fouled up there.

My name is Jim Nowlin.
My email address is: jnowlin@ma.ultranet.com

I don't do coding for a living. I only get involved as I dabble with it in my spare time or, as in the case of the last 13 weeks,
in an academic setting. So the Experts Exchange is an invaluable
tool for radiation safety specialists such as myself.

Another expert ( and our professor hinted at tokens ) suggested
I use strtok(). srttok() is a BORLAND-ONLY function, it is NOT, that I'm aware of, ANSI. I think I should stick with Luc's because I've been trying to adhere to ANSI specs.

I appologize if this transmission is duplicated or MORE.

Jim Nowlin

jnowlin@ma.ultranet.com
0
How to run any project with ease

Manage projects of all sizes how you want. Great for personal to-do lists, project milestones, team priorities and launch plans.
- Combine task lists, docs, spreadsheets, and chat in one
- View and edit from mobile/offline
- Cut down on emails

 

Author Comment

by:jnowlin
ID: 1250138
Hello Luc Holtkamp!
Hello gambito!

I don't know who this is going to, but I am rejecting
it from gambito in order to award the points to LucHoltkamp, since it was LucHoltkamp's code from the start.

I don't know why the code came messed up. I guess that when
it got placed into the HTML file at Experts Exchange, it
was fouled up there.

My name is Jim Nowlin.
My email address is: jnowlin@ma.ultranet.com

I don't do coding for a living. I only get involved as I dabble with it in my spare time or, as in the case of the last 13 weeks,
in an academic setting. So the Experts Exchange is an invaluable
tool for radiation safety specialists such as myself.

Another expert ( and our professor hinted at tokens ) suggested
I use strtok(). srttok() is a BORLAND-ONLY function, it is NOT, that I'm aware of, ANSI. I think I should stick with Luc's because I've been trying to adhere to ANSI specs.

I appologize if this transmission is duplicated or MORE.

Jim Nowlin

jnowlin@ma.ultranet.com
0
 

Expert Comment

by:jfbe
ID: 1250139
OK, first of all I didn't catch entirelly what was your purpose
but few comments helped me to understand a bit...

 One obvious problem is that you have a condition:

  while (line[i] && line[i]!='/' && line[i+1]!='*') i++;

  your purpose here seems to detect and discard something within a comment: the logic is bad, if you have  a=12/3  than your code
will detect the begining of a comment, or even if you have a
multiplication also. you should write:
if (line[i]=='/' && line[i+1]=='*')
{
   /* begining of a comment, do what you want
}  

  A small advice: your code is hardly lisible, the indentation may be due to the fact that you imported a file for an email,
but you should have only ONE loop where i is increment at one place, andall your flags inside: beginComent(false or true), beginstring (false or true) etc... This would be much simpler to read!!!

A second problem that I saw is that you wrote somewhere
else if(line[i]==""") /* is this a string
   ++i

 you seems to want to discard strings... Up to you, you're in a free country, but your else if before is written after a !treesearch condition, so I suspect that this else will
never be executed since your top condition before was to go
there only if the string started with a isdigit char... in other words, your else if line[i]=="""" is imbricated at a level wich is not the right one. I'm not even sure that you wrote a """ rather than a '"', or perhaps a '\"', I've not my books with me.
Well, there may be other errors, but there's at least 2 logical buggs that I detected and I stopped there: please, rewrite it
with a single loop if you want to understand your own code, probably you won't need any help in such a case...

 Jean-Francois

0
 

Expert Comment

by:jfbe
ID: 1250140
Well, I've just reread your source, this first line about a comment, it seems you want to reach the begining of a comment
and flush anything that is before (don't think it was your purpose) but it is the only thing it could do...But it is inside
a if (incomment==true) while you incomment seems to never be assigned with a true value nowhere...Anyway, as I said, one loop is enough.
0
 
LVL 2

Expert Comment

by:Philippe
ID: 1250141

Jnowlin,

If you need to parse things, you should use the right tools. In your case I would use lex, which generates the parse function from a short description of the language. It should take no more than 20 lines to do your parser with lex and it would be much more readable and reliable. Lex exists on all unix system and most certainly also for windows/dos.

Also, to parse a string, you would be better of using the strtok function from the standard lib, which can return the next substring not containing some delimitors that you specify.

I'd be happy to do the lex version of your parser if you put a question up for it.

 cheers,

   Philippe

0
 
LVL 3

Accepted Solution

by:
LucHoltkamp earned 100 total points
ID: 1250142
To All,

For those that were reading this, the comment part did have a bug in it, it should be
if (line[i] && (line[i] != '*' || line[i] != '/')) and not
if (line[i] && (line[i] != '*' && line[i] != '/'))
sorry for that.
Yes, code-indentions get mangled in experts-exchange. So I'll send jnowlin a email with a simple but working parser.
It skips comment and add's linenumbers. Also it filters out all identifiers. You still have to make a treestructure to save the found identifiers in though.


0
 

Author Comment

by:jnowlin
ID: 1250143
Thanks again Luc!

By the way, how are you at hashing? I NEVER fully was able to do an assignment involving hashing, but my personal summertime project is to write a text editor( DOS, then Windows 3.1, then Linux, then Windows95 and finally Windows NT. Boy! Am I a dreamer, eh?! Hashing, conceptually to me, makes so very much
sense in terms of run time and so forth. I got MANY flames here
while grappling(?) with that one. I figure, someone who can write
a text editor, include a spell checker perhaps, and write one which will run on ALL major platforms/OSs, well , that person can program! It would be so much better than a resume alone.

I hope this is enough points for any level of annoyance I may have caused. If not, PLEASE let me know.

Respectfully,

Jim Nowlin

jnowlin@ma.ultranet.com

SOMEWHERE in Massachusetts, U.S.A.

0

Featured Post

Top 6 Sources for Identifying Threat Actor TTPs

Understanding your enemy is essential. These six sources will help you identify the most popular threat actor tactics, techniques, and procedures (TTPs).

Join & Write a Comment

Suggested Solutions

This tutorial is posted by Aaron Wojnowski, administrator at SDKExpert.net.  To view more iPhone tutorials, visit www.sdkexpert.net. This is a very simple tutorial on finding the user's current location easily. In this tutorial, you will learn ho…
Summary: This tutorial covers some basics of pointer, pointer arithmetic and function pointer. What is a pointer: A pointer is a variable which holds an address. This address might be address of another variable/address of devices/address of fu…
The goal of this video is to provide viewers with basic examples to understand and use pointers in the C programming language.
The goal of this video is to provide viewers with basic examples to understand and use conditional statements in the C programming language.

708 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question

Need Help in Real-Time?

Connect with top rated Experts

13 Experts available now in Live!

Get 1:1 Help Now