Link to home
Start Free TrialLog in
Avatar of Triona
Triona

asked on

ispunct function - removing punctuation marks from a text file

Hi. I have a little search program for searching a text file for a particular word. I'd like to add a function to scan the file to find and remove all punctuation marks from the file before the search is performed. I'm guessing i use the ispunct function but can't get it working properly. My code is below.
Cheers,
Triona

#include <stdio.h>
#include <string.h>
#include <ctype.h>

/*
 * Search essays.txt for a word read from standard input.
 *
 * Each line of the input file is split into tokens on '[' and ']'.  Every
 * token that contains the search string is printed to stdout and appended
 * to results.txt, one token per line.  A total hit count is printed at the
 * end.
 *
 * Returns 0 on success, 1 if a file cannot be opened or no search string
 * could be read.
 */
int main(void)
{
  FILE *essays;
  FILE *results;
  char line[250];
  char Search[80];
  const char *delimiters = "[]";
  char *test;
  int count = 0, line_count = 0;

  /* Open file to be searched and file to be written to */

  if ((essays = fopen("essays.txt", "r")) == NULL)
  {
      printf("Unable to open the input file");
      return 1;
  }

  if ((results = fopen("results.txt", "w")) == NULL)
  {
      printf("Unable to open output file");
      fclose(essays);   /* do not leak the already-opened input file */
      return 1;
  }

  /* Get string to be searched for */

  printf("Enter string to be searched for:\n");
  /* The width keeps scanf from writing past the 80-byte buffer
     (79 characters plus the terminating '\0'). */
  if (scanf("%79s", Search) != 1)
  {
      printf("No search string entered\n");
      fclose(results);
      fclose(essays);
      return 1;
  }

  /* Loop on fgets itself: feof() only becomes true after a read has
     already failed, so it is not a reliable loop condition. */
  while (fgets(line, sizeof line, essays) != NULL)
  {
      line_count++;

      /* fgets keeps the line terminator; drop it so the last token on a
         line does not carry a stray newline into the output. */
      line[strcspn(line, "\r\n")] = '\0';

      /* Break up line into tokens and search each one for the string */
      for (test = strtok(line, delimiters);
           test != NULL;
           test = strtok(NULL, delimiters))
      {
          if (strstr(test, Search) != NULL)
          {
              count++;
              printf("Entry %d on line %d: %s\n", count, line_count, test);
              fputs(test, results);
              fputs("\n", results);
          }
      }
  }

  printf("The string %s occurs %d times\n", Search, count);
  fclose(results);
  fclose(essays);

  return 0;
}


Avatar of gj62
gj62

after you get your token, to remove punctuation you can do the following:

 char test[]="this.is.a.test!!!";
 char *p;

p=test;
while(*p)
{
  if (ispunct(*p))
  {
    memmove(p,p+1,strlen(p));
  }
  else
    p++;

}

which will leave test = "thisisatest"
Wouldn't it make more sense to test for punctuation in the search itself?  After all, what about a situation like

no,spaces,here

If you yank the punctuation, you get

nospaceshere

so searching for "spaces" as a word will fail.  If it succeeds, it will also mess up and indicate a hit with "spaceship," which doesn't sound like what you want to do.
He's only testing for [ and ] as his delimiters - I assumed they were word delimiters.

If not, he can either make the delimiters all the punctuation, or maybe he should be replacing punctuation with delimiters BEFORE strtok, as follows:

char *p;
if (fgets(line, 250, essays) != NULL)
{
  /* Overwrite every punctuation character with a strtok delimiter so that
     punctuation splits tokens instead of ending up inside them. */
  p=line;
  while(*p)
  {
    /* cast: <ctype.h> functions take a value in unsigned char range */
    if(ispunct((unsigned char)*p))
    {
      *p='[';  /*or any delimiter you test for*/
    }
    p++;
  }
   
now strtok...
rest of code here...
Avatar of Triona

ASKER

Thanks, but I still want to keep the words separate; I just want to remove punctuation like full stops, commas etc. How do I go about that?
ASKER CERTIFIED SOLUTION
Avatar of KurtVon
KurtVon

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Uh, not the memmove solution, but the simple replace solution, right?   e.g.

char *p;
if (fgets(line, 250, essays) != NULL)
{
 /* Blank out punctuation so words stay separated by whitespace.
    NOTE: '[' and ']' are punctuation too, so they are replaced as well;
    tokenise on " " afterwards rather than on "[]". */
 p=line;
 while(*p)
 {
   /* cast: <ctype.h> functions take a value in unsigned char range */
   if(ispunct((unsigned char)*p))
   {
     *p=' ';  /*replace all punctuation with spaces...*/
   }
   p++;
 }

you could have more than 1 space in a row - do you want just 1 space?
Gee, kinda thought that would be for me :-( <grin>
Um, I think gj62 should be getting those points.

I was just trying to be helpful outside the question.
Oh, never mind, it is a B - you can have it Kurt <grin>
Avatar of Triona

ASKER

Sorry gj62 - accepted KurtVons comments before i received yours ( and because you assumed i was a 'HE'!! )
Avatar of Kent Olsen
And if you don't want your token edited, you can compare them yourself:

// Search for "s2" in "s1", ignoring punctuation embedded in s1.
//
// A match must begin on a character of s1 equal to the first character of
// s2.  From there, characters are compared pairwise; a punctuation
// character in s1 that does not equal the current character of s2 is
// skipped.  Punctuation that appears identically in both strings matches
// normally.
//
// Returns the address within s1 where the match begins, else NULL.
// An empty s2 never matches (unlike strstr), because no non-NUL character
// of s1 can equal its terminating '\0'.

char *strpcmp (char *s1, char *s2)
{
  char *p1, *p2;

  while (*s1)
  {
    if (*s1 == *s2)
    {
      p1 = s1+1;
      p2 = s2+1;
      while (*p2)
      {
        if (*p1 == *p2)  // if match, including identical punctuation
        {
          p1++;
          p2++;
        }
        // <ctype.h> functions need a value in unsigned char range; a
        // negative plain char would be undefined behaviour.
        else if (ispunct ((unsigned char)*p1)) // punct in object string
          p1++;
        else break;  // mismatch, or s1 ran out before s2 did
      }
      if (*p2 == 0) // end of p2 reached, match found
        return s1;
    }
    s1++;
  }
  return NULL;
}
Man, you guys are quick today....