Triona
asked on
ispunct function - removing punctuation marks from a text file
Hi. I have a little search program for searching a text file for a particular word. I'd like to add a function to scan the file to find and remove all punctuation marks from the file before the search is performed. I'm guessing I should use the ispunct function, but I can't get it working properly. My code is below.
Cheers,
Triona
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Search essays.txt for a user-supplied string.
 *
 * Each line read from essays.txt is split into tokens on the characters
 * '[' and ']'. Every token that contains the search string is printed to
 * stdout and written, followed by a newline, to results.txt. A summary with
 * the total number of matching tokens is printed at the end.
 *
 * Returns 0 on every path (including failure to open a file), as the
 * original program did.
 */
int main()
{
    FILE *essays;
    FILE *results;
    char line[250];
    char Search[80];
    const char *delimiters = "[]";
    char *test;
    int count = 0, line_count = 0;

    /* Open file to be searched and file to be written to */
    if ((essays = fopen("essays.txt", "r")) == NULL)
    {
        printf("Unable to open the input file");
        return 0;
    }
    if ((results = fopen("results.txt", "w")) == NULL)
    {
        printf("Unable to open output file");
        fclose(essays);   /* do not leak the already-open input file */
        return 0;
    }

    /* Get string to be searched for */
    printf("Enter string to be searched for:\n");
    /* %79s bounds the read to the 80-byte buffer; Search (not &Search)
       has the char * type that %s expects. */
    if (scanf("%79s", Search) != 1)
    {
        printf("No search string entered\n");
        fclose(results);
        fclose(essays);
        return 0;
    }

    /* Test fgets directly: checking feof() before reading only notices
       end-of-file after a read has already failed. */
    while (fgets(line, sizeof line, essays) != NULL)
    {
        line_count++;

        /* Break up line into tokens and search each one for the string */
        for (test = strtok(line, delimiters);
             test != NULL;
             test = strtok(NULL, delimiters))
        {
            if (strstr(test, Search) != NULL)
            {
                count++;
                printf("Entry %d on line %d: %s\n", count, line_count, test);
                fputs(test, results);
                fputs("\n", results);
            }
        }
    }

    printf("The string %s occurs %d times\n", Search, count);
    fclose(results);
    fclose(essays);
    return 0;
}
Cheers,
Triona
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Search essays.txt for a user-supplied string.
 *
 * Each line read from essays.txt is split into tokens on the characters
 * '[' and ']'. Every token that contains the search string is printed to
 * stdout and written, followed by a newline, to results.txt. A summary with
 * the total number of matching tokens is printed at the end.
 *
 * Returns 0 on every path (including failure to open a file), as the
 * original program did.
 */
int main()
{
    FILE *essays;
    FILE *results;
    char line[250];
    char Search[80];
    const char *delimiters = "[]";
    char *test;
    int count = 0, line_count = 0;

    /* Open file to be searched and file to be written to */
    if ((essays = fopen("essays.txt", "r")) == NULL)
    {
        printf("Unable to open the input file");
        return 0;
    }
    if ((results = fopen("results.txt", "w")) == NULL)
    {
        printf("Unable to open output file");
        fclose(essays);   /* do not leak the already-open input file */
        return 0;
    }

    /* Get string to be searched for */
    printf("Enter string to be searched for:\n");
    /* %79s bounds the read to the 80-byte buffer; Search (not &Search)
       has the char * type that %s expects. */
    if (scanf("%79s", Search) != 1)
    {
        printf("No search string entered\n");
        fclose(results);
        fclose(essays);
        return 0;
    }

    /* Test fgets directly: checking feof() before reading only notices
       end-of-file after a read has already failed. */
    while (fgets(line, sizeof line, essays) != NULL)
    {
        line_count++;

        /* Break up line into tokens and search each one for the string */
        for (test = strtok(line, delimiters);
             test != NULL;
             test = strtok(NULL, delimiters))
        {
            if (strstr(test, Search) != NULL)
            {
                count++;
                printf("Entry %d on line %d: %s\n", count, line_count, test);
                fputs(test, results);
                fputs("\n", results);
            }
        }
    }

    printf("The string %s occurs %d times\n", Search, count);
    fclose(results);
    fclose(essays);
    return 0;
}
Wouldn't it make more sense to test for punctuation in the search itself? After all, what about a situation like
no,spaces,here
If you yank the punctuation, you get
nospaceshere
so searching for "spaces" as a word will fail. If it succeeds, it will also mess up and indicate a hit with "spaceship," which doesn't sound like what you want to do.
no,spaces,here
If you yank the punctuation, you get
nospaceshere
so searching for "spaces" as a word will fail. If it succeeds, it will also mess up and indicate a hit with "spaceship," which doesn't sound like what you want to do.
He's only testing for [ and ] as his delimiters - I assumed they were word delimiters.
If not, he can either make the delimiters all the punctuation, or maybe he should be replacing punctuation with delimiters BEFORE strtok, as follows:
char *p;
if (fgets(line, 250, essays) != NULL)
{
p=line;
while(*p)
{
if(ispunct(*p)
{
*p='['; /*or any delimiter you test for*/
}
p++;
}
now strtok...
rest of code here...
If not, he can either make the delimiters all the punctuation, or maybe he should be replacing punctuation with delimiters BEFORE strtok, as follows:
char *p;
if (fgets(line, 250, essays) != NULL)
{
p=line;
while(*p)
{
if(ispunct(*p)
{
*p='['; /*or any delimiter you test for*/
}
p++;
}
now strtok...
rest of code here...
ASKER
Thanks, but I still want to keep the words separate; I just want to remove punctuation like full stops, commas, etc. How do I go about that?
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Uh, not the memmove solution, but the simple replace solution, right? e.g.
char *p;
if (fgets(line, 250, essays) != NULL)
{
p=line;
while(*p)
{
if(ispunct(*p)
{
*p=' '; /*replace all punctuation with spaces...*/
}
p++;
}
you could have more than 1 space in a row - do you want just 1 space?
char *p;
if (fgets(line, 250, essays) != NULL)
{
p=line;
while(*p)
{
if(ispunct(*p)
{
*p=' '; /*replace all punctuation with spaces...*/
}
p++;
}
you could have more than 1 space in a row - do you want just 1 space?
Gee, kinda thought that would be for me :-( <grin>
Um, I think gj62 should be getting those points.
I was just trying to be helpful outside the question.
I was just trying to be helpful outside the question.
Oh, never mind, it is a B - you can have it Kurt <grin>
ASKER
Sorry gj62 - I accepted KurtVon's comments before I received yours (and because you assumed I was a 'HE'!!)
And if you don't want your token edited, you can compare them yourself:
// Search for "s2" in "s1", ignoring punctuation embedded in s1.
//
// A match must begin at a character of s1 equal to the first character of
// s2. After that, characters are compared one by one; a character of s1
// that differs from the current character of s2 is skipped if it is
// punctuation, and ends the attempt otherwise. Punctuation that appears
// identically in both strings is matched like any other character.
//
// Returns the address within s1 where the match starts, or NULL if there
// is none. An empty s2 never matches, because the first-character test can
// only succeed on a non-NUL character of s1.
char *strpcmp (char *s1, char *s2)
{
    char *p1, *p2;

    while (*s1)
    {
        if (*s1 == *s2)
        {
            p1 = s1 + 1;
            p2 = s2 + 1;
            while (*p2)
            {
                if (*p1 == *p2)   // if match, including identical punctuation
                {
                    p1++;
                    p2++;
                }
                // Cast: ispunct is undefined for negative char values.
                else if (ispunct ((unsigned char) *p1))   // punct in object string
                {
                    p1++;
                }
                else
                {
                    break;   // real mismatch, or s1 ran out before s2
                }
            }
            if (*p2 == 0)   // end of p2 reached, match found
                return s1;
        }
        s1++;
    }
    return NULL;
}
// Search for "s2" in "s1", ignoring punctuation embedded in s1.
//
// A match must begin at a character of s1 equal to the first character of
// s2. After that, characters are compared one by one; a character of s1
// that differs from the current character of s2 is skipped if it is
// punctuation, and ends the attempt otherwise. Punctuation that appears
// identically in both strings is matched like any other character.
//
// Returns the address within s1 where the match starts, or NULL if there
// is none. An empty s2 never matches, because the first-character test can
// only succeed on a non-NUL character of s1.
char *strpcmp (char *s1, char *s2)
{
    char *p1, *p2;

    while (*s1)
    {
        if (*s1 == *s2)
        {
            p1 = s1 + 1;
            p2 = s2 + 1;
            while (*p2)
            {
                if (*p1 == *p2)   // if match, including identical punctuation
                {
                    p1++;
                    p2++;
                }
                // Cast: ispunct is undefined for negative char values.
                else if (ispunct ((unsigned char) *p1))   // punct in object string
                {
                    p1++;
                }
                else
                {
                    break;   // real mismatch, or s1 ran out before s2
                }
            }
            if (*p2 == 0)   // end of p2 reached, match found
                return s1;
        }
        s1++;
    }
    return NULL;
}
Man, you guys are quick today....
char test[]="this.is.a.test!!!"
char *p;
p=test;
while(*p)
{
if (ispunct(*p))
{
memmove(p,p+1,strlen(p));
}
else
p++;
}
which will leave test = "thisisatest"