• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 858
  • Last Modified:

c++ : search and count how many times a word appears in a file

I need a code snippet to do the following:

Search files: When a string to search for is specified, the program should return a list of all the files that contain
matches, as well as the number of matches in each file, e.g.
Files containing search string: HELLO
sally.txt: 333
bob.txt: 32
mike.txt: 2

I don't know of any method/function in C++ that can go through a file word by word. Something like Java's StringTokenizer, maybe?
0
moevic
Asked:
moevic
2 Solutions
 
JackNCalvinCommented:
moevic:

You need to open the file and then scan each word.  This is done fairly easily in C++.  The input stream will allow you to read each individual word separately.  You can read each word in as a String and then compare it to the desired word.  Just add a counter that is incremented every time this word is found, and reset it to 0 when the file closes.

Here is a function that might work for you.

------------------------------------------------

/*
 * Counts how many whitespace-delimited words in the file `filename` are
 * equal to `stringCompare`, then prints "<filename>: <count>" to cout.
 *
 * Words are extracted with operator>>, so punctuation attached to a word
 * is part of the token; equality is whatever String's operator== provides
 * (presumably an exact, case-sensitive comparison -- confirm for the
 * String type used by the project).
 *
 * If the file cannot be opened, "Could Not Open: <filename>" is printed
 * and the function returns without printing a count, so that a caller
 * looping over several files can carry on with the next one.
 */
void readSearchedFile(char *filename, String stringCompare){

   // Neither the stream constructor nor operator<< accepts a null name.
   if (!filename){
        cout << "Could Not Open: (null filename)" << endl;
        return;
   }

   ifstream fin(filename);

   // Test the stream itself: the filename pointer says nothing about
   // whether the open succeeded.
   if (!fin){
        cout << "Could Not Open: " << filename << endl;
        return;
   }

   String s;
   int counter = 0;

   // Extract inside the loop condition so that the failed read at end of
   // file terminates the loop instead of re-processing the last word.
   while (fin >> s) {

     if (s == stringCompare) {
         ++counter;
      }

   }

   cout << filename << ": " << counter << endl;

   // fin is closed by its destructor when it goes out of scope.

} // end readSearched File

------------------------------------------------

Hope this helps!!!


-jack-
0
 
Mayank SAssociate Director - Product EngineeringCommented:
You mean you need to implement something similar to the grep command. But you didn't specify whether the file names will be specified by the user or whether it has to search all the files in the directory.

Mayank.
0
 
baby001Commented:
The following code should meet your needs, but it only runs on Windows. You can copy it into your project; it has been built before and works well.

Hoping this is helpful to you.

#include <stdio.h>
#include <string.h>
#include <sys/stat.h>

#include <string>
#include <vector>

#include "windows.h"

/*
 * Fixed-capacity list of file names.
 *
 * The capacity is chosen at construction time; Add() stores a private copy
 * of each name until the capacity is reached and silently ignores further
 * names.  listFileName[0 .. GetNum()-1] are the stored NUL-terminated
 * names; slots at or beyond GetNum() are NULL.
 */
class FileList
{
public:
      // Creates a list able to hold up to nFile names (none when nFile <= 0).
      FileList(int nFile) ;
      ~FileList() ;
      // Appends a copy of strName; does nothing when the list is full or
      // strName is NULL.
      void Add(const char * strName) ;
      // Number of names stored so far.
      int GetNum() const ;
      // Prints every stored name to stdout, each preceded by a newline.
      void OutPutList() const ;
public:
      char ** listFileName ;

private:
      // The class owns raw arrays, so a member-wise copy would free them
      // twice.  Copying is disabled (declared, intentionally not defined).
      FileList(const FileList &) ;
      FileList & operator=(const FileList &) ;

      int m_index ;      // number of names stored
      int m_nFile ;      // capacity (number of slots in listFileName)
} ;

FileList::FileList(int nFile)
      : listFileName(NULL), m_index(0), m_nFile(0)
{
      // Members are initialized above even for nFile <= 0 so that the
      // destructor and Add() never read indeterminate values.
      if (nFile > 0)
      {
            m_nFile = nFile ;
            listFileName = new char*[nFile] ;
            // Slots are filled lazily by Add() with exactly-sized copies.
            for (int i = 0; i < nFile; ++i)
                  listFileName[i] = NULL ;
      }
}

FileList::~FileList()
{
      // delete [] on a NULL slot (or a NULL table) is a no-op.
      for (int i = 0; i < m_nFile; ++i)
            delete [] listFileName[i] ;
      delete [] listFileName ;
}

void FileList::Add(const char *strName)
{
      if (strName == NULL || m_index >= m_nFile)
            return ;

      // Size the copy to the name so long paths cannot overflow the slot.
      char * copy = new char[strlen(strName) + 1] ;
      strcpy(copy, strName) ;
      listFileName[m_index] = copy ;
      ++m_index ;
}

int FileList::GetNum() const
{
      return m_index ;
}

void FileList::OutPutList() const
{
      for (int i = 0; i < m_index; ++i)
            printf("\n%s", listFileName[i]) ;
}

void SearchSubString(char * strDirectory, char * strSub, FileList &listFile)
{
      HANDLE hFind;
      WIN32_FIND_DATA df;
      long fileSize = 0;
      int i ;
      char strClear[100] ;
      for (i=0;i<100;i++)
            strClear[i] = '\0' ;      
      
      char dirFirst[50] ;
      strcpy(dirFirst, strDirectory) ;
      strcat(dirFirst, "\\*.*") ;
      strcat(strDirectory, "\\") ;

      hFind=FindFirstFile(dirFirst, &df);
      if(hFind==INVALID_HANDLE_VALUE)
      {
            printf("%s is no a directory!\n", strDirectory) ;
            return ;
      }

      do
      {
            //is directory
            if( df.dwFileAttributes==FILE_ATTRIBUTE_DIRECTORY ||
                  df.dwFileAttributes==(FILE_ATTRIBUTE_DIRECTORY|FILE_ATTRIBUTE_READONLY))
            {            
                  if ((strcmp(df.cFileName, ".")!=0)&&
                        (strcmp(df.cFileName, "..")!=0))
                  {
                        char strDesSubPath[100] ;
                        memcpy(strDesSubPath, strClear, 100) ;
                        strcpy(strDesSubPath, strDirectory) ;
                        strcat(strDesSubPath, df.cFileName) ;                  
                        strcat(strDesSubPath, "\\");
                        SearchSubString(strDesSubPath, strSub, listFile) ;      
                  }
            }
            //is file
            else
            {
                  FILE *stream ;
                  long lenFile = 0 ;
                  char strTemp[500] ;
                  strcpy(strTemp, strDirectory) ;
                  strcat(strTemp, df.cFileName) ;
                  stream = fopen(strTemp, "r") ;
                  struct _stat statbuf ;
                  _stat(strTemp, &statbuf) ;
                  lenFile = statbuf.st_size ;
                  char * buffer = new char[lenFile] ;
                  fread(buffer, sizeof(char), lenFile, stream) ;
                  char * pDest ;
                  pDest = strstr(buffer, strSub) ;
                  if (pDest != NULL)
                        listFile.Add(strTemp) ;                  
                  fclose(stream) ;            
            }            
      }while(FindNextFile(hFind,&df));
      FindClose(hFind);
}

void main(int argc, char *argv[])
{
      FileList listFile(1000) ;
      
      if (argc <= 2)
      {
            printf("syntex error, you should call findsubstring like:\n findsubstring <directoryname>, <substring>") ;
            return ;
      }
      
      char * pDest ;
      if ((pDest = strstr(argv[1], "\\"))==NULL)
      {
            printf("The first argument must be a whole directory name!") ;
            return ;
      }            
      

      char strName[200] ;
      strcpy(strName, argv[1]) ;
      SearchSubString(strName, argv[2], listFile) ;      
      
      
      if (listFile.GetNum() <= 0)
      {
            printf("No files contain the substring\n") ;
            return ;
      }
      else                  
            listFile.OutPutList() ;
      
}
0
 
CleanupPingCommented:
moevic:
This old question needs to be finalized -- accept an answer, split points, or get a refund.  For information on your options, please click here-> http:/help/closing.jsp#1 
EXPERTS:
Post your closing recommendations!  No comment means you don't care.
0

Featured Post

Become an Android App Developer

Ready to kick start your career in 2018? Learn how to build an Android app in January’s Course of the Month and open the door to new opportunities.

Tackle projects and never again get stuck behind a technical roadblock.
Join Now