Sorting a flat text file using MSVS C++ #1

What's the easiest way to get C++ to sort a text file that is delimited by a tilde ("~")? There are 15 columns of char's. Regular text files about 200+ meg. I want to sort by columns. Any ideas?
wills1300Asked:
Who is Participating?
 
AlexVirochovskyCommented:
 /*-----------------------------------------*/
  /*     LONGSORTM.CPP                       */
  /* test programm for DOS, but you can      */
  /* easy make WinTest                       */
  /* 1 change LongSort in Windows Api           */
  /*  error() -> MessageBox()                 */
  /* Test: file 3.5 Mega, len of rec: 900b   */
  /* Time of sort 1 min. Computer Pentium-333*/
  /*-----------------------------------------*/
#include <alloc.h>
#include <io.h>
#include <dir.h>
#include <dos.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <conio.h>

//  h file: longsirt.h
#define MAX_FILE                    10 //max open'd file
#define VALLOC(P,N,E) if ((P = (E *)calloc(N,sizeof(E)))==NULL) error(0)
#define MES_HLP01  "\n  Usage: longsort.exe AAAA.EXT (name of sortede file)\n"
#define MAX_MILON                  10000 //max lines in Temp file
#define MES_ERROR01    "No memory, Help me!"
#define MES_ERROR02    "File not exist!"
#define MES_ERRORPRESS "Press to any key"
typedef struct HNAMEStag {
   char *name;           //Line
   int nLen;            //Index of file, that contains this line
} HNAMES;
//end of h file

/****   FUNCTIONS   PROTOTYPE   IN THIS CLUSTER  ***/
       void cdecl main (int argc, char **argv);
static void PrintHelp(void);
static void LongSort(char *szFile);
static int  compare_w (const void *a,const void *b);
static int  compare_all (const void *a,const void *b);
static void error (int  nEInd);
static void SortMilonPlus(char *szResult, char **szFile,int nFile);
/*===============================================*/
/*  Name       : main                         */
/*  Descript   : sort FileName                    */
/*  Comment:   : len of line <= 1024!!!             */
/*  Parameters : name of programm, name of file  */
/*  Return     : Error code, if exist error      */
/*===============================================*/
void cdecl main (int argc, char **argv)
{
  clrscr();                        //clear screen
  char szFileName[255];

  if (argc > 1)               /* use only 1-st parameter*/
    {
      strcpy(szFileName, argv[1]);      /*name of file? */
      if (access(szFileName, 0) == -1)  //exist file?
      error(1);
      LongSort(szFileName);            //OK: sort
    }
  else if (argc == 1)
    PrintHelp();
  clrscr();                        //clear screen
}  //end main
/*=======================================*/
/*  Name       : PrintHelp             */
/*  Descript   : Display Help of utilite */
/*  Parameters : None                   */
/*  Return     : None                   */
/*=======================================*/
void PrintHelp(void)
{
  puts(MES_HLP01);
  getch();
} //end  PrintHelp
/*=============================*/
/*  Name       : LongSort      */
/*  Descript   : sort file.    */
/*  Parameters : Name of file  */
/*  Return     : None             */
/*=============================*/
void LongSort(char *szFileOut)
{
  char **szText;
  char szBuff[1024];                  //buffer I/O

  FILE *fMilon;                        //input file

  fMilon = fopen(szFileOut, "rt");      //open for read dictionary

  if (fMilon)                        //exist
    {
      int
      i,                  //indexes
      nMilon,                        //number records in Temp file
      nFile = 0;                  //number temp files
      FILE
       *fOut;                        //out file(sorted)
      char **szFile;                  //name of files
      long lOff = 0;                  //lace in file


      VALLOC(szFile, 4000, char *);      //<= 4000 Temp files(can be that must
                              //count this value)
                              //1-st Stage: make temp files.
      VALLOC(szText, MAX_MILON,char *);
      while (1)
      {
        fseek(fMilon, lOff,SEEK_SET);
        nMilon = 0;
                              //read line(it this place must be
                              //you read!!!
        while (fgets(szBuff, sizeof(szBuff)-1, fMilon) && nMilon <= MAX_MILON)
          {
            if (*szBuff != '\n')            //not empty line?
            {
              if ( (szText[nMilon] = strdup(szBuff)) == NULL)
                break;
              nMilon++;
            }
            lOff = ftell(fMilon);
          }
        if (nMilon >= 1)                        //sort
          {
            char szNameTemp[13];                  //name of temp file
            qsort ((void *)szText,nMilon,sizeof(szText[0]),compare_w);
                                    //result
            sprintf(szNameTemp,"TEMP.%03d", nFile);
            fOut = fopen(szNameTemp, "wt");            //open temp file
            for (i = 0; i < nMilon; i++)              //write to temp file
            {
              fputs(szText[i], fOut);
              free(szText[i]);
            }
            fclose(fOut);                        //close sorted
            szFile[nFile++] = strdup(szNameTemp);
          }
        if (feof(fMilon))                  //end of main file?
          break;
      }
      free(szText);                        //free memory
      fclose(fMilon);                        //close main file
                                    //2-nd Stage: Merge
                                    //similtenously <= 10 files
      while (nFile >=  MAX_FILE)            //Delete Last 10!
      {
         char szResult[13];                  //name of new temp file

         tmpnam(szResult);
                                    //merge
         SortMilonPlus(szResult, szFile + (nFile - MAX_FILE), MAX_FILE);
         for (i = nFile - MAX_FILE; i < nFile; i++)  //free memrory
           free (szFile[i]);

         szFile[nFile - MAX_FILE] = strdup(szResult);//save new temp name
         nFile =  nFile - MAX_FILE + 1;             //new number of files
       }
      if (nFile == 1)                        //OK only 1 file!
      {
        unlink(szFileOut);                  //delete old main file
        rename(szFile[0], szFileOut);            //rename new to old
        free (szFile[0]);                  //free memory
      }
      else
      {
         SortMilonPlus(szFileOut, szFile, nFile);//merge last files
         for (i = 0; i < nFile; i++)
           free (szFile[i]);                  //free memory
      }
      free (szFile);
    }
} // end SortMilonL
/*=======================================================*/
/*  Name       : SortMilonPlus                         */
/*  Descript   : merge <= 10 files to 1                   */
/*  Parameters : name of result file,names of temp file, */
/*             : number of temp files                     */
/*  Return     : None                               */
/*=======================================================*/
void SortMilonPlus(char *szResult, char **szFile, int nFile)
{
   FILE       *fList[MAX_FILE];            //list of files
   FILE  *fOut;                        //result
   int i;                        //index
   char szBuff[1024];                  //I/O buffer
   HNAMES szAll[MAX_FILE];            //struct for hold sorted lines

   for (i = 0; i < MAX_FILE; i++)            //clear struct
     szAll[i].nLen = -1;
   for (i = 0; i < nFile; i++)            //open
     {
       fList[i] = fopen(szFile[i], "rt");      //open temp file
       fgets(szBuff, sizeof(szBuff)-1, fList[i]);//read 1-st line from every file
       if ( (szAll[i].name = strdup(szBuff)) == NULL)
        error(0);
       szAll[i].nLen = i;
     }
   fOut = fopen(szResult, "wt");            //open result
   while (1)                              //untill end of all files
     {                                    //sort readed lines
       qsort ((void *)szAll,nFile,sizeof(szAll[0]),compare_all);
                              //save 1-st
       for (i = 0; i < nFile; i++)            //open
       {
         int ind = szAll[i].nLen;            //find 1-st index >= 0
         if (ind >= 0)                    //ok, exist?
           {
             fputs(szAll[i].name, fOut);      //save to result
             free(szAll[i].name);            //free memory
                                    //read new Line
             if (fgets(szBuff, sizeof(szBuff)-1, fList[ind]) == NULL)
             {
               szAll[i].nLen = -1;            //end of file!
               szAll[i].name = strdup(" ");
             }
             else if ( (szAll[i].name = strdup(szBuff)) == NULL)//save new line
                error(0);
             break;
           }
       }
       if (i >= nFile)                  //end ?
       break;
     }
   for (i = 0; i < nFile; i++)            //close all file and delete Temp
     {
       fclose(fList[i]);            //close temp file
       unlink(szFile[i]);            //delete temp file
     }
   fclose(fOut);                  //close main file
}

/*====================================================*/
/*  Name       : compare_w                           */
/*  Descript   : Compare 2 strings(must be YOU comp!!!*/
/*  Parameters : 1-st string, 2-nd string            */
/*  Return     : 0<, 0, >0: see qsort                    */
/*====================================================*/
int compare_w (const void *a,const void *b)
{
  char  **a1 = (char **)a,**b1 = (char **)b;

  return stricmp( *a1,*b1);            //compare
} // end compare_w
/*=========================================================================*/
/*  Name       : compare_all                                        */
/*  Descript   : Compare names of 2 variables                                */
/*  Parameters : Adress of 1-st Names , Adress of 2-nd Names               */
/*  Return     : 0<, 0, >0: see qsort                                         */
/*=========================================================================*/
int compare_all (const void *a,const void *b)
{
// \\   char *hagai =
// \\     "ALL RIGHTS RESERVED (C) SHARLIN HAGAI, MOSHE SHAPIRO 43, NATANIA, ISRAEL 42240. tel: 972-53-822831";
  HNAMES  *a1 = (HNAMES *)a, *b1 = (HNAMES *)b;
  return  compare_w( &(a1->name),&(b1->name) );            //compare
} // end compare_w
/*=========================================================================*/
/* Description: error - Exits program in case of fatal error.              */
/* Parameter  : nEInd   - index of code of error message to display.         */
/* Return val : None.                                             */
/*=========================================================================*/
void error (int  nEInd)
{
   char buff[80];                      //work buffer wiith text error
   char *lpszError[] =                  //errors
     {
      MES_ERROR01,                  //0
      MES_ERROR02,                  //1
      NULL
     };

   fcloseall ();
   clrscr ();                              //clear screen
   strcpy (buff, lpszError[nEInd]);
   strcat (buff, "\n");
   strcat (buff, MES_ERRORPRESS);            //press any key
   printf(buff);                                //display error
   getch();                              //wait
   exit (4);                              //exist from utile
} // end error
Best regards, Alex
0
 
AlexVirochovskyCommented:
I know only 1 way:
1.
  read part (~ 60k): qsort part, save in temp file
  read next, qsort, save in temp file
  ...
  untill end of file.
2. merge temp files:
     input line from 1-st temp file
     ...
     input line from last  temp file
     {
       qsort
       save in result file
       read line from this file, that line was saved
     }
     there is little problem with number of file,simultanuos
     open (parameter Files in config.sys),
     but this problem can solve with iterations:
     merge 20 file, after that more 20, ..., merge results
0
 
viktornetCommented:
Here is a bubble sort routine I did some time ago in Turbo C++... YOu can convert it easily to any compiler... It also includes a test program..... It simply sorts the names and displays them in a sorted manner... The file is a bit messy, but I hope everything would be ok...

Hope this helps...
==================
#include <iostream.h>
#include <conio.h>
#include <stdio.h>
#include <string.h>

#define num 3
#define len 80

void SwapNames(char *, char *);
void ShowNames(char [][len], const int);
void SortNames(char [][len], const int);

void main()
{
      char names[num][len];
      clrscr();
      for (int i = 0; i < num; i++) {
            cout << "Please enter a name ==> ";
            gets(names[i]);
      }
      clrscr();
      cout << "Unsorted...\n\n";
      ShowNames(names, num);
      getch();
      clrscr();
      SortNames(names, num);
      cout << "Sorted...\n\n";
      ShowNames(names, num);
      getch();
}

void SortNames(char names[][len], const int size)
{
      int done;
      do {
            done = 1;
            for (int i = 0; i < size-1; i++) {
                  if (strcmp(names[i], names[i+1]) > 0) {
                        SwapNames(names[i], names[i+1]);
                        done = 0;
                  }
            }
      } while (!done);
}

void ShowNames(char names[][len], const int size)
{
      for (int i = 0; i < size; i++)
            cout << names[i] << endl;
}

void SwapNames(char *a, char *b)
{
      char tmp[len];
      strcpy(tmp, a);
      strcpy(a, b);
      strcpy(b, tmp);
}
============
-Viktor
--Ivanov
0
Free Tool: SSL Checker

Scans your site and returns information about your SSL implementation and certificate. Helpful for debugging and validating your SSL configuration.

One of a set of tools we are providing to everyone as a way of saying thank you for being a part of the community.

 
viktornetCommented:
Oh, you do the reading from the file and the rest yourself... I just wanted to show you how to do the sorting of the strings... you use strcmp()

-Viktor
0
 
alexoCommented:
NEVER use bubble sort.  A decent external sort algorithm is MergeSort.  Combined with in-memory sorting (e.g., quicksort) as Alex suggested will get you what you need.
0
 
wills1300Author Commented:
Bubble sort does not seem feasible because I need to have the data sorted by rows, and from the sample code I could not see an easy way to maintain the rows, unless I took the entire row as a string. But I need to sort by columns also. Basically, the data file I am working with is like a spreadsheet. The data can be thought of like a huge address book, where I have to sort first by last name, then first name, than by address, and so on. I was hoping for anything that could do this. Some easy routine? What is qsort? MergeSort? Alexo and AlexVirochovsky could you expand on your answer, please? (P.S. I am fairly new to C++ developement.)
0
 
alexoCommented:
OK, this is not C++ but basic algorithm stuff.
In general you have two kinds of sort algorithms: internal sort (the data is small enough to fit in memory) and external sort (the data is too large to fit in memory).
The best-known internal sort is Quicksort.  It is considered the fastest (on the average) general-purpose sort.  In fact the C/C++ standard library includes a Quicksort function (qsort).
Mergesort is another algorithm that is suitable for both internal and external sorting.  Its basic principle is taking 2 sorted sets (or files) and merging them into 1 sorted set (or file).
0
 
wills1300Author Commented:
OK, QSORT sounds good, but how do I use it? Is it a function call?
0
 
AlexVirochovskyCommented:
will1300! I send you example of qsort

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int sort_function( const void *a, const void *b);

char list[5][4] = { "cat", "car", "cab", "cap", "can" };

int main(void)
{
   int  x;

   qsort((void *)list, 5, sizeof(list[0]), sort_function);
   for (x = 0; x < 5; x++)
      printf("%s\n", list[x]);
   return 0;
}

int sort_function( const void *a, const void *b)
{
   return( strcmp((char *)a,(char *)b) );
}

If you want full text of Sorting Large Files programm,
you must:
1. Add points to 2500-300.
2. Accept answer to A category.

BTW: Full code read lines of text(this place you must change)
and Sort Function for Hebrew(this place you must change, too!).
Regards, Alex
0
 
wills1300Author Commented:
Yeah, sure Alex. Just put the full answer in 'answer form.'
0
 
alexoCommented:
>> (C) SHARLIN HAGAI, MOSHE SHAPIRO 43, NATANIA
I suggest you take this line seriously.  People from Netania have an er... "unconvential" way of enforcing copyright issues.
0
 
AlexVirochovskyCommented:
Ouups, sorry, i delete all lines about copyright(where many!),
and forgette this. You can use it free!

0
 
wills1300Author Commented:
OK Alex, I haven't tried the program yet, but I'll take your word for it. Thanks. Will.
0
Question has a verified solution.

Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

All Courses

From novice to tech pro — start learning today.