Solved

Improvement of Imladris's algorithm - C

Posted on 2001-06-08
9
242 Views
Last Modified: 2008-03-10
The code below is the version you improved for me; it returns the first letter followed by a 3-digit number (for example, Smith is converted to S520). HOWEVER, THE NEW REQUIREMENT is to keep all the letters of the name and to convert every vowel in the name to "a" (that is, each of "aeiou" must convert to "a"), except for the last letter of the name: cut the last letter of the name if it is a vowel.
So smith should become "smath". P.S. Don't cut off the first letter when the name begins with a vowel.

more examples : assing = assang

moroni,moreno= maran  (not marana)

pereira, perrier = parar (not parana)
wilson=walsan
wang= wang
wong =wang

and so on....

-----------------

                  #include <stdio.h>
                   #include <string.h>
                   #include <stdlib.h>
                   #include <ctype.h>


                   int FullFlag;
                   char *Soundex(char *);
                   void getinput(char *prompt,char n[]);

                   void main(int argc,char *argv[])
                   {   int matched,msave,usave,mc;
                    char name[150],line[500];
                    float LIG;
                    FILE *namefile,*mfile,*ufile;

                    printf("Save matched names in matched.dat (y or n)?\n");
                    gets(line);
                    msave=(toupper(line[0])=='Y');
                    printf("Save unmatched names in unmatched.dat (y or n)?\n");
                    gets(line);
                    usave=(toupper(line[0])=='Y');
                    namefile=fopen("surnames.dat","r+"); /* open file in read/write mode */
                    if(msave)mfile=fopen("matched.dat","w");
                    if(usave)ufile=fopen("unmatched.dat","w");
                    FullFlag=1;
                    do
                    {   printf("Please enter name to match:\n");
                        gets(name);
                        printf("Soundex code: %s\n",Soundex(name));
                        fseek(namefile,0L,0); /* seek to start of file */
                        matched=0;
                        mc=0;
                        while(fgets(line,500,namefile)!=NULL)
                        {   line[strlen(line)-1]='\0';
                            if(strcmp(Soundex(name),Soundex(line))==0)
                            {    printf("%s\n",line);
                                 ++mc;
                                 if(msave)
                                 {   if(matched==0)fprintf(mfile,"Match name %s\n",name);
                                     fprintf(mfile,"%s\n",line);
                                 }
                                 matched=1;
                            }
                        }
                        if(matched==1)
                         {   if(msave)fprintf(mfile,"----------------------------------------\n");
                             printf("Number of Matches: %d\n",mc);
                         }
                        else
                        {   printf("No Matches Found.\n");
                            if(usave)
                            {   fprintf(ufile,"Match name %s\n",name);
                                fprintf(ufile,"No Matches found\n");
                                fprintf(ufile,"--------------------\n");
                            }
                        }
                        printf("Compare again (y or n)?\n");
                        gets(line);
                    } while(toupper(line[0])=='Y');
                    fclose(namefile);
                    if(msave)fclose(mfile);
                    if(usave)fclose(ufile);
                   }


                   /* prompt user for input
                   // get input
                   // copy it safely into provided variable
                   */

                   void getinput(char *prompt,char n[])
                   {   char ipc[150];

                    printf("%s:\n",prompt);
                    gets(ipc);
                    strncpy(n,ipc,19);
                    n[19]='\0';
                    return;
                   }

                             #define MAXNAMELEN 50
                             #define MAXLINELEN 500

                             /* Returns the soundex equivalent to In */
                             char *Soundex(char *In)
                             {
                             int Iin, Iout;
                             char C, PrevDig, *Out = (char *)malloc(10);

                             if (FullFlag)
                             {Iin = 0; Iout = 0; PrevDig = '*';}
                             else
                             {Iin = 1; Iout = 1;
                             Out[0] = tolower(In[0]); PrevDig = Out[0];}

                             while ((In[Iin] != (char)NULL) && (Iout <= 4))
                             {
                             In[Iin] = tolower(In[Iin]);
                             switch (In[Iin])
                             {
                             case 'b' : C = '1'; break;
                             case 'p' : C = '1'; break;
                             case 'f' : C = '1'; break;
                             case 'v' : C = '1'; break;
                             case 'c' : C = '2'; break;
                             case 's' : C = '2'; break;
                             case 'k' : C = '2'; break;
                             case 'g' : C = '2'; break;
                             case 'j' : C = '2'; break;
                             case 'q' : C = '2'; break;
                             case 'x' : C = '2'; break;
                             case 'z' : C = '2'; break;
                             case 'd' : C = '3'; break;
                             case 't' : C = '3'; break;
                             case 'l' : C = '4'; break;
                             case 'm' : C = '5'; break;
                             case 'n' : C = '5'; break;
                             case 'r' : C = '6'; break;
                             default : C = '*';
                             }
                             if ((C != PrevDig) && (C != '*'))
                             {
                             Out[Iout] = C;
                             PrevDig = Out[Iout];
                             Iout++;
                             }
                             Iin++;
                             }

                             if (Iout < 4)
                             for (Iin=Iout; Iin<4; Iin++)
                             Out[Iin] = '0';

                             Out[4] = (char)NULL;
                             return Out;
                             }
0
Comment
Question by:korsila
  • 5
  • 4
9 Comments
 
LVL 16

Expert Comment

by:imladris
ID: 6168845
So, let me just verify, that in this new algorithm you don't in fact want to use soundex at all. You want to do the "vowel replacement" (with 'a' under the listed constraints) algorithm instead.

Is that right?
0
 

Author Comment

by:korsila
ID: 6176671
Imladris
Sorry, I was a bit confusing with my requirement - you are absolutely right! Just change only the vowels into "a", but if there is a duplicated "a", keep only one "a" (example below: pereira --> parar, not paraar).

Also, if there are 2 vowels next to each other, as in "pareira", the code should contain only one "a", so it should be "parar".

After comparing the name entered by the user with the datafile (names.dat), the program should
- search the datafile for matching names, and return the matched names and also the number of matched names (with no difference between lowercase and capital letters).


hope it's not that hard for you...

let me know if i still make it unclear...

many thanks,
korsila

p.s. 10 point more adding ...

0
 
LVL 16

Accepted Solution

by:
imladris earned 120 total points
ID: 6183671
OK. I think this conforms to your spec. New algorithm creation (as opposed to adapting existing ones) should probably be more in the 100 point range.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>


int FullFlag;
char *Simplex(char *,char *);
void getinput(char *prompt,char n[]);

// Read one line of text from `in` into dst (capacity cap bytes).
// The end-of-line ("\n" or "\r\n") is removed.  If the line did not fit into
// dst, the unread remainder is discarded so it cannot be mistaken for the
// next line.
// Returns 1 when a line was read, 0 on end of file or read error (dst is then
// set to the empty string).
static int read_text_line(char *dst, size_t cap, FILE *in)
{
    int ch;

    if (fgets(dst, (int)cap, in) == NULL)
    {
        dst[0] = '\0';
        return 0;
    }
    if (strchr(dst, '\n') == NULL)
    {
        // no newline stored: line was longer than dst, or input ended
        while ((ch = getc(in)) != EOF && ch != '\n')
            ;
    }
    dst[strcspn(dst, "\r\n")] = '\0';
    return 1;
}

// Interactive name matcher.
//
// Asks whether matched / unmatched results should be saved, then repeatedly
// reads a name from stdin and lists every line of surnames.dat whose
// Simplex() key equals the key of that name.  Matches are written to
// matched.dat and misses to unmatched.dat when the user asked for that.
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE when a file cannot be opened or
// closed.
int main(int argc,char *argv[])
{
    int msave, usave, mc;
    int status = EXIT_SUCCESS;
    char name[150], line[500];
    // Simplex() writes at most as many characters as it reads, so each key
    // buffer is as large as the text it is derived from
    char namekey[150], linekey[500];
    FILE *namefile, *mfile = NULL, *ufile = NULL;

    (void)argc;
    (void)argv;

    printf("Save matched names in matched.dat (y or n)?\n");
    read_text_line(line, sizeof line, stdin);
    msave = (toupper((unsigned char)line[0]) == 'Y');
    printf("Save unmatched names in unmatched.dat (y or n)?\n");
    read_text_line(line, sizeof line, stdin);
    usave = (toupper((unsigned char)line[0]) == 'Y');

    // the name file is only ever read, so do not demand write access
    namefile = fopen("surnames.dat", "r");
    if (namefile == NULL)
    {
        perror("surnames.dat");
        return EXIT_FAILURE;
    }
    if (msave)
    {
        mfile = fopen("matched.dat", "w");
        if (mfile == NULL)
        {
            perror("matched.dat");
            status = EXIT_FAILURE;
            goto done;
        }
    }
    if (usave)
    {
        ufile = fopen("unmatched.dat", "w");
        if (ufile == NULL)
        {
            perror("unmatched.dat");
            status = EXIT_FAILURE;
            goto done;
        }
    }

    FullFlag = 0;
    for (;;)
    {
        printf("Please enter name to match:\n");
        if (!read_text_line(name, sizeof name, stdin))
            break;                      // end of input: stop asking

        mc = 0;
        if (name[0] != '\0')            // an empty name cannot match anything
        {
            // the key of the entered name does not change during the scan
            Simplex(name, namekey);
            printf("Simplex: %s\n", namekey);

            rewind(namefile);           // back to the start of the file
            while (read_text_line(line, sizeof line, namefile))
            {
                if (line[0] == '\0')
                    continue;           // ignore blank lines
                if (strcmp(namekey, Simplex(line, linekey)) == 0)
                {
                    printf("%s\n", line);
                    if (msave)
                    {
                        // header once, before the first match
                        if (mc == 0)
                            fprintf(mfile, "Match name %s\n", name);
                        fprintf(mfile, "%s\n", line);
                    }
                    ++mc;
                }
            }
        }

        if (mc > 0)
        {
            if (msave)
                fprintf(mfile, "----------------------------------------\n");
            printf("Number of Matches: %d\n", mc);
        }
        else
        {
            printf("No Matches Found.\n");
            if (usave)
            {
                fprintf(ufile, "Match name %s\n", name);
                fprintf(ufile, "No Matches found\n");
                fprintf(ufile, "--------------------\n");
            }
        }

        printf("Compare again (y or n)?\n");
        read_text_line(line, sizeof line, stdin);
        if (toupper((unsigned char)line[0]) != 'Y')
            break;
    }

done:
    fclose(namefile);
    // fclose flushes the output files, so a failure here means lost data
    if (mfile != NULL && fclose(mfile) != 0)
    {
        perror("matched.dat");
        status = EXIT_FAILURE;
    }
    if (ufile != NULL && fclose(ufile) != 0)
    {
        perror("unmatched.dat");
        status = EXIT_FAILURE;
    }
    return status;
}


// Prompt the user and read one line of input.
//
// prompt  text printed first, followed by ":" and a newline
// n       receives at most 19 characters of the answer plus a terminating
//         '\0'; the caller must provide at least 20 bytes
//
// The end-of-line is not stored.  Input beyond the internal 149-character
// buffer is discarded, and on end of file or read error n is set to the
// empty string.
void getinput(char *prompt,char n[])
{
    char ipc[150];
    int ch;

    printf("%s:\n", prompt);
    if (fgets(ipc, sizeof ipc, stdin) == NULL)
    {
        ipc[0] = '\0';                  // nothing read: answer is empty
    }
    else if (strchr(ipc, '\n') == NULL)
    {
        // over-long line: drop the rest so the next read starts clean
        while ((ch = getc(stdin)) != EOF && ch != '\n')
            ;
    }
    ipc[strcspn(ipc, "\r\n")] = '\0';

    // strncpy alone does not terminate when the source has 19 or more
    // characters, hence the explicit terminator
    strncpy(n, ipc, 19);
    n[19] = '\0';
}


// Characters Simplex() treats as vowels, in both cases.  'y' is not listed,
// so for example "smyth" and "smith" produce different keys.
char vowel[]="aeiouAEIOU";

// Build the matching key for a name.
//
// Every run of consecutive vowels becomes a single 'A', every other
// character is copied in upper case, and a vowel at the end of the name is
// dropped - unless that would remove the first character of the key, so a
// name that starts with a vowel keeps it ("a" -> "A", "moroni" -> "MARAN").
// An empty name gives an empty key.
//
// name  the text to encode; it is not modified
// buf   receives the key; needs room for strlen(name) + 1 characters
//
// Returns buf.
char *Simplex(char *name,char *buf)
{
    size_t out = 0;
    int in_vowel_run = 0;
    const char *p;

    for (p = name; *p != '\0'; ++p)
    {
        if (strchr(vowel, *p) != NULL)
        {
            // only the first vowel of a run is written
            if (!in_vowel_run)
                buf[out++] = 'A';
            in_vowel_run = 1;
        }
        else
        {
            buf[out++] = (char)toupper((unsigned char)*p);
            in_vowel_run = 0;
        }
    }

    // A trailing 'A' can only come from a vowel (a/A are vowels themselves).
    // Requiring out > 1 keeps the first character and makes "" safe.
    if (out > 1 && buf[out - 1] == 'A')
        --out;
    buf[out] = '\0';
    return buf;
}
0
 

Author Comment

by:korsila
ID: 6186722
apparently, it doesn't work...

for example I compared the name "smith" (smath after coding) with my datafile (surnames.dat)
smith
smyth
smythe
smieth
smeth
smeeth
smiith

all the name from the datafile supposed to match with name smith (smath-after to be coded from your algorithm)

However, I think you forgot to code the names from the datafile to "smath" before comparing for a match... that's why the program returned "no match found", which was absolutely wrong...

Could you fix it a bit... it should work exactly the same way as when you apply soundex (my code above) to the name from the datafile and the name from the user and then look for matches...

many thanks,
korsila

p.s. yes i think so you deserve more point than that...so more points has been added also...





 
0
6 Surprising Benefits of Threat Intelligence

All sorts of threat intelligence is available on the web. Intelligence you can learn from, and use to anticipate and prepare for future attacks.

 
LVL 16

Expert Comment

by:imladris
ID: 6188045
Perplexing.

I have been testing any code I post for you, and it was working OK against my sample.
Against your surnames.dat, comparing smith, I get 5 matches: all but smyth and smythe. This is because, at present, 'Y' is not in the vowel list. That would be a simple addition.

Could you verify your result? If you continue to get the wrong result, we will have to try some debugging.
0
 

Author Comment

by:korsila
ID: 6190965
ah ha I see...
I tested with a huge datafile which contained a great many different surnames, but when I cut it down to only 10 names based on "smith" it worked pretty well. I am just wondering why it didn't work with the huge datafile containing about 1000 names... Never mind, you have been working hard on it, and I have no doubt about accepting this as an excellent answer.


many thanks...Imladris..

Korsila

0
 
LVL 16

Expert Comment

by:imladris
ID: 6191573
Thanx.

I would expect that the number of names per se would not be an issue. It is more likely that the actual format of the file was somehow unexpected.
0
 

Author Comment

by:korsila
ID: 6194680
maybe i need to improve this algorithm to suit the the datafile...
let me think if i can come up with something...

I would try to tidy up my datafile a bit and will test how it works....

thanks for a litle advise but it's worth following..!!

Korsila
 
0
 

Author Comment

by:korsila
ID: 6202233
Dear Imladris,

I was not able to get this algorithm working with my datafile...

I haven't got any clue what the problem is...
so I have just sent you my datafile, which contains 100 names, as I mentioned before...
It contains variants of 5 different base names: brown, jones, smith, williams, moran

you could try "smith" or "williams" ..

I hope you can find the mistake for me and get it working again...!!


many thanks,
Korsila

datafile
---------------

Braun
Braune
Brawn
Brawne
Broun
Broune
Browen
Browm
Brown
Browne
Brownee
Brownen
Browney
Brownie
Brownin
Brownne
Brownwon
Bruan
Bruane
Bruen
Jhones
Jhonnes
Joanas
Joanes
Joans
Joanse
Joenes
Joens
Johnes
Johns
Johnse
Johnss
Jomes
Jonas
Jonce
Jonees
Jones
Jonnes
Jonns
Jons
Moeran
Mooran
Mooren
Moorham
Moorhen
Morahan
Moran
Morean
Moreen
Moreham
Morehan
Moren
Morham
Morhan
Morhen
Morine
Morman
Morrain
Morran
Morren
Smeath
Smeeth
Smeethe
Smeith
Smeth
Smethe
Smett
Smieth
Smit
Smite
Smith
Smithe
Smithee
Smithie
Smithy
Smitt
Smitte
Smitth
Smity
Smiyth
Willeams
Willeans
Willems
Willens
Williamas
Williames
Williams
Williamse
Williamson
WilliamsonE
WilliamsonS
Williamss
Willians
Willianson
Willimason
Willimes
Willims
Wyllyames
Wyllyams
Wyllyms
0

Featured Post

Highfive + Dolby Voice = No More Audio Complaints!

Poor audio quality is one of the top reasons people don’t use video conferencing. Get the crispest, clearest audio powered by Dolby Voice in every meeting. Highfive and Dolby Voice deliver the best video conferencing and audio experience for every meeting and every room.

Join & Write a Comment

An Outlet in Cocoa is a persistent reference to a GUI control; it connects a property (a variable) to a control.  For example, it is common to create an Outlet for the text field GUI control and change the text that appears in this field via that Ou…
This tutorial is posted by Aaron Wojnowski, administrator at SDKExpert.net.  To view more iPhone tutorials, visit www.sdkexpert.net. This is a very simple tutorial on finding the user's current location easily. In this tutorial, you will learn ho…
The goal of this video is to provide viewers with basic examples to understand recursion in the C programming language.
The goal of this video is to provide viewers with basic examples to understand opening and reading files in the C programming language.

746 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question

Need Help in Real-Time?

Connect with top rated Experts

16 Experts available now in Live!

Get 1:1 Help Now