crazy4s
asked on
sorting an array of structs
Hi all,
Given a csv file and I need to read the data and create a sorted array of structs, each of which contains the country code, name, population and life expectancy. So i've to build an index array in memory and sort it using insertion sort and then write (country code with its offset) to a directory file where I'm able to search using the country code.
here is what I have right now, but the sorted array is not working???
Output:
-------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- --------
CODE COUNTRY NAME POPULATION LIFE EXPECTANCY OFFSET
-------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- --------
DZA Algeria 31471000 69.699997 0
AGO Angola 12878000 38.299999 64
BEN Benin 6097000 50.200001 128
BWA Botswana 1622000 39.299999 192
IOT British Indian Ocean Territory 0 0.000000 256
BFA Burkina Faso 11937000 46.700001 320
BDI Burundi 6695000 46.200001 384
Before Sort:
DZA 0
AGO 64
BEN 128
BWA 192
IOT 256
BFA 320
BDI 384
After Sort:
DZA 0
AGO 64
AGO 128
AGO 192
AGO 256
AGO 320
AGO 384
------THE END------
what i want is something like that:
After Sort:
AGO 64
BD1 384
BFA 320
...and so on
any help will be greatly apreciated:)
Given a csv file and I need to read the data and create a sorted array of structs, each of which contains the country code, name, population and life expectancy. So i've to build an index array in memory and sort it using insertion sort and then write (country code with its offset) to a directory file where I'm able to search using the country code.
here is what I have right now, but the sorted array is not working???
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ourhdr.h"
typedef struct // Country Data struct declaration
{
char code[4]; // Country Code
char name[50]; // Country Name
float life_expectancy; // Life Expentancy of the Country's citizen
int pop; // Country's Population
} DATATYPE;
typedef struct // Index Data struct declaration
{
char code[4];
long int offset; // offset of the data
} INDEX;
INDEX StructArray[300]; // for a lot of them
DATATYPE ActualStruct; // for a single struct
void sort(INDEX SortArray[], int n)
{
int j;
for(j=0; j < n;j++)
{
int k;
char *temp = SortArray[j].code;
for (k= j-1; k >= 0; k--)
{
if(strcmp(SortArray[k].code, temp) >= 0)
break;
strcpy(SortArray[k+1].code, SortArray[k].code);
}
strcpy( SortArray[k+1].code, temp); // strcpy
}
}
int main()
{
int i = 0;
int size = 0;
FILE *FI, *FO, *FO2;
char Line[1000]; // NO input line (delimited by \n) is longer than 1000
char * Tok; // token from line
if((FI = fopen("AC1.txt", "r")) == NULL) // open a stream from "countries"
err_sys("Open Fail\n");
if((FO = fopen("DATABASE.txt", "wb")) == NULL) // open a stream from "countries"
err_sys("Open DATABASE Fail\n");
if((FO2 = fopen("DIRECTORY.txt", "w+")) == NULL) // open a stream from "countries"
err_sys("Open DIRECTORY Fail\n");
while ( fgets( (char *) Line,1000,FI) != NULL) // fill buffer up to newline or eof or error
{
Tok = strtok( Line ,",\n"); // skip ID number
Tok = strtok( NULL ,",\n"); // this should get the country code
ActualStruct.code[0] = Tok[0];
ActualStruct.code[1] = Tok[1];
ActualStruct.code[2] = Tok[2];
ActualStruct.code[3] = 0; // To create "string"
Tok = strtok( NULL ,",\n"); // this should get the country name
strncpy(&ActualStruct.name[0],Tok,50); // copy string at most 50 chars
Tok = strtok( NULL ,",\n"); // skip
Tok = strtok( NULL ,",\n"); // skip
Tok = strtok( NULL ,",\n"); // skip
Tok = strtok( NULL ,",\n"); // skip
Tok = strtok( NULL ,",\n"); // this should get the population
ActualStruct.pop = atoi(Tok); // convert string to integer
Tok = strtok( NULL ,",\n"); // this one should get life expectancy
ActualStruct.life_expectancy = atof(Tok); // convert string to a floating point number
strcpy(StructArray[i].code, ActualStruct.code); // get the current code
StructArray[i].offset = ftell(FO); // get the current offset in the DATABASE file
if (fwrite(&ActualStruct, sizeof(DATATYPE),1,FO) != 1) // write the struct to the DATABASE file
err_sys("fwrite error");
printf("%5s%50s%15u%18f%10u\n",&ActualStruct.code[0],&ActualStruct.name[0],ActualStruct.pop,ActualStruct.life_expectancy,StructArray[i].offset);
i++;
size = i;
}
printf("Before Sort:\n");
for (i=0; i<size; i++)
{
printf("%s \t %u\n",StructArray[i].code, StructArray[i].offset);
}
sort(StructArray,sizeof(StructArray)/sizeof(*StructArray)); // call sort function
printf("After Sort:\n");
for (i=0; i<size; i++)
{
printf("%s \t %u\n",StructArray[i].code, StructArray[i].offset);
}
if (fclose(FI)) // close file descriptor FI
err_sys("Close error\n");
if (fclose(FO)) // close file descriptor FO
err_sys("Close error\n");
if (fclose(FO2)) // close file descriptor FO2
err_sys("Close error\n");
printf("------THE END------ \n"); // and this if not error :)
}
Output:
--------------------------
CODE COUNTRY NAME POPULATION LIFE EXPECTANCY OFFSET
--------------------------
DZA Algeria 31471000 69.699997 0
AGO Angola 12878000 38.299999 64
BEN Benin 6097000 50.200001 128
BWA Botswana 1622000 39.299999 192
IOT British Indian Ocean Territory 0 0.000000 256
BFA Burkina Faso 11937000 46.700001 320
BDI Burundi 6695000 46.200001 384
Before Sort:
DZA 0
AGO 64
BEN 128
BWA 192
IOT 256
BFA 320
BDI 384
After Sort:
DZA 0
AGO 64
AGO 128
AGO 192
AGO 256
AGO 320
AGO 384
------THE END------
what i want is something like that:
After Sort:
AGO 64
BD1 384
BFA 320
...and so on
any help will be greatly apreciated:)
ASKER
yes i know previously i've checked it using gdb but I dunno how to solve it? can you give some example?
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
can you elaborate more in details on this and how/where i should use it?
I'm kind of slow in understanding stuffs. thank you:)
I'm kind of slow in understanding stuffs. thank you:)
Tommy has pointed out one problem, but there are several.
Here's a fact that you may not realize.
If you have two struct objects, x and y, then if you say:
x = y;
then the entire contents of y are copied into x.
The only other c-syntax item that you may need help on is strcmp:
http://www.cplusplus.com/reference/clibrary/cstring/strcmp/
In particular, read carefully the section on Return Value.
Here's a fact that you may not realize.
If you have two struct objects, x and y, then if you say:
x = y;
then the entire contents of y are copied into x.
The only other c-syntax item that you may need help on is strcmp:
http://www.cplusplus.com/reference/clibrary/cstring/strcmp/
In particular, read carefully the section on Return Value.
ASKER
yes i understand this as if
SortArray[k] > SortArray[j]
then SortArray[k+1] = SortArray[k]
SortArray[k] = SortArray[j]
am i right?
yes and i know i should copy the whole DATATYPE struct into INDEX struct(but only show the country code and its offset) but i dunno how should i write it out? i'm really weak at this:(
SortArray[k] > SortArray[j]
then SortArray[k+1] = SortArray[k]
SortArray[k] = SortArray[j]
am i right?
yes and i know i should copy the whole DATATYPE struct into INDEX struct(but only show the country code and its offset) but i dunno how should i write it out? i'm really weak at this:(
>> am i right?
Well, as long as you understand that the '>' operator does not apply to your INDEX type, and that you meant something close to that, then, yes, you are right.
So, assuming that we're all on the same page as to the understanding of the algorithm, I urge you to write down the following table on paper for the values that you expect on line 32 in your OP. Here's the header of this table:
This table will have multiple entries for j since j is in the outer loop.
Then, go through the debugger and verify that your expectation is correct. Don't be surprised if you are surprised by what you find.
BTW - you don't have to do this for every possibilty of j,k pairs. It should only take a few entries for you to discover your problem.
You may also be surprised about some the first entry, which I find a bit odd (and I'd take off a point or two), but which still works algorithmically.
Well, as long as you understand that the '>' operator does not apply to your INDEX type, and that you meant something close to that, then, yes, you are right.
So, assuming that we're all on the same page as to the understanding of the algorithm, I urge you to write down the following table on paper for the values that you expect on line 32 in your OP. Here's the header of this table:
j k SortArray[k].code SortArray[j].code Next-LOC
Next-LOC is the next line of code that you want to execute (i.e., either 33 or 34).This table will have multiple entries for j since j is in the outer loop.
Then, go through the debugger and verify that your expectation is correct. Don't be surprised if you are surprised by what you find.
BTW - you don't have to do this for every possibilty of j,k pairs. It should only take a few entries for you to discover your problem.
You may also be surprised about some the first entry, which I find a bit odd (and I'd take off a point or two), but which still works algorithmically.
wait..
>> then SortArray[k+1] = SortArray[k] // ok
>> SortArray[k] = SortArray[j] // did you really mean j here?
>> then SortArray[k+1] = SortArray[k] // ok
>> SortArray[k] = SortArray[j] // did you really mean j here?
Will be back tonight. Hopefully, you and Tommy will have this wrapped up. There isn't much more I can say about the sort since there are only a few lines of code in it. If I say anything more, then I'd be writing it for you, and you would lose the benefit of going through the critical discipline of learning how to debug algorithms yourself.
Good Luck!
Good Luck!
ASKER
let's say now temp is BEN (j=2) so if
DZA(k=1) > BEN(j=2)
then k+1=k so now DZA(k=2)
and then compare again k--
AGO(k=0)>BEN(j=2)
IS NOT BIGGER, so fail get out of the loop and k+1 which is now (k=1) = BEN(j=2)
and the list is AGO(k=0),BEN(k=1),DZA(k=2) and so on....
i edit the sort function, i think is easier to understand
but i still can't understand the func that tommy provide?
DZA(k=1) > BEN(j=2)
then k+1=k so now DZA(k=2)
and then compare again k--
AGO(k=0)>BEN(j=2)
IS NOT BIGGER, so fail get out of the loop and k+1 which is now (k=1) = BEN(j=2)
and the list is AGO(k=0),BEN(k=1),DZA(k=2)
i edit the sort function, i think is easier to understand
void sort(INDEX SortArray[], int n)
{
int j;
for(j=0; j < n;j++)
{
int k;
char *temp = SortArray[j].code;
k = j-1;
while (strcmp(SortArray[k].code, temp) >= 0 && k>=0)
{
strcpy(SortArray[k+1].code, SortArray[k].code);
k = k - 1;
}
strcpy( SortArray[k+1].code, temp);
}
}
but i still can't understand the func that tommy provide?
It's designed to work just like the strcpy function except it copies the whole struct instead of just a string. You would call it just like you would call strcpy except you would pass the entire struct to it. I didn't test it or anything. It was meant more as a guide but I think it will work exactly as posted.
ASKER
but where should i call this function? in the main after the sort() or in the sort function?
Any time you want to copy one of your structs. So it would be in the sort fuction and would replace the strcpy lines.
I was assuming you understood the code you had. Now I'm not completely sure. Did you write and understand it or did you get it from someone else?
I was assuming you understood the code you had. Now I'm not completely sure. Did you write and understand it or did you get it from someone else?
ASKER
well the sort function was written through the algorithm given on the net and the while loop is given by the lecturer... and i added stuffs by stuffs slowly through questions at here to get till this code!
hmm so you meant that I've to call the whole datatypeCopy() func in that line //
hmm sorry i'm really trying out my best to understand what you all trying to say but i'm really slow in understanding things!
hmm so you meant that I've to call the whole datatypeCopy() func in that line //
void sort(INDEX SortArray[], int n)
{
int j;
for(j=0; j < n;j++)
{
int k;
char *temp = SortArray[j].code;
k = j-1;
while (strcmp(SortArray[k].code, temp) >= 0 && k>=0)
{
strcpy(SortArray[k+1].code, SortArray[k].code); //replacing this whole line with that func???
k = k - 1;
}
strcpy( SortArray[k+1].code, temp); //replacing this whole line with that func???
}
}
hmm sorry i'm really trying out my best to understand what you all trying to say but i'm really slow in understanding things!
Okay. I'll give you the function and explain it. Make sure to read the comments and understand them. I will leave the testing and debugging to you. First, you actually need a function to copy the INDEX type. You may want the other function but you need this one for sure.
//const because the source should not change, & so that the changes will stay
//(look up 'pass by reference' for more info.
void indexCopy(DATATYPE& dst, const DATATYPE& src)
{
strcpy(dest.code, src.code);
dest.offset = src.offset;
}
void sort(INDEX SortArray[], int n)
{
int j;
for(j=0; j < n;j++)
{
int k;
INDEX temp; //Create a blank index data object
indexCopy(temp, SortArray[j]); //copy the data from the current line
k = j-1;
while (strcmp(SortArray[k].code, temp.code) >= 0 && k>=0)
{
indexCopy(SortArray[k+1], SortArray[k]); //Now move them over
k = k - 1;
}
indexCopy(SortArray[k+1], temp);//And move the temp one back in
}
}
ASKER
hmm now i understand more about the func.. but why in the indexCopy () func you use DATAYPE struct instead of INDEX struct?
Oops. Good call. That's just a mistake. I missed it on the copy/paste. I also used dest instead of dst in one place.
ASKER
yeah i changed that too...hmm but i got an error on this line
void indexCopy(INDEX& dest, const INDEX& src) <-- syntax error before '&' token
and before that i defined INDEX dest, src; <-- is it correct to define like that because i got an error of not declaring dest and src?
void indexCopy(INDEX& dest, const INDEX& src) <-- syntax error before '&' token
and before that i defined INDEX dest, src; <-- is it correct to define like that because i got an error of not declaring dest and src?
No. It's not recognizing the word INDEX. Make sure this function is after the definition of the struct.
Do not declare dest and src anywhere else.
Do not declare dest and src anywhere else.
ASKER
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ourhdr.h"
typedef struct // Country Data struct declaration
{
char code[4]; // Country Code
char name[50]; // Country Name
float life_expectancy; // Life Expentancy of the Country's citizen
int pop; // Country's Population
} DATATYPE;
typedef struct // Index Data struct declaration
{
char code[4];
long int offset; // offset of the data
} INDEX;
INDEX dest, src;
INDEX StructArray[300]; // for a lot of them
DATATYPE ActualStruct; // for a single struct
void indexCopy(INDEX& dest, const INDEX& src)
{
strcpy(dest.code, src.code);
dest.offset = src.offset;
}
void sort(INDEX SortArray[], int n)
{
int j;
for(j=0; j < n;j++)
{
int k;
INDEX temp; //Create a blank index data object
indexCopy(temp, SortArray[j]); //copy the data from the current line
k = j-1;
while (strcmp(SortArray[k].code, temp.code) >= 0 && k>=0)
{
indexCopy(SortArray[k+1], SortArray[k]); //Now move them over
k = k - 1;
}
indexCopy(SortArray[k+1], temp);//And move the temp one back in
}
}
like this ryte?
hmm but i'm getting an error
as2.c:42: error: syntax error before '&' token
ASKER
the error is pointing at this line
void indexCopy(INDEX& dest, const INDEX& src)
void indexCopy(INDEX& dest, const INDEX& src)
Oh. I think I see the problem.
First: Take out the INDEX dest, src; line.
Then (for both structs) try defining them like this
First: Take out the INDEX dest, src; line.
Then (for both structs) try defining them like this
struct INDEX // Index Data struct declaration
{
char code[4];
long int offset; // offset of the data
}; //Don't forget this semicolon. It needs to be here.
That's the only thing that looks fishy. Otherwise, I don't see why you should get that error.
SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
I'm using C! oh yeah previously i was having the same prob too of using . instead of -> what are the difference actually? r both mean the same?
anyway i've successfully sorted out!
great thanks!
anyway i've successfully sorted out!
great thanks!
. and -> are very different.
It's a little technical, but it would be good to understand.
You use the . if you have the actual struct. You use the -> if you have a pointer.
So
INDEX x;
x.code;
INDEX* x;
x->code;
When you use * then the variable is just an address that points to where the data really is. Look up pointers if you want. They are very useful but easy to cause weird errors with.
It's a little technical, but it would be good to understand.
You use the . if you have the actual struct. You use the -> if you have a pointer.
So
INDEX x;
x.code;
INDEX* x;
x->code;
When you use * then the variable is just an address that points to where the data really is. Look up pointers if you want. They are very useful but easy to cause weird errors with.
You are only copying the code. The rest of the data is staying in its original place. (You need to copy everything, maybe you need a function?)