?
Solved

malloc doesn't work with a CSV file?

Posted on 2003-03-19
11
Medium Priority
?
371 Views
Last Modified: 2010-04-15
I'm trying to read in values from a CSV file and dynamically allocate memory for them. I'm using arrays of pointers, accessed through pointers to pointers.

char **field - holds column headers
char **value - holds line values

When the functions 'parse_firstline' and 'parse_lines' are called they run correctly. The test loop at the bottom of each of these functions prints out the correct values read in.

When the functions return and you try to loop over the same arrays in main, memory access errors occur.

I don't understand. You need to uncomment the code and create a sample CSV file.

Thanks for any help given.

A sample file is:


Header1,Header2,Header3
dan,23532,1
mick,24747,2
tom,45623,3


The code is:

#include <stdio.h>
#include <string.h>
#include <malloc.h>

void replace(char *buffer, char *field, char *value, char *modified);
void parse_firstline(char *file, int *numcsv, char **field);
int parse_lines(char *file, int numcsv, char **value);

// Entry point of the test program: asks parse_firstline() for the column
// headers and parse_lines() for the data rows of one CSV file.
//
// NOTE(review): C passes arguments by value, so parse_firstline() and
// parse_lines() receive copies of `field` and `value`. Both functions assign
// the result of realloc() to their own parameter, which leaves the two
// pointers declared here unchanged (and possibly stale) once the calls return.
// NOTE(review): `void main()` is not one of the standard signatures of main.
void main()
{
      int numlines;
      // Only used by the commented-out test loops below
      int loop;
      char *file={"C:\\windows\\desktop\\fromcsv.csv"};

      // Number of CSV values
      int numcsv=0;

      // Each array starts as a heap block with room for a single char *
      char **field=malloc(sizeof(char *));
      char **value=malloc(sizeof(char *));

      // numcsv is passed by address and is updated; field is passed by value
      parse_firstline(file, &numcsv, field);

      // test
      //for(loop=0;loop<numcsv;loop++)
      //{
      //      printf("%s\n", field[loop]);
      //}
      // test

      // value is passed by value as well; only the row count comes back
      numlines=parse_lines(file, numcsv, value);

      // test
      //for(loop=0;loop<(numlines*numcsv);loop+=numcsv)
      //{
      //      printf("%s %s %s\n", value[loop], value[loop+1], value[loop+2]);
      //}
      // test

      return;
}

// Reads every line after the first one of `file`, splits it at commas and
// stores a heap copy of each field at value[(line*numcsv)+column].
//
// file   - path of the CSV file to open
// numcsv - number of comma separated values expected per line
// value  - array of char * that is resized with realloc() as fields arrive
//
// Returns the line counter after the adjustment made at the end of the loop.
//
// NOTE(review): `value` is a by-value parameter. Every realloc() below is
// assigned to this local copy only, so the caller never sees the block that
// holds the parsed fields.
int parse_lines(char *file, int numcsv, char **value)
{
      // NOTE(review): the line counter is a char although the function
      // returns int
      char numlines=0;
      char buffer[1024];
      FILE *csv;
      int csvpos;
      char *found;
      char *ptr;
      // The string "," followed by an explicit extra terminator
      char *deliminator={",\0"};
      int valuessize;
      // Only used by the commented-out test loop at the end
      int pos=0;
      
      csv=fopen(file, "r");
      if(csv==NULL)
      {
            // NOTE(review): execution continues with csv==NULL after this
            printf("Could not open file\n");
      }

      // Discard first line
      fgets(buffer, sizeof(buffer), csv);

      // Go through file until EOF
      // NOTE(review): feof() is tested before the read and the fgets() result
      // is not checked, so the body can run once with no new line in buffer
      while(!feof(csv))
      {
            // Read in line
            fgets(buffer, sizeof(buffer), csv);

            // Set pointer
            ptr=&buffer[0];

            // Go through line according to numcsv
            for(csvpos=0;csvpos<numcsv;csvpos++)
            {
                  if(strstr(ptr, deliminator)!=NULL)
                  {
                        found=strstr(ptr, deliminator);

                        // Length of the text in front of the next comma
                        valuessize=strlen(ptr)-strlen(found);

                        // Resize array of pointers
                        value=realloc(value, ((numlines*numcsv)+1+csvpos)*sizeof(char *));

                        // Allocate memory
                        value[(numlines*numcsv)+csvpos]=malloc(valuessize+1);

                        // Add NULL termination
                        // (overwrites the comma inside buffer)
                        ptr[valuessize]='\0';

                        // Copy data into array
                        memcpy(value[(numlines*numcsv)+csvpos], ptr, valuessize+1);
      
                        // Set ptr to next position
                        ptr=&ptr[valuessize+1];

                        if(strstr(ptr, deliminator)==NULL)
                        {
                              // Done if theres data after a comma

                              csvpos++;
                              
                              // Resize array of pointers
                              value=realloc(value, (((numlines*numcsv)+1+csvpos)*sizeof(char *)));

                              // Allocate memory
                              // NOTE(review): valuessize still holds the length
                              // of the previous field, not of this last one
                              value[(numlines*numcsv)+csvpos]=malloc(valuessize+1);

                              // Add NULL termination
                              // (overwrites the last character of the line,
                              // presumably the newline kept by fgets)
                              ptr[strlen(ptr)-1]='\0';

                              // Copy data into array
                              // NOTE(review): copies valuessize+1 bytes, again
                              // the previous field's size
                              memcpy(value[(numlines*numcsv)+csvpos], ptr, valuessize+1);
                        }
                        
                  }
            }
            // Increment number of lines
            numlines++;
      };

      // Presumably compensates for the extra pass of the loop above
      numlines--;

      // test
      //for(pos=0;pos<(numlines*3);pos+=numcsv)
      //{
      //      printf("%s %s %s\n", value[pos], value[pos+1], value[pos+2]);
      //}
      // test

      // NOTE(review): csv is not closed before returning
      return numlines;
}

// Copies `buffer` into `modified`, replacing every occurrence of the macro
// %%field%% (the field name wrapped in two percent signs on each side) with
// `value`.
//
// buffer   - NUL terminated input text; it is not changed
// field    - macro name without the surrounding percent signs
// value    - replacement text
// modified - output buffer supplied by the caller. It must not overlap
//            `buffer` and must be large enough for the expanded text: every
//            occurrence changes the length by strlen(value)-(strlen(field)+4).
//
// Text without any macro is copied unchanged. If a NULL pointer is passed the
// function does nothing; if the temporary macro string cannot be allocated,
// `modified` receives an unchanged copy of `buffer`.
void replace(char *buffer, char *field, char *value, char *modified)
{
      char *macro;
      char *found;
      char *src;
      char *dst;
      size_t fieldlen;
      size_t macrolen;
      size_t valuelen;
      size_t prefix;

      if(buffer==NULL || field==NULL || value==NULL || modified==NULL)
      {
            return;
      }

      fieldlen=strlen(field);
      valuelen=strlen(value);

      // Two percent signs on either side of the name
      macrolen=fieldlen+4;

      macro=malloc(macrolen+1);
      if(macro==NULL)
      {
            memmove(modified, buffer, strlen(buffer)+1);
            return;
      }
      memcpy(macro, "%%", 2);
      memcpy(macro+2, field, fieldlen);
      memcpy(macro+2+fieldlen, "%%", 3);

      // src walks the input, dst the output; neither ever moves backwards
      src=buffer;
      dst=modified;

      found=strstr(src, macro);
      while(found!=NULL)
      {
            // Text in front of the macro, then the replacement
            prefix=(size_t)(found-src);
            memcpy(dst, src, prefix);
            dst+=prefix;

            memcpy(dst, value, valuelen);
            dst+=valuelen;

            // Continue searching behind the macro
            src=found+macrolen;
            found=strstr(src, macro);
      }

      // Remaining text including the terminator
      memcpy(dst, src, strlen(src)+1);

      free(macro);

      return;
}

// Reads the first line of `file`, splits it at commas and stores a heap copy
// of each column header in field[*numcsv], incrementing *numcsv per header.
//
// file   - path of the CSV file to open
// numcsv - in/out: index of the next free slot, incremented for every header
// field  - array of char * that is resized with realloc() as headers arrive
//
// NOTE(review): `field` is a by-value parameter. Every realloc() below is
// assigned to this local copy only, so the caller never sees the block that
// holds the parsed headers.
void parse_firstline(char *file, int *numcsv, char **field)
{
      char buffer[1024];
      FILE *csv;

      char *ptr;
      char *found;
      // The string "," followed by an explicit extra terminator
      char *deliminator={",\0"};
      int fieldsize;
      // Only used by the commented-out test loop at the end
      int pos=0;

      csv=fopen(file, "r");
      if(csv==NULL)
      {
            // NOTE(review): execution continues with csv==NULL after this
            printf("Could not open file\n");
      }

      // NOTE(review): the result of fgets() is not checked
      fgets(buffer, sizeof(buffer), csv);

      ptr=&buffer[0];

      // One pass per header that is followed by a comma
      while(strstr(ptr, deliminator)!=NULL)
      {
            found=strstr(ptr, deliminator);

            // Length of the text in front of the next comma
            fieldsize=strlen(ptr)-strlen(found);

            field=realloc(field, ((*numcsv+1)*sizeof(char *)));
            field[*numcsv]=malloc(fieldsize+1);

            // Overwrites the comma inside buffer with a terminator
            ptr[fieldsize]='\0';

            memcpy(field[*numcsv], ptr, fieldsize+1);
      
            // Step over the header and its former comma
            ptr=&ptr[fieldsize+1];

            (*numcsv)++;

      }

      // Last header: the loop above only ends once no comma is left, so this
      // condition holds whenever it is reached
      if(strstr(ptr, deliminator)==NULL)
      {
            field=realloc(field, ((*numcsv+1)*sizeof(char *)));
            // NOTE(review): fieldsize is the length of the previous header
            // (unset if the loop never ran), while the copy below transfers
            // strlen(ptr)+1 bytes
            field[*numcsv]=malloc(fieldsize+1);
            // Overwrites the last character, presumably the newline kept by fgets
            ptr[strlen(ptr)-1]='\0';
            memcpy(field[*numcsv], ptr, strlen(ptr)+1);
            (*numcsv)++;
      }

      // test
      //for(pos=0;pos<*numcsv;pos++)
      //{
      //      printf("%s\n", field[pos]);
      //}
      // test

      // NOTE(review): csv is not closed before returning
      return;
}
0
Comment
Question by:danwarner
[X]
Welcome to Experts Exchange

Add your voice to the tech community where 5M+ people just like you are talking about what matters.

  • Help others & share knowledge
  • Earn cash & points
  • Learn & ask questions
  • 6
  • 4
11 Comments
 
LVL 16

Expert Comment

by:imladris
ID: 8170044
realloc cannot be used in the way the code suggests. Firstly you are not checking for failures (realloc returns NULL if the realloc is not possible), and secondly it will often fail if it is mixed in with mallocs.

realloc *changes* the size of the allocated block. So, following your example, field points to a 4 byte block, then value is set to point to a 4 byte block. Then, in parse_firstline, you go to change the block that field points to from a 4 byte block to an 8 byte block; but it can't! The next 4 bytes are already taken by the block that value points to! In general realloc can only usefully be used to change the size of the last block allocated, and even that isn't guaranteed.

If you want to pursue the kind of solution you have, I would suggest that you write your own realloc. What it would need to do is allocate a new block of memory, copy the contents of the old block to the new one, and then free the old one. This could, of course, cause a lot of memory fragmentation and cost CPU cycles for copying, but as long as the file isn't more than a fraction (10%?) of the memory available, that will probably not cause a noticeable problem. If you need to handle big files, you will probably have to consider some other, more efficient mechanism.

Your new realloc could be something like:

/*
 * cleverrealloc - hand-written realloc: allocate a new block, copy the old
 * contents into it and release the old block.
 *
 * field   - block to replace; may be NULL, in which case nothing is copied
 * oldsize - number of valid bytes in the old block
 * newsize - size of the block to return
 *
 * Returns the new block, or NULL if newsize is negative or the allocation
 * fails. On a NULL return the old block is left untouched and still belongs
 * to the caller.
 */
void *cleverrealloc(char *field,int oldsize,int newsize)
{   char *newfield;
    int keep;

    if(newsize<0)return(NULL);

    newfield=malloc((size_t)newsize);
    if(newfield==NULL)return(NULL);

    /* Never copy more than the new block can hold (shrinking), and never
       read from a block that does not exist. */
    keep=(oldsize<newsize)?oldsize:newsize;
    if(field!=NULL && keep>0)memcpy(newfield,field,(size_t)keep);

    free(field);
    return(newfield);
}
0
 
LVL 16

Expert Comment

by:imladris
ID: 8170058
P.S. The most common good, useful use for realloc is to allocate a block of memory that is always going to be "big enough". Then read the "data" (whatever it is) into it, and then realloc it to a *smaller* block, thus freeing the unused memory without any need for copying. This use of it won't, of course, ever fail, because you are making the block smaller.

0
 
LVL 6

Expert Comment

by:gj62
ID: 8170084
Imladris,

Are you sure?  I use realloc all the time in the manner you suggest and it has never caused this type of error.

I agree, they should be error checking, but that isn't the source of error that they are getting.

It matters not that the next block of memory is in use when calling realloc - realloc will move the memory if it has to to get enough contiguous space (or fail and return NULL if it can't).

I stepped through this in a debugger and it more appears that the array is going out of scope.  In fact, if I move it outside a function into the main() block it works just fine...

Of course, I could also be completely wrong...
0
Technology Partners: We Want Your Opinion!

We value your feedback.

Take our survey and automatically be entered to win any one of the following:
Yeti Cooler, Amazon eGift Card, and Movie eGift Card!

 
LVL 6

Expert Comment

by:gj62
ID: 8170179
From the man page for malloc() ( which includes realloc() ):

 realloc()  changes the size of the memory block pointed to
       by ptr to size bytes.  The contents will be  unchanged  to
       the minimum of the old and new sizes; newly allocated mem-
       ory will be uninitialized.  If ptr is NULL,  the  call  is
       equivalent  to malloc(size); if size is equal to zero, the
       call is equivalent to free(ptr).  Unless ptr is  NULL,  it
       must  have  been  returned by an earlier call to malloc(),
       calloc() or realloc().


It specifically states that it can be matched to malloc(), and that memory will be moved and retained (if newsize > oldsize) if it can't just add onto the existing allocation.

I continue to believe this is a scoping problem - still working it through though...
0
 
LVL 16

Expert Comment

by:imladris
ID: 8170239
gj62:

it looks like you are right. It may be my age showing here. When looking through the details of the realloc documentation, it does specifically mention that the block may have been moved. I don't think that was the case when I started programming in the late seventies....... (but even that may just be a symptom of my failing memory  :)   ).
0
 
LVL 6

Expert Comment

by:gj62
ID: 8170253
Well, I'm beginning to have those days too...

Anyhow, I can't really imagine it being a scoping problem, since everything is allocated on the heap.  If I examine memory it is still all there, and I can even access the first element field[0]...

Any ideas?
0
 
LVL 6

Expert Comment

by:gj62
ID: 8170282
Well, it IS a scoping problem...

If I move the declaration of value and field outside of main(), your program works like a charm...

#include <stdio.h>
#include <string.h>
#include <malloc.h>

char **field;
char **value;


void replace(char *buffer, char *field, char *value, char *modified);
void parse_firstline(char *file, int *numcsv);
int parse_lines(char *file, int numcsv);

// Entry point: loads the header names into the file-scope `field` array and
// the data rows into the file-scope `value` array.
//
// Returns 0 when at least one column and a non-negative row count were
// reported by the parsers, 1 otherwise.
int main(void)
{
     int numlines;
     char *file="C:\\fromcsv.csv";

     // Number of CSV values
     int numcsv=0;

     // Both arrays start empty: realloc(NULL, n) behaves like malloc(n), so
     // the parsers can grow them from nothing and no placeholder block is
     // needed.
     field=NULL;
     value=NULL;

     parse_firstline(file, &numcsv);

     numlines=parse_lines(file, numcsv);

     // The arrays are deliberately not freed here; the process ends right
     // after this point.
     return (numcsv>0 && numlines>=0) ? 0 : 1;
}

int parse_lines(char *file, int numcsv)
{
     char numlines=0;
     char buffer[1024];
     FILE *csv;
     int csvpos;
     char *found;
     char *ptr;
     char *deliminator={",\0"};
     int valuessize;
     int pos=0;
     
     csv=fopen(file, "r");
     if(csv==NULL)
     {
          printf("Could not open file\n");
     }

     // Discard first line
     fgets(buffer, sizeof(buffer), csv);

     // Go through file until EOF
     while(!feof(csv))
     {
          // Read in line
          fgets(buffer, sizeof(buffer), csv);

          // Set pointer
          ptr=&buffer[0];

          // Go through line according to numcsv
          for(csvpos=0;csvpos<numcsv;csvpos++)
          {
               if(strstr(ptr, deliminator)!=NULL)
               {
                    found=strstr(ptr, deliminator);

                    valuessize=strlen(ptr)-strlen(found);

                    // Resize array of pointers
                    value=(char **)realloc(value, ((numlines*numcsv)+1+csvpos)*sizeof(char *));

                    // Allocate memory
                    value[(numlines*numcsv)+csvpos]=(char *)malloc(valuessize+1);

                    // Add NULL termination
                    ptr[valuessize]='\0';

                    // Copy data into array
                    memcpy(value[(numlines*numcsv)+csvpos], ptr, valuessize+1);
     
                    // Set ptr to next position
                    ptr=&ptr[valuessize+1];

                    if(strstr(ptr, deliminator)==NULL)
                    {
                         // Done if theres data after a comma

                         csvpos++;
                         
                         // Resize array of pointers
                         value=(char **)realloc(value, (((numlines*numcsv)+1+csvpos)*sizeof(char *)));

                         // Allocate memory
                         value[(numlines*numcsv)+csvpos]=(char *)malloc(valuessize+1);

                         // Add NULL termination
                         ptr[strlen(ptr)-1]='\0';

                         // Copy data into array
                         memcpy(value[(numlines*numcsv)+csvpos], ptr, valuessize+1);
                    }
                   
               }
          }
          // Increment number of lines
          numlines++;
     };

     numlines--;

     // test
     //for(pos=0;pos<(numlines*3);pos+=numcsv)
     //{
     //     printf("%s %s %s\n", value[pos], value[pos+1], value[pos+2]);
     //}
     // test

     return numlines;
}

// Copies `buffer` into `modified`, replacing every occurrence of the macro
// %%field%% (the field name wrapped in two percent signs on each side) with
// `value`.
//
// buffer   - NUL terminated input text; it is not changed
// field    - macro name without the surrounding percent signs
// value    - replacement text
// modified - output buffer supplied by the caller. It must not overlap
//            `buffer` and must be large enough for the expanded text: every
//            occurrence changes the length by strlen(value)-(strlen(field)+4).
//
// Text without any macro is copied unchanged. If a NULL pointer is passed the
// function does nothing; if the temporary macro string cannot be allocated,
// `modified` receives an unchanged copy of `buffer`.
void replace(char *buffer, char *field, char *value, char *modified)
{
     char *macro;
     char *found;
     char *src;
     char *dst;
     size_t fieldlen;
     size_t macrolen;
     size_t valuelen;
     size_t prefix;

     if(buffer==NULL || field==NULL || value==NULL || modified==NULL)
     {
          return;
     }

     fieldlen=strlen(field);
     valuelen=strlen(value);

     // Two percent signs on either side of the name
     macrolen=fieldlen+4;

     macro=(char *)malloc(macrolen+1);
     if(macro==NULL)
     {
          memmove(modified, buffer, strlen(buffer)+1);
          return;
     }
     memcpy(macro, "%%", 2);
     memcpy(macro+2, field, fieldlen);
     memcpy(macro+2+fieldlen, "%%", 3);

     // src walks the input, dst the output; neither ever moves backwards
     src=buffer;
     dst=modified;

     found=strstr(src, macro);
     while(found!=NULL)
     {
          // Text in front of the macro, then the replacement
          prefix=(size_t)(found-src);
          memcpy(dst, src, prefix);
          dst+=prefix;

          memcpy(dst, value, valuelen);
          dst+=valuelen;

          // Continue searching behind the macro
          src=found+macrolen;
          found=strstr(src, macro);
     }

     // Remaining text including the terminator
     memcpy(dst, src, strlen(src)+1);

     free(macro);

     return;
}

// Reads the header line of the CSV file and stores a heap copy of every
// column name in the file-scope `field` array.
//
// Names are appended starting at index *numcsv and *numcsv is incremented for
// each one, so on return it holds the number of columns (given it was 0 on
// entry). An empty header line yields one empty name; a trailing comma yields
// a final empty name.
//
// file   - path of the CSV file
// numcsv - in/out column counter, see above
//
// If the file cannot be opened or read, or memory runs out, the function
// returns early and *numcsv reflects only the names stored so far. The caller
// owns every string and the array.
//
// Limitation: only the first 1023 characters of the header line are read.
void parse_firstline(char *file, int *numcsv)
{
     char buffer[1024];
     FILE *csv;
     char **grown;
     char *ptr;
     char *copy;
     size_t len;
     int more=0;

     if(file==NULL || numcsv==NULL)
     {
          return;
     }

     csv=fopen(file, "r");
     if(csv==NULL)
     {
          printf("Could not open file\n");
          return;
     }

     if(fgets(buffer, sizeof(buffer), csv)==NULL)
     {
          fclose(csv);
          return;
     }

     // Only the first line is needed, so the file can be closed right away
     fclose(csv);

     // Drop the line ending (LF or CRLF) if there is one, instead of blindly
     // cutting off the last character
     buffer[strcspn(buffer, "\r\n")]='\0';

     ptr=buffer;
     do
     {
          // Each name is sized from its own length
          len=strcspn(ptr, ",");

          grown=(char **)realloc(field, (((size_t)*numcsv+1)*sizeof(char *)));
          if(grown==NULL)
          {
               break;
          }
          field=grown;

          copy=(char *)malloc(len+1);
          if(copy==NULL)
          {
               break;
          }
          memcpy(copy, ptr, len);
          copy[len]='\0';

          field[*numcsv]=copy;
          (*numcsv)++;

          // A comma means another name follows
          ptr+=len;
          more=(*ptr==',');
          if(more)
          {
               ptr++;
          }
     } while(more);

     return;
}
0
 
LVL 6

Accepted Solution

by:
gj62 earned 400 total points
ID: 8176803
FIXED!

Ok, repeat after me, C is pass-by-value...

Basically, you need to pass the address of the char **, in other words, a char ***, if you want the allocations that you make to not get lost when you return from the functions.

BTW - you never close the file, but you open 2 handles to it - this will cause a file-handle leak - use fclose before you return from those functions...

Here's the code - it works with your sample file - I printed out the results here...

Let me know if you have any questions...

---------------------CUT HERE----------------------------
#include <stdio.h>
#include <string.h>
#include <malloc.h>

void replace(char *buffer, char *field, char *value, char *modified);
void parse_firstline(char *file, int *numcsv, char ***field);
int parse_lines(char *file, int numcsv, char ***value);

// Entry point: loads the header names and the data rows of the CSV file,
// prints every row with its values separated by single spaces, and releases
// the memory again.
//
// Returns 0 when at least one column and a non-negative row count were
// reported by the parsers, 1 otherwise.
int main(void)
{
    int numlines;
    int loop;
    int col;
    char *file="C:\\fromcsv.csv";

    // Number of CSV values
    int numcsv=0;

    // Both arrays start empty: realloc(NULL, n) behaves like malloc(n), so
    // the parsers can grow them from nothing. Their ADDRESSES are passed so
    // that the parsers can hand the resized blocks back.
    char **field=NULL;
    char **value=NULL;

    parse_firstline(file, &numcsv, &field);

    numlines=parse_lines(file, numcsv, &value);

    // Print row by row, using however many columns the header declared
    // instead of assuming exactly three
    for(loop=0;loop<numlines;loop++)
    {
         for(col=0;col<numcsv;col++)
         {
              printf("%s%s", (col>0) ? " " : "", value[(loop*numcsv)+col]);
         }
         printf("\n");
    }

    // Every string was allocated separately, so free them before the arrays
    for(loop=0;loop<(numlines*numcsv);loop++)
    {
         free(value[loop]);
    }
    free(value);

    for(loop=0;loop<numcsv;loop++)
    {
         free(field[loop]);
    }
    free(field);

    return (numcsv>0 && numlines>=0) ? 0 : 1;
}

// Loads every data row of the CSV file into the array that *value points to.
//
// The first line (the header) is skipped. Every following non-empty line
// contributes exactly `numcsv` heap-allocated strings, stored at
// (*value)[(row*numcsv)+column]. A row with fewer fields is padded with empty
// strings; fields beyond `numcsv` are ignored. Blank lines are skipped.
//
// file   - path of the CSV file
// numcsv - number of columns per row, as counted by parse_firstline()
// value  - address of the caller's array pointer. *value must be NULL or a
//          block obtained from malloc()/realloc(); it is resized here and the
//          new address is written back through this pointer.
//
// Returns the number of rows stored, or -1 if the arguments are invalid or
// the file cannot be opened. If memory runs out, reading stops and the rows
// completed so far are returned. The caller owns every string and the array.
//
// Limitation: lines longer than the 1023 characters that fit into the line
// buffer are split by fgets() and treated as several lines.
int parse_lines(char *file, int numcsv, char ***value)
{
    char buffer[1024];
    FILE *csv;
    char **grown;
    char *ptr;
    char *copy;
    size_t base;
    size_t len;
    int csvpos;
    // int rather than char: a char counter wraps after 127 rows
    int numlines=0;

    if(file==NULL || value==NULL || numcsv<=0)
    {
         return -1;
    }

    csv=fopen(file, "r");
    if(csv==NULL)
    {
         printf("Could not open file\n");
         return -1;
    }

    // Discard first line; a file without one has no data rows either
    if(fgets(buffer, sizeof(buffer), csv)==NULL)
    {
         fclose(csv);
         return 0;
    }

    // Test the read itself instead of feof(): feof() only becomes true after
    // a read has already failed, which made the old loop run once too often.
    while(fgets(buffer, sizeof(buffer), csv)!=NULL)
    {
         // Drop the line ending (LF or CRLF); the last line may have none
         buffer[strcspn(buffer, "\r\n")]='\0';

         if(buffer[0]=='\0')
         {
              continue;
         }

         // Make room for one complete row
         base=(size_t)numlines*(size_t)numcsv;
         grown=(char **)realloc(*value, (base+(size_t)numcsv)*sizeof(char *));
         if(grown==NULL)
         {
              break;
         }
         *value=grown;

         ptr=buffer;
         for(csvpos=0;csvpos<numcsv;csvpos++)
         {
              // Each field is sized from its own length
              len=strcspn(ptr, ",");
              copy=(char *)malloc(len+1);
              if(copy==NULL)
              {
                   break;
              }
              memcpy(copy, ptr, len);
              copy[len]='\0';
              grown[base+(size_t)csvpos]=copy;

              // Step over the field and its comma; at the end of the line
              // ptr stays on the terminator so missing fields become ""
              ptr+=len;
              if(*ptr==',')
              {
                   ptr++;
              }
         }

         if(csvpos<numcsv)
         {
              // Out of memory part-way through: drop the incomplete row
              while(csvpos>0)
              {
                   csvpos--;
                   free(grown[base+(size_t)csvpos]);
              }
              break;
         }

         // Increment number of lines
         numlines++;
    }

    fclose(csv);
    return numlines;
}

// Copies `buffer` into `modified`, replacing every occurrence of the macro
// %%field%% (the field name wrapped in two percent signs on each side) with
// `value`.
//
// buffer   - NUL terminated input text; it is not changed
// field    - macro name without the surrounding percent signs
// value    - replacement text
// modified - output buffer supplied by the caller. It must not overlap
//            `buffer` and must be large enough for the expanded text: every
//            occurrence changes the length by strlen(value)-(strlen(field)+4).
//
// Text without any macro is copied unchanged. If a NULL pointer is passed the
// function does nothing; if the temporary macro string cannot be allocated,
// `modified` receives an unchanged copy of `buffer`.
void replace(char *buffer, char *field, char *value, char *modified)
{
    char *macro;
    char *found;
    char *src;
    char *dst;
    size_t fieldlen;
    size_t macrolen;
    size_t valuelen;
    size_t prefix;

    if(buffer==NULL || field==NULL || value==NULL || modified==NULL)
    {
         return;
    }

    fieldlen=strlen(field);
    valuelen=strlen(value);

    // Two percent signs on either side of the name
    macrolen=fieldlen+4;

    macro=(char *)malloc(macrolen+1);
    if(macro==NULL)
    {
         memmove(modified, buffer, strlen(buffer)+1);
         return;
    }
    memcpy(macro, "%%", 2);
    memcpy(macro+2, field, fieldlen);
    memcpy(macro+2+fieldlen, "%%", 3);

    // src walks the input, dst the output; neither ever moves backwards
    src=buffer;
    dst=modified;

    found=strstr(src, macro);
    while(found!=NULL)
    {
         // Text in front of the macro, then the replacement
         prefix=(size_t)(found-src);
         memcpy(dst, src, prefix);
         dst+=prefix;

         memcpy(dst, value, valuelen);
         dst+=valuelen;

         // Continue searching behind the macro
         src=found+macrolen;
         found=strstr(src, macro);
    }

    // Remaining text including the terminator
    memcpy(dst, src, strlen(src)+1);

    free(macro);

    return;
}

// Reads the header line of the CSV file and stores a heap copy of every
// column name in the array that *field points to.
//
// Names are appended starting at index *numcsv and *numcsv is incremented for
// each one, so on return it holds the number of columns (given it was 0 on
// entry). An empty header line yields one empty name; a trailing comma yields
// a final empty name.
//
// file   - path of the CSV file
// numcsv - in/out column counter, see above
// field  - address of the caller's array pointer. *field must be NULL or a
//          block obtained from malloc()/realloc(); it is resized here and the
//          new address is written back through this pointer.
//
// If the file cannot be opened or read, or memory runs out, the function
// returns early and *numcsv reflects only the names stored so far. The caller
// owns every string and the array.
//
// Limitation: only the first 1023 characters of the header line are read.
void parse_firstline(char *file, int *numcsv, char ***field)
{
    char buffer[1024];
    FILE *csv;
    char **grown;
    char *ptr;
    char *copy;
    size_t len;
    int more=0;

    if(file==NULL || numcsv==NULL || field==NULL)
    {
         return;
    }

    csv=fopen(file, "r");
    if(csv==NULL)
    {
         printf("Could not open file\n");
         return;
    }

    if(fgets(buffer, sizeof(buffer), csv)==NULL)
    {
         fclose(csv);
         return;
    }

    // Only the first line is needed, so the file can be closed right away
    fclose(csv);

    // Drop the line ending (LF or CRLF) if there is one, instead of blindly
    // cutting off the last character
    buffer[strcspn(buffer, "\r\n")]='\0';

    ptr=buffer;
    do
    {
         // Each name is sized from its own length
         len=strcspn(ptr, ",");

         grown=(char **)realloc(*field, (((size_t)*numcsv+1)*sizeof(char *)));
         if(grown==NULL)
         {
              break;
         }
         *field=grown;

         copy=(char *)malloc(len+1);
         if(copy==NULL)
         {
              break;
         }
         memcpy(copy, ptr, len);
         copy[len]='\0';

         grown[*numcsv]=copy;
         (*numcsv)++;

         // A comma means another name follows
         ptr+=len;
         more=(*ptr==',');
         if(more)
         {
              ptr++;
         }
    } while(more);

    return;
}
0
 

Author Comment

by:danwarner
ID: 8176932
Thanks a lot!

I thought that just passing the char** pointer would be enough.
0
 
LVL 6

Expert Comment

by:gj62
ID: 8176956
Obviously, so did I and Imladris for awhile <grin>

Had to sleep on it, and while doing something similar today, the light went on (though it was a bit dim...<2grin>)

Good luck!
0
 
LVL 16

Expert Comment

by:imladris
ID: 8196262
Good catch gj62. Thanks for persevering beyond the "globalization" solution. I was pretty sure that couldn't be the crux of the problem.
0

Featured Post

Technology Partners: We Want Your Opinion!

We value your feedback.

Take our survey and automatically be entered to win any one of the following:
Yeti Cooler, Amazon eGift Card, and Movie eGift Card!

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

Preface I don't like visual development tools that are supposed to write a program for me. Even if it is Xcode and I can use Interface Builder. Yes, it is a perfect tool and has helped me a lot, mainly, in the beginning, when my programs were small…
Windows programmers of the C/C++ variety, how many of you realise that since Windows 9x Microsoft has been lying to you about what constitutes Unicode (http://en.wikipedia.org/wiki/Unicode)? They will have you believe that Unicode requires you to use…
The goal of this video is to provide viewers with basic examples to understand recursion in the C programming language.
The goal of this video is to provide viewers with basic examples to understand and use switch statements in the C programming language.
Suggested Courses

762 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question