Question

Help needed debuging small C++ class that segfaults

Asked by: shofstetter

I am working on a class that I will be using in a much larger program on linux.  I made the class in DevC++ using the MINGW port of the gcc compiler.  The class and test program compile and run fine under windows with mingw, but when I comple it with G++ under linux amd run it I get a segfault.  I have never debuged a program under linux so I don't really know where the problem is, but I think it has something to do with malloc(),strcpy(),or strcat() working differently in linux. So can anyone debug and fix what is neccassary in the program below and maybe tell me what caused it.  The purpos of the class is to parse/extract/manipulate data from html files so the I can insert the data into a database.

#include <iostream.h>
#include <stdlib.h>
#include <alloc.h>
#include <string.h>
class page_type{
    char* html;
    char* title;
    char* body;
    char* plain;
    void set_title(void);
    void set_body(void);
    char* strip_tags(char*);
    void html_tolower(void);
public:
    char* str_replace_once(char*,char*,char*);
    char* str_to_lower(char*);
    char* trunc(char*,int);
    char* remove_symbols(char*);
    char* str_replace(char* ,char* , char*);
    char* get_title(void);
    char* get_body(void);
    void init(char*);
};

char* page_type::str_to_lower(char* string)
{
      char* temp;
      temp = (char*)malloc(strlen(string)+1);
      memcpy(temp,string,strlen(string));
      temp = str_replace("A","a",temp);
      temp = str_replace("B","b",temp);
      temp = str_replace("C","c",temp);
      temp = str_replace("D","d",temp);
      temp = str_replace("E","e",temp);
      temp = str_replace("F","f",temp);
      temp = str_replace("G","g",temp);
      temp = str_replace("H","h",temp);
      temp = str_replace("I","i",temp);
      temp = str_replace("J","j",temp);
      temp = str_replace("K","k",temp);
      temp = str_replace("L","l",temp);
      temp = str_replace("M","m",temp);
      temp = str_replace("N","n",temp);
      temp = str_replace("O","o",temp);
      temp = str_replace("P","p",temp);
      temp = str_replace("Q","q",temp);
      temp = str_replace("R","r",temp);
      temp = str_replace("S","s",temp);
      temp = str_replace("T","t",temp);
      temp = str_replace("U","u",temp);
      temp = str_replace("V","v",temp);
      temp = str_replace("W","w",temp);
      temp = str_replace("X","x",temp);
      temp = str_replace("Y","y",temp);
      temp = str_replace("Z","z",temp);
return(temp);
}



char* page_type::trunc(char* data,int len)
{
      char* temp;
      int size;
      size = strlen(data);
      if(size > len)
      {
            temp = (char*)malloc(len+10);
            memcpy(temp,data,len);
            return(temp);
      }
      else
      {
            return(data);
      }
}


char* page_type::remove_symbols(char* original)
{
      char* tempstr;
      tempstr = (char*)malloc(strlen(original)+1);
      strcpy(tempstr,original);
      tempstr = str_replace("!"," ",tempstr);
      tempstr = str_replace("@"," ",tempstr);
      tempstr = str_replace("#"," ",tempstr);
      tempstr = str_replace("$"," ",tempstr);
      tempstr = str_replace("%"," ",tempstr);
      tempstr = str_replace("^"," ",tempstr);
      tempstr = str_replace("&"," ",tempstr);
      tempstr = str_replace("*"," ",tempstr);
      tempstr = str_replace("("," ",tempstr);
      tempstr = str_replace(")"," ",tempstr);
      tempstr = str_replace("-"," ",tempstr);
      tempstr = str_replace("_"," ",tempstr);
      tempstr = str_replace("="," ",tempstr);
      tempstr = str_replace("+"," ",tempstr);
      tempstr = str_replace("\\"," ",tempstr);
      tempstr = str_replace("|"," ",tempstr);
      tempstr = str_replace("`"," ",tempstr);
      tempstr = str_replace("~"," ",tempstr);
      tempstr = str_replace("["," ",tempstr);
      tempstr = str_replace("]"," ",tempstr);
      tempstr = str_replace("{"," ",tempstr);
      tempstr = str_replace("}"," ",tempstr);
      tempstr = str_replace(";"," ",tempstr);
      tempstr = str_replace(":"," ",tempstr);
      tempstr = str_replace("'"," ",tempstr);
      tempstr = str_replace(","," ",tempstr);
      tempstr = str_replace("."," ",tempstr);
      tempstr = str_replace("<"," ",tempstr);
      tempstr = str_replace(">"," ",tempstr);
      tempstr = str_replace("/"," ",tempstr);
      tempstr = str_replace("?"," ",tempstr);
      return(tempstr);
}

char* page_type::str_replace(char* find, char* replace, char* string)
{
    char* new_string;
    char* test_var;
    int state = 1;
    new_string = (char*) malloc(strlen(string)+1);
    strcpy(new_string,string);
    while ( state == 1)
    {
        if(str_replace_once(find,replace,new_string) != NULL)
        {
                new_string = str_replace_once(find,replace,new_string);
        }
        else
        {
                state = 0;
        }
    }
    return(new_string);
}




char* page_type::str_replace_once(char* find, char* replace, char* string)
{
    int find_len;
    int replace_len;
    int string_len;
    int new_string_len;
    char* new_string;
    char* found_start;
    char* found_end;
    char* tempstring;
    tempstring = (char*)malloc(strlen(string)+1);
    strcpy(tempstring,string);
    find_len = strlen(find);
    replace_len = strlen(replace);
    string_len = strlen(tempstring);
    new_string_len = ((string_len - find_len) + replace_len);
    new_string = (char* ) malloc(new_string_len + 1);

   if(strstr(tempstring,find)!= NULL)
    {
        found_start = strstr(tempstring,find);
        found_end = strstr(tempstring,find) + find_len;
        memcpy(new_string,tempstring,string_len - strlen(found_start));
        strcat(new_string,replace);
        strcat(new_string,found_end);
        return(new_string);
    }
    else
    {
        return(NULL);
    }
}




void page_type::html_tolower(void)
{
html = str_to_lower(html);
}


char* page_type::get_title(void)
{
    if(title != NULL)
    {
        return(title);
    }
    else
    {
        return(NULL);
    }
}

char* page_type::get_body(void)
{
    if(body != NULL)
    {
        return(body);
    }
    else
    {
        return(NULL);
    }
}



void page_type::set_title(void)
{
    char* t_start;
    char* t_end;
    char* t_temp1;
    char* t_temp2;
    int size;
    int t_size;
     size = strlen(html);
    t_temp1 = (char *)malloc(size + 2);
    if(t_temp1 != NULL)
    {
        strcpy(t_temp1,html);
        t_start = strstr(t_temp1,"<title>")+7;
        t_end = strstr(t_temp1,"</title>");
        if((t_start != NULL) && (t_end != NULL))
        {
                t_size = t_end - t_start;
                if(t_size > 0)
                {
                      if(t_temp2 = (char*)malloc(t_size + 1))
                      {
                            memcpy(t_temp2,t_start,t_size);
                            t_temp2 = remove_symbols(t_temp2);
                      if(title = (char*)malloc(strlen(t_temp2)+1))
                            {
                                 strcpy(title,t_temp2);
                            }
                            else
                            {
                                 cout << "Not enough mem for title" << endl;
                            }
                      }
                      else
                      {
                           cout << "not enough mem for t_temp2" << endl;
                      }
                }
                else
                {
                cout << "No title data" << endl;
                }
        }
        else
        {
                cout << "No Title Found!" << endl;
        }
    }
    else
    {
        cout << "Not enought Mem for t-temp1 in function set_title()" << endl;
    }    
}

void page_type::set_body(void)
{
    char* b_start;
    char* b_end;
    char* b_temp1;
    char* b_temp2;
    char* b_temp3;
    char* templen;
    int size;
    int b_size;
    size = strlen(html);
    b_temp1 = (char *)malloc(size+1);
   if(b_temp1 != NULL)
   {
       strcpy(b_temp1,html);
       templen = strstr(b_temp1,"<bod");
       b_start = strstr(templen,">")+1;
       b_end = strstr(b_temp1,"</bod");
       if((b_start != NULL) && (b_end != NULL))
       {
              b_size = b_end - b_start;
              b_temp2 = (char*)malloc(b_size + 1);
              if(b_temp2 != NULL)
              {
                    memcpy(b_temp2,b_start,b_size);
                    b_temp3 = strip_tags(b_temp2);
                    b_temp3 = remove_symbols(b_temp3);
                body = (char*) malloc(strlen(b_temp3));
                    if(body != NULL)
                    {
                  
                   strcpy(body,b_temp3);
                  }
                    else
                    {
                         cout << "Not enough mem for body" << endl;
                    }
              }
              else
              {
                  cout << "not enough room for b_temp2" << endl;
              }
       }
       else
       {
            cout << "No body data" << endl;
       }
   }
   else
   {
        cout << "not enough mem for b_temp1" << endl;
   }
}

void page_type::init(char* html_data)
{
    html = (char *)malloc(strlen(html_data) + 2);
    if(html != NULL)
    {
    memcpy(html,html_data,(strlen(html_data)+1));
    html_tolower();
    set_title();
    set_body();
    }
}

char* page_type::strip_tags(char *data)
{
    char* temp1;
    char* temp2;
    char* temp3;
    char* start;
    char* end;
    int dist;
    int size;
    temp1 = (char*)malloc(strlen(data)+1);
    strcpy(temp1,data);
    while((start != NULL) && (end != NULL))
    {
        size = strlen(temp1);
        start = strstr(temp1,"<");
        end = strstr(temp1,">")+1;
        size = strlen(temp1);
        if((start != NULL) && (end != NULL))
        {
                dist = size - strlen(start);
                temp2 = (char *) malloc(dist + 2);
                memcpy(temp2,temp1,dist);
                temp3 = (char *) malloc(strlen(end)+1);
                memcpy(temp3,end,strlen(end));
                free(temp1);
                temp1 = (char *)malloc((strlen(temp2)+(strlen(temp3)+1)));
                memcpy(temp1,temp2,strlen(temp2));
                strcat(temp1,temp3);
                }
        else
        {
        break;
        }
    }
return(temp1);
}


int main(int argc, char *argv[])
{
 page_type *test;
 page_type testx;
 test = &testx;
 //char* teststr = "Replace Bable fish";
 //char* strtest;
 test->init("<html><head><TITLE>Hello World In order to test my trunc() function I need to make the title longer then 50 charactures I think I have it there now</TITLE></head><BODY bgcolor=\"red\"><b>This isn't a page;</b> about the \"hello world\" program!  Instead it is a page that I am using o test the functionality of my new class wich I am going to use for the new Delta controls search endgine spider.</BODY></html>");
 cout << test->trunc(test->get_title(),50) << endl << endl;
 cout << test->trunc(test->get_body(),200) << endl << endl;
 //cout << teststr << endl;
 //strtest = test->str_replace_once("Bable", "Gold", teststr);
 //cout << strtest << endl;
 system("PAUSE");      
  return 0;
}

This Question has been solved and asker verified All Experts Exchange premium technology solutions are available to subscription members.

Subscribe now for full access to Experts Exchange and get

Instant Access to this Solution

  • Plus...
  • 30 Day FREE access, no risk, no obligation
  • Collaborate with the world's top tech experts
  • Unlimited access to our exclusive solution database
  • Never be left without tech help again

Subscribe Now

Asked On
2003-10-15 at 12:49:01ID20768066
Tags

class

,

p

,

str_replace_once

Topic

C++ Programming Language

Participating Experts
4
Points
500
Comments
8

Trusted by hundreds of thousands everyday for fast, accurate and reliable tech support.

  • "The time we save is the biggest benefit of Experts Exchange to Warner Bros. What could take multiple guys 2 hours or more each to find is accessed in around 15 minutes on Experts Exchange." Mike Kapnisakis, Warner Bros.
  • "Our team likes having a resource that is more secure than just using Google and most experts using this service really know their stuff. It's nice to look here first versus using Google." Dayna Sellner, Lockheed Martin
  • "Anytime that I've been stumped with a problem, 9 out of 10 times Experts Exchange has either the accepted solution or an open discussion of the potential solution to the problem." Kenny Red, eBay Inc.

See what Experts Exchange can do for you.

Got a question?

We've got the answer.

Experts Exchange has been collecting answers to technology questions since 1996…3 million and counting! If you have a question, chances are we already have your answer.

Screenshot of Experts Exchange Knowledgebase

Need individual assistance?

Our experts are ready to help.

If you can't find the exact answer you're looking for, ask our exclusive community of 50,000 experts. You’ll get a personalized answer from a trusted professional.

Screenshot of Experts Exchange Knowledgebase

Want to learn from the best?

Read articles from industry experts.

Thousands of free tech tips, tricks, how-to’s and tutorials are available in our peer reviewed articles section. See for yourself how smart our experts are, no login required.

Screenshot of an Article

Working on a long term project?

Store your work and research.

Save solutions to your questions, answers you’ve discovered through searching plus helpful articles in your personal knowledgebase for easy future access.

Screenshot of Experts Exchange Knowledgebase

Access the answers to your technology questions today.

Subscribe Now

30-day free trial. Register in 60 seconds.

What Makes Experts Exchange Unique?

Members of the expert community talk about why the experience at Experts Exchange is different than what you will find anywhere else.

Trusted by the world's most respected brands.

image of each brand's logo

Faithfully serving IT professionals since 1996.

Experts Exchange Logo

Try it out and discover for yourself.

Subscribe Now

30-day free trial. Register in 60 seconds.

Related Solutions

  1. Formatting cout output?
    I have this set of code that I use to output information to the screen. But when it prints out its not formatted like I had it. Any ideas? cout << "-action Used to get or set the enviroments" << endl; cout << "[Options]" <<...
  2. cout << "Needed: Debugging ++   help";
    What i want the below code to do is read a file and read it accordingly to these steps: 1) read in first line and if there is no space in buffer[0], split up the line (delete the colon that follows the first filename) and pass the individual tokens to a function checkfiledat...

Free Tech Articles

  1. WARNING: 5 Reasons why you should NEVER fix a computer for free.
    It is in our nature to love the puzzle. We are obsessed. The lot of us. We love puzzles. We love the challenge. We thrive on finding the answer. We hate disarray. It bothers us deep in our soul. W...
  2. SCCM OSD Basic troubleshooting
    SCCM 2007 OSD is a fantastic way to deploy operating systems, however, like most things SCCM issues can sometimes be difficult to resolve due to the sheer volume of logs to sift through and the dispe...
  3. Migrate Small Business Server 2003 to Exchange 2010 and Windows 2008 R2
    This guide is intended to provide step by step instructions on how to migrate from Small Business Server 2003 to Windows 2008 R2 with Exchange 2010. For this migration to work you will need the fo...
  4. Create a Win7 Gadget
    This article shows you how to create a simple "Gadget" -- a sort of mini-application supported by Windows 7 and Vista. Gadgets can be dropped anywhere on the desktop to provide instant information, ...
  5. Outlook continually prompting for username and password
    There have been a lot of questions recently regarding Outlook prompting for a username and password whilst using Exchange 2007. There are a few reasons why this would happen and I will try to cover t...
  6. Backup Exchange 2010 Information Store using Windows Backup
    There seems to be quite a lot of confusion around the ability to backup Exchange 2010 using the built in Windows Backup feature. This stems from the omission of this feature prior to Exchange 2007 s...

Cloud Class Webinars

  1. Avoiding Bugs in Microsoft Access
    Alison Balter takes and in-depth look at avoiding bugs in Access. In this webinar you will learn about using the immediate window to debug your applications, invoking the debugger, using breakpoints to troubleshoot, stepping through code, setting the next statement to execute, ...
  2. Top 10 Best New Features in Visio 2010
    Scott Helmers gives live demonstrations of the top 10 new features in Visio 2010. This webinar will teach you how to create compelling diagrams by adding shapes to the page with a single click, linking the shapes in a diagram to data in Excel (or SQL Server, or SharePoint), ...
  3. IT Consultant Business Secrets Revealed
    Michael Munger, Experts Exchange tech pro and IT consultant, pulls back the curtain on his very successful businesses and answers question on every IT consultant and business owner should know about. He shares secrets on what he did to solve the 5 most common problems in IT, ...
  4. Disaster Recovery and Business Continuity
    Quest CTO, Mike Billon, gives an overview of the steps involved in building a dunamic disaster recovery plan. Through case studies and an examination of software/hardware tooles for monitoring and testing, you'll gain a better understandin of where you are, where you want ...
  5. Organize Your Visio Diagrams with Containers and Lists
    Scott Helmers uses cross functional flowcharts, wireframe diagrams, data graphic legends and seating charts to teach you: how to ustilize all three new structured diagram components in Visio 2010, the best practices for organizeing shapes in previous version of Visio, how to organize ...
  6. How to Us Objects, Properties, Events and Methods in Microsoft Access
    Alison Dalter gives an in-depbth look at objects, properties, events and methods in Microsoft Access. In this webinar you will learn about using the object browser, referring to objects, working with properties and methods, working with object variables, understanding the ...

Join the Community

Give a Little. Get a Lot.

Join the community of experts here and help other tech pros by answering question in your area of expertise. You can earn FREE access to all Experts Exchange's premium features and resources.

Join the Community

Answers

 

by: AmitAgarwalPosted on 2003-10-15 at 12:57:06ID: 9557444

This program has lot of bugs and memory leaks. it will be nice if you test it on windows and then port it on linux. to catch memory leaks use some purifier or numega tool.
first you add a constructor in your class and Initialize all the pointers to NULL similarly all the local variables also.
this will help you in catching bugs.

 

by: jkrPosted on 2003-10-15 at 13:12:27ID: 9557538

*Cough*, some optimizations, maybe:

#include <ctype.h>

char* page_type::str_to_lower(char* string)
{
  while ( *string++) tolower ( *string);
}

char* page_type::remove_symbols(char* string)
{
  while ( *string++) if ( !isalpha ( *string)) *string = ' ';
}

 

by: grg99Posted on 2003-10-15 at 18:22:48ID: 9558744

Lotsa places you malloc but dont check the result.  One should  always check the return value.
It may be NULL for one of many reasons, none of them too good.  You may truly be out of memory,
which is unlikely on a VM system, it probably means you have a bad memory leak.  It may be NULL because
you somehow wrote over parts of the heap.  Also bad.

You're doing a lot of mallocs with no corresponding free, so you probably do have some memory leaks.
Eack malloc should have a corresponding free(), OR if you're passing the pointer back to the user to use, then it becomes
(usually) that caller's responsibility to dispose of the memory when they're done with it.  

 I would go back to Windows and run the program with a big looop, so it runs your main loop say 10,000 times.
Any memory leaks will then rear their heads on  Windows too.  At least then you won't be chasing a Linux versus Windows C problem.



 

by: shofstetterPosted on 2003-10-16 at 05:17:30ID: 9561279

Thanks I'll take a look at it, You all have some very good suggestions and fixes.  I'll see if I can get it fixed.  I haven't programmed in C++ for along time and the little programming I had done before didn't involve using memory so I never really got the hang of using malloc and free() etc.

 

by: fsign21Posted on 2003-10-16 at 06:06:17ID: 9561623

Your general problem is, that you use malloc() and never free() => you produce memory leaks

By the way, it is better to use new() for memory allocation and delete() for freeing, take a look on advantages on
http://www.parashift.com/c++-faq-lite/freestore-mgmt.html#faq-16.3
The main problem with malloc and free is: they don't know about constructors and destructors.


There are 3 situation, when you are using malloc() in your program:
1) allocate memory for class variables
2) allocate memory for temporaries in member functions
3) allocate memory in member functions for a variable, which you use then as a return value

The solution for (1) is:
->use a constructor to set your pointers to 0 and call delete() in destructor.  The call to delete() on zero-pointer is save and does nothing, if the pointer is not zero, the memory will be freed.
->in your member functions, I would check if the member-objects were already allocated or not, and free them, if yes.
I give you an example for char* html and char* title class variable and member init()

class page_type{
    char* html;
    char* title;
//hide copy-constructor and assignment, otherwise handle them properly
page_type(const page_type&);
page_type& operator=(const page_type&);

public:
page_type();
virtual  ~page_type();

}

page_type::page_type()
   : html(0), title(0)
{}

page_type::~page_type()
{
    delete [] html;
    delete [] title;
}

void page_type::init(char* html_data)
{
if(html)
{
     delete [] html;
     html=0;
}
size_t len = strlen(html_data);
html = new char[len+1];
if(html)
    {
    strcpy(html,html_data);
    html_tolower();
    set_title();
    set_body();
    }
}

The solution for (2) is easy as well: allocate memory for variable at the begin of the function and free it just before you return:
void page_type::set_body(void)
{
….
size_t len = strlen(html);
char* temp = new char[len+1];
…..
delete [] temp;
}

The solution for (3) is not that easy. The easiest and the most save way is to use std::string as a return value instead of char*. The other way is to pass 2 pointers as parameters to the function, both of them should point to the already allocated memory. The first parameter points to the result, another one - to the "start-string", like for example the function strcpy does:
char *strcpy(char *s1, const char *s2);
The strcpy() function copies the string pointed to by s2 (including the terminating null byte) into the array pointed to by s1.
RETURN VALUE
The strcpy() function returns s1; no return value is reserved to indicate an error.

In your example it would be
char* page_type::trunc(char* dataret, const char* data, size_t len)
{
    size_t size = strlen(data);
    if(size < len) size = len;
 
    memcpy(dataret, data, len);
    dataret[len]=0; // do not forget to make the result string zero-terminated!!!
    return dataret;
}

the call to this function would be:
{
size_t trunc_size = 10;
char* trunc_str = new char[trunc_size+1];
trunc(trunc_str, html, trunc_size);

cout << "Resut: " << trunc_str << endl;
delete [] trunc_str;
}

 

by: shofstetterPosted on 2003-10-16 at 07:32:07ID: 9562378

OK I think I have got it fixed, jkr I was not able to get the two functions you posted above to work.  I was getting errors when I called them.  I  would like to thank everyone for your help I was able to use something from each of you so I have decided to split the points.

 

by: shofstetterPosted on 2003-10-16 at 13:40:40ID: 9565347

Well I though I had it fixed More segfault.  I had it loop through 10000 times underwindow without a single problem,  It wouldn't even go though once in linux.  I have heard something about the gcc version of malloc acting a little different from the one I use in windows.  Does anyone know what that small differance might be.  And does anyone know of a good graphical debbuger that I can run on redhat 9 so I can work out the rest of the bugs?

I am going to post my latest version of the class I'll give 340 points to whoever can take it and post a fixed version that will work on linux.

 

by: shofstetterPosted on 2003-10-16 at 13:42:11ID: 9565356

well here it is.

---------------------------------------------------------------------------------------------------------
#include <iostream.h>
#include <stdlib.h>
#include <alloc.h>
#include <ctype.h>
#include <string.h>
class page_type{
    char* html;
    char* title;
    char* body;
    char* plain;
    void set_title(void);
    void set_body(void);
    char* strip_tags(char*);
    void html_tolower(void);
public:
    void destroy(void);
    char* str_replace_once(char*,char*,char*);
    char* str_to_lower(char*);
    char* trunc(char*,int);
    char* remove_symbols(char*);
    char* str_replace(char* ,char* , char*);
    char* get_title(void);
    char* get_body(void);
    int init(char*);
};

char* page_type::str_to_lower(char* temp)
{
      temp = str_replace("A","a",temp);
      temp = str_replace("B","b",temp);
      temp = str_replace("C","c",temp);
      temp = str_replace("D","d",temp);
      temp = str_replace("E","e",temp);
      temp = str_replace("F","f",temp);
      temp = str_replace("G","g",temp);
      temp = str_replace("H","h",temp);
      temp = str_replace("I","i",temp);
      temp = str_replace("J","j",temp);
      temp = str_replace("K","k",temp);
      temp = str_replace("L","l",temp);
      temp = str_replace("M","m",temp);
      temp = str_replace("N","n",temp);
      temp = str_replace("O","o",temp);
      temp = str_replace("P","p",temp);
      temp = str_replace("Q","q",temp);
      temp = str_replace("R","r",temp);
      temp = str_replace("S","s",temp);
      temp = str_replace("T","t",temp);
      temp = str_replace("U","u",temp);
      temp = str_replace("V","v",temp);
      temp = str_replace("W","w",temp);
      temp = str_replace("X","x",temp);
      temp = str_replace("Y","y",temp);
      temp = str_replace("Z","z",temp);
return(temp);
}




char* page_type::trunc(char* data,int len)
{
      char* temp;
      int size;
      size = strlen(data);
      if(size > len)
      {
            temp = (char*)malloc(len+10);
            memcpy(temp,data,len);
            return(temp);
      }
      else
      {
            return(data);
      }
}


char* page_type::remove_symbols(char* original)
{
      char* tempstr;
      tempstr = (char*)malloc(strlen(original)+1);
      strcpy(tempstr,original);
      tempstr = str_replace("!"," ",tempstr);
      tempstr = str_replace("@"," ",tempstr);
      tempstr = str_replace("#"," ",tempstr);
      tempstr = str_replace("$"," ",tempstr);
      tempstr = str_replace("%"," ",tempstr);
      tempstr = str_replace("^"," ",tempstr);
      tempstr = str_replace("&"," ",tempstr);
      tempstr = str_replace("*"," ",tempstr);
      tempstr = str_replace("("," ",tempstr);
      tempstr = str_replace(")"," ",tempstr);
      tempstr = str_replace("-"," ",tempstr);
      tempstr = str_replace("_"," ",tempstr);
      tempstr = str_replace("="," ",tempstr);
      tempstr = str_replace("+"," ",tempstr);
      tempstr = str_replace("\\"," ",tempstr);
      tempstr = str_replace("|"," ",tempstr);
      tempstr = str_replace("`"," ",tempstr);
      tempstr = str_replace("~"," ",tempstr);
      tempstr = str_replace("["," ",tempstr);
      tempstr = str_replace("]"," ",tempstr);
      tempstr = str_replace("{"," ",tempstr);
      tempstr = str_replace("}"," ",tempstr);
      tempstr = str_replace(";"," ",tempstr);
      tempstr = str_replace(":"," ",tempstr);
      tempstr = str_replace("'"," ",tempstr);
      tempstr = str_replace(","," ",tempstr);
      tempstr = str_replace("."," ",tempstr);
      tempstr = str_replace("<"," ",tempstr);
      tempstr = str_replace(">"," ",tempstr);
      tempstr = str_replace("/"," ",tempstr);
      tempstr = str_replace("?"," ",tempstr);
      return(tempstr);
}

char* page_type::str_replace(char* find, char* replace, char* string)
{
    char* new_string;
    int state = 1;
    new_string = (char*) malloc(strlen(string)+1);
    strcpy(new_string,string);
    while ( state == 1)
    {
        if(str_replace_once(find,replace,new_string) != NULL)
        {
                new_string = str_replace_once(find,replace,new_string);
        }
        else
        {
                state = 0;
        }
    }
return(new_string);
}




char* page_type::str_replace_once(char* find, char* replace, char* string)
{
    int find_len;
    int replace_len;
    int string_len;
    int new_string_len;
    char* new_string;
    char* found_start;
    char* found_end;
    char* tempstring;
    tempstring = (char*)malloc(strlen(string)+1);
    strcpy(tempstring,string);
    find_len = strlen(find);
    replace_len = strlen(replace);
    string_len = strlen(tempstring);
    new_string_len = ((string_len - find_len) + replace_len);
    new_string = (char* ) malloc(new_string_len + 1);

   if(strstr(tempstring,find)!= NULL)
    {
        found_start = strstr(tempstring,find);
        found_end = strstr(tempstring,find) + find_len;
        memcpy(new_string,tempstring,string_len - strlen(found_start));
        strcat(new_string,replace);
        strcat(new_string,found_end);
        return(new_string);
    }
    else
    {
        return(NULL);
    }
    free(tempstring);
}




void page_type::html_tolower(void)
{
html = str_to_lower(html);
}


char* page_type::get_title(void)
{
    if(title != NULL)
    {
        return(title);
    }
    else
    {
        return(NULL);
    }
}

char* page_type::get_body(void)
{
    if(body != NULL)
    {
        return(body);
    }
    else
    {
        return(NULL);
    }
}



void page_type::set_title(void)
{
    char* t_start;
    char* t_end;
    char* t_temp1;
    char* t_temp2;
    int size;
    int t_size;
     size = strlen(html);
    t_temp1 = (char *)malloc(size + 2);
    if(t_temp1 != NULL)
    {
        strcpy(t_temp1,html);
        t_start = strstr(t_temp1,"<title>")+7;
        t_end = strstr(t_temp1,"</title>");
        if((t_start != NULL) && (t_end != NULL))
        {
                t_size = t_end - t_start;
                if(t_size > 0)
                {
                      if(t_temp2 = (char*)malloc(t_size + 1))
                      {
                            memcpy(t_temp2,t_start,t_size);
                            t_temp2 = remove_symbols(t_temp2);
                      if(title = (char*)malloc(strlen(t_temp2)+1))
                            {
                                 strcpy(title,t_temp2);
                            }
                            else
                            {
                                 cout << "Not enough mem for title" << endl;
                            }
                      }
                      else
                      {
                           cout << "not enough mem for t_temp2" << endl;
                      }
                }
                else
                {
                cout << "No title data" << endl;
                }
        }
        else
        {
                cout << "No Title Found!" << endl;
        }
    }
    else
    {
        cout << "Not enought Mem for t-temp1 in function set_title()" << endl;
    }    
    free(t_temp1);
    free(t_temp2);
}

void page_type::set_body(void)
{
    char* b_start;
    char* b_end;
    char* b_temp1;
    char* b_temp2;
    char* b_temp3;
    char* templen;
    int size;
    int b_size;
    size = strlen(html);
    b_temp1 = (char *)malloc(size+1);
   if(b_temp1 != NULL)
   {
       strcpy(b_temp1,html);
       templen = strstr(b_temp1,"<bod");
       b_start = strstr(templen,">")+1;
       b_end = strstr(b_temp1,"</bod");
       if((b_start != NULL) && (b_end != NULL))
       {
              b_size = b_end - b_start;
              b_temp2 = (char*)malloc(b_size + 1);
              if(b_temp2 != NULL)
              {
                    memcpy(b_temp2,b_start,b_size);
                    b_temp3 = strip_tags(b_temp2);
                    b_temp3 = remove_symbols(b_temp3);
                body = (char*) malloc(strlen(b_temp3));
                    if(body != NULL)
                    {
                  
                   strcpy(body,b_temp3);
                  }
                    else
                    {
                         cout << "Not enough mem for body" << endl;
                    }
              }
              else
              {
                  cout << "not enough room for b_temp2" << endl;
              }
       }
       else
       {
            cout << "No body data" << endl;
       }
   }
   else
   {
        cout << "not enough mem for b_temp1" << endl;
   }
    free(b_temp1);
    free(b_temp2);
    free(b_temp3);
}

int page_type::init(char* html_data)
{
    html = (char *)malloc(strlen(html_data) + 1);
    if(html != NULL)
    {
    memcpy(html,html_data,strlen(html_data));
    html_tolower();
    set_title();
    set_body();
    return(1);
    }
    else
    {
    return(0);
    }
}
void page_type::destroy(void)
{
free(html);
free(title);
free(body);
}

char* page_type::strip_tags(char *data)
{
    char* temp1;
    char* temp2;
    char* temp3;
    char* start;
    char* end;
    int dist;
    int size;
    temp1 = (char*)malloc(strlen(data)+1);
    strcpy(temp1,data);
    while((start != NULL) && (end != NULL))
    {
        size = strlen(temp1);
        start = strstr(temp1,"<");
        end = strstr(temp1,">")+1;
        size = strlen(temp1);
        if((start != NULL) && (end != NULL))
        {
                dist = size - strlen(start);
                temp2 = (char *) malloc(dist + 2);
                memcpy(temp2,temp1,dist);
                temp3 = (char *) malloc(strlen(end)+1);
                memcpy(temp3,end,strlen(end));
                free(temp1);
                temp1 = (char *)malloc((strlen(temp2)+(strlen(temp3)+1)));
                memcpy(temp1,temp2,strlen(temp2));
                strcat(temp1,temp3);
                }
        else
        {
        break;
        }
    }
    free(temp2);
    free(temp3);
return(temp1);
}


int main(int argc, char *argv[])
{
 page_type *test;
 page_type testx;
 test = &testx;
 //char* teststr = "Replace Bable fish";
 //char* strtest;
 for(int i=0; i < 10000; i++)
 {
 if(test->init("<html><head><TITLE>Hello World In order to test my trunc() function I need to make the title longer then 50 charactures I think I have it there now</TITLE></head><BODY bgcolor=\"red\"><b>This isn't a page;</b> about the \"hello world\" program!  Instead it is a page that I am using o test the functionality of my new class wich I am going to use for the new Delta controls search endgine spider.</BODY></html>") != 0)
 {
cout << test->trunc(test->get_title(),50) << endl << endl;
 cout << test->trunc(test->get_body(),200) << endl << endl;
 //cout << teststr << endl;
 //strtest = test->str_replace_once("Bable", "Gold", teststr);
 //cout << strtest << endl;
 test->destroy();
 }
 }
 system("PAUSE");      
 return 0;
}

20120131-EE-VQP-002

3 Ways to Join

30-Day Free Trial

The Experts

98% positive feedback on 31,087 answers since March 2000. angeliii is a Microsoft Most Valuable Professional for his work with MS SQL Server & Develoment.

He has also proven his knowledge of Visual Basic Programming, PHP Scripting and Oracle Databases.

The Experts

97% positive feedback on 10,752 answers since July 2000. lrmoore has more than 18 years experience in the networking industry.

The six-time Mircosoft MVPs specialties include firewalls, virtual private networking, and network management.

Testimonials

"...and excellent source for support... Kind of like having your very own IT dept." Electriciansnet

Testimonials

"I was apprehensive at signing up at first. However... it has already made my life as an IT administrator much easier." JaCrews

Testimonials

"WOW! You guys have great, active, and knowledgeable people on here." moore50

Business Clients

Business Clients

In the Press

"If you’ve got a question... Experts Exchange can supply an answer.”

In the Press

"...an invaluable aid for both IT professionals and those who require tech support."

In the Press

"where IT professionals provide quick answers on just about any topic"

Business Account Plans

Loading Advertisement...