I am working on a class that I will be using in a much larger program on linux. I made the class in DevC++ using the MINGW port of the gcc compiler. The class and test program compile and run fine under windows with mingw, but when I comple it with G++ under linux amd run it I get a segfault. I have never debuged a program under linux so I don't really know where the problem is, but I think it has something to do with malloc(),strcpy(),or strcat() working differently in linux. So can anyone debug and fix what is neccassary in the program below and maybe tell me what caused it. The purpos of the class is to parse/extract/manipulate data from html files so the I can insert the data into a database.
#include <iostream.h>
#include <stdlib.h>
#include <alloc.h>
#include <string.h>
class page_type{
char* html;
char* title;
char* body;
char* plain;
void set_title(void);
void set_body(void);
char* strip_tags(char*);
void html_tolower(void);
public:
char* str_replace_once(char*,char*,char*);
char* str_to_lower(char*);
char* trunc(char*,int);
char* remove_symbols(char*);
char* str_replace(char* ,char* , char*);
char* get_title(void);
char* get_body(void);
void init(char*);
};
char* page_type::str_to_lower(char* string)
{
char* temp;
temp = (char*)malloc(strlen(string)+1);
memcpy(temp,string,strlen(string));
temp = str_replace("A","a",temp);
temp = str_replace("B","b",temp);
temp = str_replace("C","c",temp);
temp = str_replace("D","d",temp);
temp = str_replace("E","e",temp);
temp = str_replace("F","f",temp);
temp = str_replace("G","g",temp);
temp = str_replace("H","h",temp);
temp = str_replace("I","i",temp);
temp = str_replace("J","j",temp);
temp = str_replace("K","k",temp);
temp = str_replace("L","l",temp);
temp = str_replace("M","m",temp);
temp = str_replace("N","n",temp);
temp = str_replace("O","o",temp);
temp = str_replace("P","p",temp);
temp = str_replace("Q","q",temp);
temp = str_replace("R","r",temp);
temp = str_replace("S","s",temp);
temp = str_replace("T","t",temp);
temp = str_replace("U","u",temp);
temp = str_replace("V","v",temp);
temp = str_replace("W","w",temp);
temp = str_replace("X","x",temp);
temp = str_replace("Y","y",temp);
temp = str_replace("Z","z",temp);
return(temp);
}
char* page_type::trunc(char* data,int len)
{
char* temp;
int size;
size = strlen(data);
if(size > len)
{
temp = (char*)malloc(len+10);
memcpy(temp,data,len);
return(temp);
}
else
{
return(data);
}
}
char* page_type::remove_symbols(char* original)
{
char* tempstr;
tempstr = (char*)malloc(strlen(original)+1);
strcpy(tempstr,original);
tempstr = str_replace("!"," ",tempstr);
tempstr = str_replace("@"," ",tempstr);
tempstr = str_replace("#"," ",tempstr);
tempstr = str_replace("$"," ",tempstr);
tempstr = str_replace("%"," ",tempstr);
tempstr = str_replace("^"," ",tempstr);
tempstr = str_replace("&"," ",tempstr);
tempstr = str_replace("*"," ",tempstr);
tempstr = str_replace("("," ",tempstr);
tempstr = str_replace(")"," ",tempstr);
tempstr = str_replace("-"," ",tempstr);
tempstr = str_replace("_"," ",tempstr);
tempstr = str_replace("="," ",tempstr);
tempstr = str_replace("+"," ",tempstr);
tempstr = str_replace("\\"," ",tempstr);
tempstr = str_replace("|"," ",tempstr);
tempstr = str_replace("`"," ",tempstr);
tempstr = str_replace("~"," ",tempstr);
tempstr = str_replace("["," ",tempstr);
tempstr = str_replace("]"," ",tempstr);
tempstr = str_replace("{"," ",tempstr);
tempstr = str_replace("}"," ",tempstr);
tempstr = str_replace(";"," ",tempstr);
tempstr = str_replace(":"," ",tempstr);
tempstr = str_replace("'"," ",tempstr);
tempstr = str_replace(","," ",tempstr);
tempstr = str_replace("."," ",tempstr);
tempstr = str_replace("<"," ",tempstr);
tempstr = str_replace(">"," ",tempstr);
tempstr = str_replace("/"," ",tempstr);
tempstr = str_replace("?"," ",tempstr);
return(tempstr);
}
char* page_type::str_replace(char* find, char* replace, char* string)
{
char* new_string;
char* test_var;
int state = 1;
new_string = (char*) malloc(strlen(string)+1);
strcpy(new_string,string);
while ( state == 1)
{
if(str_replace_once(find,replace,new_string) != NULL)
{
new_string = str_replace_once(find,replace,new_string);
}
else
{
state = 0;
}
}
return(new_string);
}
char* page_type::str_replace_once(char* find, char* replace, char* string)
{
int find_len;
int replace_len;
int string_len;
int new_string_len;
char* new_string;
char* found_start;
char* found_end;
char* tempstring;
tempstring = (char*)malloc(strlen(string)+1);
strcpy(tempstring,string);
find_len = strlen(find);
replace_len = strlen(replace);
string_len = strlen(tempstring);
new_string_len = ((string_len - find_len) + replace_len);
new_string = (char* ) malloc(new_string_len + 1);
if(strstr(tempstring,find)!= NULL)
{
found_start = strstr(tempstring,find);
found_end = strstr(tempstring,find) + find_len;
memcpy(new_string,tempstring,string_len - strlen(found_start));
strcat(new_string,replace);
strcat(new_string,found_end);
return(new_string);
}
else
{
return(NULL);
}
}
void page_type::html_tolower(void)
{
html = str_to_lower(html);
}
char* page_type::get_title(void)
{
if(title != NULL)
{
return(title);
}
else
{
return(NULL);
}
}
char* page_type::get_body(void)
{
if(body != NULL)
{
return(body);
}
else
{
return(NULL);
}
}
void page_type::set_title(void)
{
char* t_start;
char* t_end;
char* t_temp1;
char* t_temp2;
int size;
int t_size;
size = strlen(html);
t_temp1 = (char *)malloc(size + 2);
if(t_temp1 != NULL)
{
strcpy(t_temp1,html);
t_start = strstr(t_temp1,"<title>")+7;
t_end = strstr(t_temp1,"</title>");
if((t_start != NULL) && (t_end != NULL))
{
t_size = t_end - t_start;
if(t_size > 0)
{
if(t_temp2 = (char*)malloc(t_size + 1))
{
memcpy(t_temp2,t_start,t_size);
t_temp2 = remove_symbols(t_temp2);
if(title = (char*)malloc(strlen(t_temp2)+1))
{
strcpy(title,t_temp2);
}
else
{
cout << "Not enough mem for title" << endl;
}
}
else
{
cout << "not enough mem for t_temp2" << endl;
}
}
else
{
cout << "No title data" << endl;
}
}
else
{
cout << "No Title Found!" << endl;
}
}
else
{
cout << "Not enought Mem for t-temp1 in function set_title()" << endl;
}
}
void page_type::set_body(void)
{
char* b_start;
char* b_end;
char* b_temp1;
char* b_temp2;
char* b_temp3;
char* templen;
int size;
int b_size;
size = strlen(html);
b_temp1 = (char *)malloc(size+1);
if(b_temp1 != NULL)
{
strcpy(b_temp1,html);
templen = strstr(b_temp1,"<bod");
b_start = strstr(templen,">")+1;
b_end = strstr(b_temp1,"</bod");
if((b_start != NULL) && (b_end != NULL))
{
b_size = b_end - b_start;
b_temp2 = (char*)malloc(b_size + 1);
if(b_temp2 != NULL)
{
memcpy(b_temp2,b_start,b_size);
b_temp3 = strip_tags(b_temp2);
b_temp3 = remove_symbols(b_temp3);
body = (char*) malloc(strlen(b_temp3));
if(body != NULL)
{
strcpy(body,b_temp3);
}
else
{
cout << "Not enough mem for body" << endl;
}
}
else
{
cout << "not enough room for b_temp2" << endl;
}
}
else
{
cout << "No body data" << endl;
}
}
else
{
cout << "not enough mem for b_temp1" << endl;
}
}
void page_type::init(char* html_data)
{
html = (char *)malloc(strlen(html_data) + 2);
if(html != NULL)
{
memcpy(html,html_data,(strlen(html_data)+1));
html_tolower();
set_title();
set_body();
}
}
char* page_type::strip_tags(char *data)
{
char* temp1;
char* temp2;
char* temp3;
char* start;
char* end;
int dist;
int size;
temp1 = (char*)malloc(strlen(data)+1);
strcpy(temp1,data);
while((start != NULL) && (end != NULL))
{
size = strlen(temp1);
start = strstr(temp1,"<");
end = strstr(temp1,">")+1;
size = strlen(temp1);
if((start != NULL) && (end != NULL))
{
dist = size - strlen(start);
temp2 = (char *) malloc(dist + 2);
memcpy(temp2,temp1,dist);
temp3 = (char *) malloc(strlen(end)+1);
memcpy(temp3,end,strlen(end));
free(temp1);
temp1 = (char *)malloc((strlen(temp2)+(strlen(temp3)+1)));
memcpy(temp1,temp2,strlen(temp2));
strcat(temp1,temp3);
}
else
{
break;
}
}
return(temp1);
}
int main(int argc, char *argv[])
{
page_type *test;
page_type testx;
test = &testx;
//char* teststr = "Replace Bable fish";
//char* strtest;
test->init("<html><head><TITLE>Hello World In order to test my trunc() function I need to make the title longer then 50 charactures I think I have it there now</TITLE></head><BODY bgcolor=\"red\"><b>This isn't a page;</b> about the \"hello world\" program! Instead it is a page that I am using o test the functionality of my new class wich I am going to use for the new Delta controls search endgine spider.</BODY></html>");
cout << test->trunc(test->get_title(),50) << endl << endl;
cout << test->trunc(test->get_body(),200) << endl << endl;
//cout << teststr << endl;
//strtest = test->str_replace_once("Bable", "Gold", teststr);
//cout << strtest << endl;
system("PAUSE");
return 0;
}
by: AmitAgarwalPosted on 2003-10-15 at 12:57:06ID: 9557444
This program has lot of bugs and memory leaks. it will be nice if you test it on windows and then port it on linux. to catch memory leaks use some purifier or numega tool.
first you add a constructor in your class and Initialize all the pointers to NULL similarly all the local variables also.
this will help you in catching bugs.