Link to home
Start Free TrialLog in
Avatar of rares_dumitrescu
rares_dumitrescu

asked on

Check Website c++ program

Hi.

I need a c++ program that will check if a website is working or not.
For example:
check www.example.com

It will try to connect to www.example.com and, if the connection is successful, retrieve the response header and check it for the status code 200 OK.

The script will return TRUE or FALSE

I need the source to be compiled in linux.

Thanks.
Avatar of Infinity08
Infinity08
Flag of Belgium image

You could easily use libcurl for this :

        http://curl.haxx.se/libcurl/

Example code :

        http://curl.haxx.se/libcurl/c/simple.html
Avatar of rares_dumitrescu
rares_dumitrescu

ASKER

I need to check 200 000 websites, so I need a faster solution than curl.
this is ping implementation for linux taken from http://www.linuxforums.org/forum/linux-networking/60389-implementing-ping-c.html

#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/time.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
//#include <netinet/ip_var.h>
#include <netdb.h>
#include <unistd.h>
#include <stdio.h>
#include <ctype.h>
//#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>

using namespace std;

uint16_t in_cksum(uint16_t *addr, unsigned len);

#define	DEFDATALEN	(64-ICMP_MINLEN)	/* default data length */
#define	MAXIPLEN	60
#define	MAXICMPLEN	76
#define	MAXPACKET	(65536 - 60 - ICMP_MINLEN)/* max packet size */

/*
 * Send a single ICMP echo request to `target` and wait up to one second
 * for the matching echo reply.
 *
 * target - dotted-decimal IPv4 address, or a host name that is resolved
 *          with gethostbyname().
 *
 * Returns the round-trip time in microseconds (never less than 1) when a
 * matching reply arrives, 0 when none arrives within one second, and -1 on
 * error (unknown/unsupported host, malloc/socket/select/recvfrom failure,
 * or a received packet too short to hold an IP header plus ICMP header).
 *
 * Progress and diagnostics are written to cout / cerr / stderr.
 * The receive buffer and the socket are released on every exit path.
 */
int ping(string target)
{
	const int datalen = DEFDATALEN;
	const int cc = datalen + ICMP_MINLEN;			/* ICMP bytes we send */
	const int packlen = datalen + MAXIPLEN + MAXICMPLEN;	/* receive buffer size */
	const long timeout_usec = 1000000L;			/* overall reply deadline */
	const uint16_t seq = 12345;
	/*
	 * icmp_id is a 16-bit field but a pid may be wider; truncate once and
	 * compare against the truncated value, otherwise a reply can never match
	 * when the pid is above 65535.
	 */
	const uint16_t ident = (uint16_t)(getpid() & 0xffff);

	int s, i, ret, hlen, retval, end_t;
	int result = 0;
	long elapsed, remaining;
	struct hostent *hp;
	struct sockaddr_in to, from;
	socklen_t fromlen;
	struct ip *ip;
	struct icmp *icp;
	u_char *packet, outpack[MAXPACKET];
	string hostname;
	fd_set rfds;
	struct timeval tv, start, now;

	memset(&to, 0, sizeof(to));
	to.sin_family = AF_INET;

	// try to convert as dotted decimal address, else if that fails assume it's a hostname
	to.sin_addr.s_addr = inet_addr(target.c_str());
	if (to.sin_addr.s_addr != INADDR_NONE)
		hostname = target;
	else
	{
		hp = gethostbyname(target.c_str());
		if (!hp)
		{
			cerr << "unknown host "<< target << endl;
			return -1;
		}
		// Only IPv4 fits in sockaddr_in; refuse anything else instead of
		// copying h_length bytes blindly into sin_addr.
		if (hp->h_addrtype != AF_INET || hp->h_length != (int)sizeof(to.sin_addr))
		{
			cerr << "unsupported address type for " << target << endl;
			return -1;
		}
		memcpy(&to.sin_addr, hp->h_addr, sizeof(to.sin_addr));
		hostname = hp->h_name;
	}

	if ( (packet = (u_char *)malloc((u_int)packlen)) == NULL)
	{
		cerr << "malloc error\n";
		return -1;
	}

	if ( (s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
	{
		perror("socket");	/* probably not running as superuser */
		free(packet);
		return -1;
	}

	// Zero the bytes we are going to send so the payload is deterministic
	// and no uninitialised stack contents go out on the wire.
	memset(outpack, 0, (size_t)cc);
	icp = (struct icmp *)outpack;
	icp->icmp_type = ICMP_ECHO;
	icp->icmp_code = 0;
	icp->icmp_cksum = 0;
	icp->icmp_seq = seq;	/* seq and id must be reflected */
	icp->icmp_id = ident;
	icp->icmp_cksum = in_cksum((unsigned short *)icp, cc);

	gettimeofday(&start, NULL);

	i = sendto(s, (char *)outpack, cc, 0, (struct sockaddr*)&to, (socklen_t)sizeof(struct sockaddr_in));
	if (i < 0 || i != cc)
	{
		// A failed or short send is reported, but (as before) we still
		// fall through and wait for a reply.
		if (i < 0)
			perror("sendto error");
		cout << "wrote " << hostname << " " <<  cc << " chars, ret= " << i << endl;
	}

	for (;;)
	{
		// select() may modify both the fd_set and the timeout, so rebuild
		// them on every pass; the timeout is whatever is left of the
		// one-second budget measured from the send.
		gettimeofday(&now, NULL);
		elapsed = 1000000L*(now.tv_sec - start.tv_sec) + (now.tv_usec - start.tv_usec);
		if (elapsed < 0)
			elapsed = 0;
		remaining = (elapsed < timeout_usec) ? (timeout_usec - elapsed) : 0;
		tv.tv_sec = remaining / 1000000L;
		tv.tv_usec = remaining % 1000000L;

		// Watch the ICMP socket to see when it has input.
		FD_ZERO(&rfds);
		FD_SET(s, &rfds);

		retval = select(s+1, &rfds, NULL, NULL, &tv);
		if (retval == -1)
		{
			perror("select()");
			result = -1;
			break;
		}
		if (retval == 0)
		{
			cout << "No data within one seconds.\n";
			result = 0;
			break;
		}

		fromlen = sizeof(from);
		ret = (int)recvfrom(s, (char *)packet, packlen, 0, (struct sockaddr *)&from, &fromlen);
		if (ret < 0)
		{
			perror("recvfrom error");
			result = -1;
			break;
		}

		// Check the IP header. Its real length comes from ip_hl (in 32-bit
		// words); it is larger than sizeof(struct ip) when options are present.
		ip = (struct ip *)((char*)packet);
		hlen = (ret >= (int)sizeof(struct ip)) ? (int)(ip->ip_hl << 2) : 0;
		if (hlen < (int)sizeof(struct ip) || ret < (hlen + ICMP_MINLEN))
		{
			cerr << "packet too short (" << ret  << " bytes) from " << hostname << endl;
			result = -1;
			break;
		}

		// Now the ICMP part
		icp = (struct icmp *)(packet + hlen);
		if (icp->icmp_type != ICMP_ECHOREPLY)
		{
			// A raw ICMP socket sees unrelated ICMP traffic too; keep waiting.
			cout << "Recv: not an echo reply" << endl;
			continue;
		}

		cout << "Recv: echo reply"<< endl;
		if (icp->icmp_seq != seq)
		{
			cout << "received sequence # " << icp->icmp_seq << endl;
			continue;
		}
		if (icp->icmp_id != ident)
		{
			cout << "received id " << icp->icmp_id << endl;
			continue;
		}

		gettimeofday(&now, NULL);
		end_t = (int)(1000000L*(now.tv_sec - start.tv_sec) + (now.tv_usec - start.tv_usec));

		// 0 is reserved for "timed out", so never report less than 1 usec.
		if (end_t < 1)
			end_t = 1;

		cout << "Elapsed time = " << end_t << " usec" << endl;
		result = end_t;
		break;
	}

	close(s);
	free(packet);
	return result;
}

/*
 * Internet checksum over `len` bytes starting at `addr`.
 *
 * The buffer is summed as native 16-bit words into a 32-bit accumulator;
 * a trailing odd byte is added as the first byte of an otherwise-zero
 * 16-bit word. The carries are then folded back into the low 16 bits and
 * the one's complement of the result is returned.
 */
uint16_t in_cksum(uint16_t *addr, unsigned len)
{
  uint32_t acc = 0;
  const uint16_t *word = addr;
  unsigned remaining = len;

  // whole 16-bit words
  for (; remaining > 1; remaining -= 2)
    acc += *word++;

  // one byte left over: place it in the first byte of a zeroed word
  if (remaining == 1) {
    uint16_t tail = 0;
    *(unsigned char *)&tail = *(const unsigned char *)word;
    acc += tail;
  }

  // fold the upper half into the lower half, then fold the carry that
  // the first fold may itself have produced
  acc = (acc & 0xffff) + (acc >> 16);
  acc += (acc >> 16);

  // complement and truncate to 16 bits
  return (uint16_t)~acc;
}

/*
 * Command-line entry point: expects exactly one argument, the host to ping.
 * Prints a usage line and exits with status 1 otherwise. On a valid
 * invocation it prints the value returned by ping() and exits with 0.
 */
int main(int argc, char** argv)
{
	const bool have_host = (argc == 2);

	if (!have_host)
	{
		cout << "Usage: ping hostname" << endl;
		exit(1);
	}

	// Kept as one streamed expression so the output order relative to
	// ping()'s own messages is the same as before.
	cout << "ping returned " << ping(argv[1]) << endl;
	return 0;
}

Open in new window

Did you test libcurl ? Did you notice bad performance ? Can you see an improved approach that would speed things up ? If not, then just stick with libcurl - it's quite well maintained, and likely better than anything you can come up with yourself (in a decent time anyway).

If you want a custom solution, you'll have to dive into the HTTP standards, and implement your own. It could be as simple as sending a HTTP GET request, and checking the response for 200 (or any other return codes that are acceptable, like redirects maybe etc.).

Supporting more, like HTTPS, makes things a bit more complicated of course, but then again, libcurl would make your life easier ;)
> I need to check 200 000 websites, so i need a faster solution than curl.

Faster solution? What is slow on curl? 99% of the spent time is network communication.
If you want fast solution, you will need parallel communication (no matter what you use).
ASKER CERTIFIED SOLUTION
Avatar of evilrix
evilrix
Flag of United Kingdom of Great Britain and Northern Ireland image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Hhmmm ... yes ... parallel communication


Any help with that ? :)
>> Any help with that ? :)

What do you mean ?

First get it running in a simple straightforward loop. Use libcurl (following the simple sample code I referred to earlier) to check a series of websites by looping as needed.

Once that works as you want, you can think about optimizing things. But I recommend to first get it working.
I managed to do something with libcurl, working like i wanted.

Now about parallel execution.
I use multicurl, but it's not working so great: it gets about 20 statuses and then all the remaining ones fail.
>> I managed to do something with libcurl, working like i wanted.

Great. How many requests does it process per second (approximately) ?


>> I use multicurl, but it's not working so great: it gets about 20 statuses and then all the remaining ones fail.

I've never used multicurl (it seems to be for PHP rather than for C++, so it's not really in my comfort zone). If you intend to do this in PHP, you might get better help by adding the PHP zone to this question (you can click the "Request Attention" button to ask that).

If this is still for C++, then what you are looking up against, is using multiple threads that each process a portion of the 200000 requests (eg. 10 threads each processing 20000 requests). There's no special consideration to be taken into account since each request is independent from the others, so they can be easily processed in parallel. Only the reporting of the results might require some synchronization, but nothing too complicated.
You can use the command directly at your command prompt.

e.g- ping www.msn.com

If you want the same thing done from a C++ program, you can always use the system() function call to invoke the command line.
Hope it helps.
pinging a server is not the same as checking whether a certain web page is available. The first checks if a machine is running and if it's responding to pings. The second checks if a certain web page is available/reachable on the internet.
>> pinging a server is not the same as checking whether a certain web page is available.
Also, a server may very well be configured not to respond to ICMP packets (firewall policy), but that doesn't mean it won't serve HTTP. The only way to be sure is to connect to the HTTP(S) port and download the HTTP header (you need to do this in case a different service happens to be running on that port).
working on it