• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 256
  • Last Modified:

Check Website c++ program

Hi.

I need a c++ program that will check if a website is working or not.
For example:
check www.example.com

It will try to connect to www.example.com and, if the connection is successful, retrieve the response header and check it for the status code 200 OK.

The script will return TRUE or FALSE

I need the source to be compiled in linux.

Thanks.
0
rares_dumitrescu
Asked:
rares_dumitrescu
  • 5
  • 4
  • 2
  • +3
1 Solution
 
Infinity08Commented:
You could easily use libcurl for this :

        http://curl.haxx.se/libcurl/

Example code :

        http://curl.haxx.se/libcurl/c/simple.html
0
 
rares_dumitrescuAuthor Commented:
I need to check 200 000 websites, so i need a faster solution than curl.
0
 
Meir RivkinFull stack Software EngineerCommented:
This is a ping implementation for Linux, taken from http://www.linuxforums.org/forum/linux-networking/60389-implementing-ping-c.html

#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/time.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
//#include <netinet/ip_var.h>
#include <netdb.h>
#include <unistd.h>
#include <stdio.h>
#include <ctype.h>
//#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <iostream>

using namespace std;

// Internet (one's-complement) checksum over `len` bytes starting at `addr`;
// defined further down in this file.
uint16_t in_cksum(uint16_t *addr, unsigned len);

// Payload size of the echo request: 64 bytes minus the ICMP header.
#define	DEFDATALEN	(64-ICMP_MINLEN)	/* default data length */
// Added (together with MAXICMPLEN and the payload length) to size the
// receive buffer in ping(). Presumably the largest IP header incl. options.
#define	MAXIPLEN	60
// Second headroom term used when sizing the receive buffer in ping().
#define	MAXICMPLEN	76
// Size of the outgoing packet buffer in ping().
#define	MAXPACKET	(65536 - 60 - ICMP_MINLEN)/* max packet size */

int ping(string target)
{

        int s, i, cc, packlen, datalen = DEFDATALEN;
	struct hostent *hp;
	struct sockaddr_in to, from;
	//struct protoent	*proto;
	struct ip *ip;
	u_char *packet, outpack[MAXPACKET];
	char hnamebuf[MAXHOSTNAMELEN];
	string hostname;
	struct icmp *icp;
	int ret, fromlen, hlen;
	fd_set rfds;
	struct timeval tv;
	int retval;
	struct timeval start, end;
	int /*start_t, */end_t;
	bool cont = true;

	to.sin_family = AF_INET;

	// try to convert as dotted decimal address, else if that fails assume it's a hostname
	to.sin_addr.s_addr = inet_addr(target.c_str());
	if (to.sin_addr.s_addr != (u_int)-1)
		hostname = target;
	else 
	{
		hp = gethostbyname(target.c_str());
		if (!hp)
		{
			cerr << "unknown host "<< target << endl;
			return -1;
		}
		to.sin_family = hp->h_addrtype;
		bcopy(hp->h_addr, (caddr_t)&to.sin_addr, hp->h_length);
		strncpy(hnamebuf, hp->h_name, sizeof(hnamebuf) - 1);
		hostname = hnamebuf;
	}
	packlen = datalen + MAXIPLEN + MAXICMPLEN;
	if ( (packet = (u_char *)malloc((u_int)packlen)) == NULL)
	{
		cerr << "malloc error\n";
		return -1;
	}

/*
	if ( (proto = getprotobyname("icmp")) == NULL)
	{
		cerr << "unknown protocol icmp" << endl;
		return -1;
	}
*/
	if ( (s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
	{
		perror("socket");	/* probably not running as superuser */
		return -1;
	}

	icp = (struct icmp *)outpack;
	icp->icmp_type = ICMP_ECHO;
	icp->icmp_code = 0;
	icp->icmp_cksum = 0;
	icp->icmp_seq = 12345;	/* seq and id must be reflected */
	icp->icmp_id = getpid();


	cc = datalen + ICMP_MINLEN;
	icp->icmp_cksum = in_cksum((unsigned short *)icp,cc);

	gettimeofday(&start, NULL);

	i = sendto(s, (char *)outpack, cc, 0, (struct sockaddr*)&to, (socklen_t)sizeof(struct sockaddr_in));
	if (i < 0 || i != cc)
	{
		if (i < 0)
			perror("sendto error");
		cout << "wrote " << hostname << " " <<  cc << " chars, ret= " << i << endl;
	}
	
	// Watch stdin (fd 0) to see when it has input.
	FD_ZERO(&rfds);
	FD_SET(s, &rfds);
	// Wait up to one seconds.
	tv.tv_sec = 1;
	tv.tv_usec = 0;

	while(cont)
	{
		retval = select(s+1, &rfds, NULL, NULL, &tv);
		if (retval == -1)
		{
			perror("select()");
			return -1;
		}
		else if (retval)
		{
			fromlen = sizeof(sockaddr_in);
			if ( (ret = recvfrom(s, (char *)packet, packlen, 0,(struct sockaddr *)&from, (socklen_t*)&fromlen)) < 0)
			{
				perror("recvfrom error");
				return -1;
			}

			// Check the IP header
			ip = (struct ip *)((char*)packet); 
			hlen = sizeof( struct ip ); 
			if (ret < (hlen + ICMP_MINLEN)) 
			{ 
				cerr << "packet too short (" << ret  << " bytes) from " << hostname << endl;;
				return -1; 
			} 

			// Now the ICMP part 
			icp = (struct icmp *)(packet + hlen); 
			if (icp->icmp_type == ICMP_ECHOREPLY)
			{
				cout << "Recv: echo reply"<< endl;
				if (icp->icmp_seq != 12345)
				{
					cout << "received sequence # " << icp->icmp_seq << endl;
					continue;
				}
				if (icp->icmp_id != getpid())
				{
					cout << "received id " << icp->icmp_id << endl;
					continue;
				}
				cont = false;
			}
			else
			{
				cout << "Recv: not an echo reply" << endl;
				continue;
			}
	
			gettimeofday(&end, NULL);
			end_t = 1000000*(end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec);

			if(end_t < 1)
				end_t = 1;

			cout << "Elapsed time = " << end_t << " usec" << endl;
			return end_t;
		}
		else
		{
			cout << "No data within one seconds.\n";
			return 0;
		}
	}
	return 0;
}

/*
 * Internet checksum: one's-complement of the one's-complement sum of the
 * buffer taken as 16-bit words in host memory order.
 *
 * addr  start of the data
 * len   number of BYTES to cover; a trailing odd byte is placed in the
 *       lowest-addressed byte of an otherwise zero 16-bit word
 *
 * Returns the 16-bit checksum.
 */
uint16_t in_cksum(uint16_t *addr, unsigned len)
{
  // 32-bit accumulator so that carries out of bit 15 are kept until the end.
  uint32_t acc = 0;

  for (; len >= 2; len -= 2)
    acc += *addr++;

  // One byte left over: pad it to a full word with a zero byte.
  if (len != 0) {
    uint16_t tail = 0;
    *(unsigned char *)&tail = *(unsigned char *)addr;
    acc += tail;
  }

  // Fold the carries back into the low 16 bits; the second step picks up
  // a carry the first fold may itself have produced.
  acc = (acc & 0xffff) + (acc >> 16);
  acc += acc >> 16;

  // Complement; the conversion to uint16_t drops the upper half.
  return (uint16_t)~acc;
}

/*
 * Command-line entry point: expects exactly one argument, the host to ping.
 * Prints a usage line and exits with status 1 otherwise. On a normal run it
 * prints the value returned by ping() and exits with status 0.
 */
int main(int argc, char** argv)
{
	if (argc != 2)
	{
		std::cout << "Usage: ping hostname" << std::endl;
		exit(1);
	}

	// Run ping() before starting the result line. Writing
	// `cout << "ping returned " << ping(...)` emits the label first (C++17
	// evaluates << operands left to right; earlier standards leave the order
	// unspecified), so ping()'s own messages end up in the middle of the line.
	const int rc = ping(argv[1]);
	std::cout << "ping returned " << rc << std::endl;
	return 0;
}

Open in new window

0
What does it mean to be "Always On"?

Is your cloud always on? With an Always On cloud you won't have to worry about downtime for maintenance or software application code updates, ensuring that your bottom line isn't affected.

 
Infinity08Commented:
Did you test libcurl ? Did you notice bad performance ? Can you see an improved approach that would speed things up ? If not, then just stick with libcurl - it's quite well maintained, and likely better than anything you can come up with yourself (in a decent time anyway).

If you want a custom solution, you'll have to dive into the HTTP standards, and implement your own. It could be as simple as sending a HTTP GET request, and checking the response for 200 (or any other return codes that are acceptable, like redirects maybe etc.).

Supporting more, like HTTPS, makes things a bit more complicated of course, but then again, libcurl would make your life easier ;)
0
 
Let_Me_BeCommented:
> I need to check 200 000 websites, so i need a faster solution than curl.

Faster solution? What is slow on curl? 99% of the spent time is network communication.
If you want fast solution, you will need parallel communication (no matter what you use).
0
 
evilrixSenior Software Engineer (Avast)Commented:
Slightly off topic but...

If you are doing this on a machine with Perl installed you can just use the HEAD command in a bash script. Since most of the time will be spent on network connectivity, this should be plenty fast enough and doesn't require you to do much more than write a few lines of bash script. In fact, a very simple example of checking one site can be done in one line.

XXX=`HEAD www.google.com`; echo $XXX | grep "200 OK" && echo TRUE || echo FALSE

If you have a collection of sites to check just put the list in a text file and use the "read" command to traverse it.
0
 
rares_dumitrescuAuthor Commented:
Hhmmm ... yes ... parallel communication


Any help with that ? :)
0
 
Infinity08Commented:
>> Any help with that ? :)

What do you mean ?

First get it running in a simple straightforward loop. Use libcurl (following the simple sample code I referred to earlier) to check a series of websites by looping as needed.

Once that works as you want, you can think about optimizing things. But I recommend to first get it working.
0
 
rares_dumitrescuAuthor Commented:
I managed to do something with libcurl, working like i wanted.

Now about parallel execution.
I use multicurl, but it is not working so great: it gets about 20 statuses and then all the remaining statuses are failed.
0
 
Infinity08Commented:
>> I managed to do something with libcurl, working like i wanted.

Great. How many requests does it process per second (approximately) ?


>> I use multicurl, but now working so great, it gets about 20 status and then all status are failed.

I've never used multicurl (it seems to be for PHP rather than for C++, so it's not really in my comfort zone). If you intend to do this in PHP, you might get better help by adding the PHP zone to this question (you can click the "Request Attention" button to ask that).

If this is still for C++, then what you are looking up against, is using multiple threads that each process a portion of the 200000 requests (eg. 10 threads each processing 20000 requests). There's no special consideration to be taken into account since each request is independent from the others, so they can be easily processed in parallel. Only the reporting of the results might require some synchronization, but nothing too complicated.
0
 
dinesh4uCommented:
You can directly use the command in your command prompt.

e.g- ping www.msn.com

If you want the same thing done from a C++ program, you can always use the system() function call to invoke the command line.
Hope it helps.
0
 
Infinity08Commented:
pinging a server is not the same as checking whether a certain web page is available. The first checks if a machine is running and if it's responding to pings. The second checks if a certain web page is available/reachable on the internet.
0
 
evilrixSenior Software Engineer (Avast)Commented:
>> pinging a server is not the same as checking whether a certain web page is available.
Also, a server may very well be configured not to respond to ICMP packets (firewall policy) but that doesn't mean it won't serve HTTP. The only way to be sure is to connect to the HTTP(S) port and download the HTTP header (you need to do this just in case a different service just happens to be running on that port).
0
 
rares_dumitrescuAuthor Commented:
working on it
0

Featured Post

VIDEO: THE CONCERTO CLOUD FOR HEALTHCARE

Modern healthcare requires a modern cloud. View this brief video to understand how the Concerto Cloud for Healthcare can help your organization.

  • 5
  • 4
  • 2
  • +3
Tackle projects and never again get stuck behind a technical roadblock.
Join Now