troubleshooting Question

Can any .NET guru help with writing the POST parameters for screen-scraping a website?

Avatar of mmalik15
mmalik15 asked on
JavaScriptC#ASP.NET
4 Comments1 Solution265 ViewsLast Modified:
I have been trying the following code, but I am struggling to scrape the second and subsequent pages of results from this website:

http://www.ihi.org/knowledge/pages/ViewAll.aspx?FilterField1=IHI_x0020_Content_x0020_Type&FilterValue1=Measures&Filter1ChainingOperator=And&Targ

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;

/// <summary>
/// Code-behind for a page that fetches a remote ASP.NET page with a GET, extracts its
/// hidden postback fields, and then replays them in a form POST to the same URL.
/// The intermediate values are shown in the page's text boxes for inspection.
/// </summary>
public partial class MHF2 : System.Web.UI.Page
{
    /// <summary>Address of the remote listing page that is fetched and then posted back to.</summary>
    private const string TargetUrl = "http://www.ihi.org/knowledge/pages/ViewAll.aspx?FilterField1=IHI_x0020_Content_x0020_Type&FilterValue1=Measures&Filter1ChainingOperator=And&Targ";

    /// <summary>Value sent as __EVENTTARGET. It is already percent-encoded ('$' written as %24).</summary>
    private const string GridEventTarget = "ctl00%24m%24g_06bd18ef_887b_4e8f_a9ba_e08b261f48b2%24ctl00%24a_oGrid";

    // The value is captured up to the next double quote, so a match can never run past
    // the end of the attribute and an empty value is still recognised.
    private static readonly Regex ViewStateRegex =
        new Regex("id=\"__VIEWSTATE\"\\s+value=\"([^\"]*)\"", RegexOptions.Compiled);

    private static readonly Regex EventValidationRegex =
        new Regex("id=\"__EVENTVALIDATION\"\\s+value=\"([^\"]*)\"", RegexOptions.Compiled);

    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Runs the GET, parses the hidden fields out of the returned HTML and issues the POST.
    /// The POST response (or a short failure message) is written to txtResponse2.
    /// </summary>
    protected void btnSubmit_Click1(object sender, EventArgs e)
    {
        txtEventTarget.Text = "";
        txtEventArgument.Text = "";
        txtViewState.Text = "";
        txtEventValidation.Text = "";

        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(TargetUrl);
            request.Referer = TargetUrl;
            // Cookies set by the GET are stored here and re-sent with the POST.
            request.CookieContainer = new CookieContainer();

            string html = GetResponse(TargetUrl, ref request);

            string[] data = ParseHTML(html);

            txtEventTarget.Text = data[0];
            txtEventArgument.Text = data[1];

            if (data[2].Length == 0)
            {
                // Without a view state there is nothing meaningful to post back.
                txtResponse2.Text = "Could not find the __VIEWSTATE field in the first response; postback not sent.";
                return;
            }

            txtResponse2.Text = PostRequest(TargetUrl, data, ref request);
        }
        catch (WebException ex)
        {
            txtResponse2.Text = "Request failed: " + ex.Message;
        }
        catch (IOException ex)
        {
            txtResponse2.Text = "Request failed: " + ex.Message;
        }
    }

    /// <summary>
    /// Sends the postback fields to <paramref name="url"/> as an
    /// application/x-www-form-urlencoded POST and returns the response body.
    /// </summary>
    /// <param name="url">Address to post to; also sent as the Referer header.</param>
    /// <param name="args">
    /// Four values that are already URL-encoded, in this order: __EVENTTARGET,
    /// __EVENTARGUMENT, __VIEWSTATE, __EVENTVALIDATION.
    /// </param>
    /// <param name="request">
    /// On entry, the previous request (may be null); its cookie container is reused.
    /// On exit, the POST request that was sent.
    /// </param>
    /// <returns>The response body as text.</returns>
    /// <exception cref="ArgumentException"><paramref name="args"/> has fewer than four entries.</exception>
    private string PostRequest(string url, string[] args, ref HttpWebRequest request)
    {
        if (args == null || args.Length < 4)
        {
            throw new ArgumentException("Four postback values are required.", "args");
        }

        string postData = "__EVENTTARGET=" + args[0] + "&__EVENTARGUMENT=" + args[1]
            + "&__VIEWSTATE=" + args[2] + "&__EVENTVALIDATION=" + args[3];

        txtPostBack.Text = postData;

        // Every value is percent-encoded at this point, so the payload is plain ASCII.
        byte[] data = Encoding.ASCII.GetBytes(postData);

        CookieContainer cookies = (request != null) ? request.CookieContainer : null;

        request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        request.ContentLength = data.Length;
        request.Referer = url;
        request.CookieContainer = cookies ?? new CookieContainer();

        // The using block closes the stream exactly once, whether or not the write succeeds.
        // A failed write propagates to the caller instead of sending a half-written request.
        using (Stream requestBody = request.GetRequestStream())
        {
            requestBody.Write(data, 0, data.Length);
        }

        return GetResponse(url, ref request);
    }

    /// <summary>
    /// Executes <paramref name="request"/> and returns the whole response body as text.
    /// </summary>
    /// <param name="url">Not used by this method; kept so existing call sites still compile.</param>
    /// <param name="request">The prepared request to execute.</param>
    /// <returns>The response body, decoded with the character set the response declares.</returns>
    /// <exception cref="WebException">The request failed or the server returned an error status.</exception>
    private string GetResponse(string url, ref HttpWebRequest request)
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, ResolveEncoding(response)))
        {
            // StreamReader decodes across buffer boundaries, so multi-byte characters survive.
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Picks the text encoding for a response from its declared character set,
    /// using UTF-8 when none is declared or the name is not recognised.
    /// </summary>
    private static Encoding ResolveEncoding(HttpWebResponse response)
    {
        string charset = response.CharacterSet;
        if (string.IsNullOrEmpty(charset))
        {
            return Encoding.UTF8;
        }

        try
        {
            return Encoding.GetEncoding(charset);
        }
        catch (ArgumentException)
        {
            return Encoding.UTF8;
        }
    }

    /// <summary>
    /// Builds the four postback values from the HTML of the first response.
    /// The raw __VIEWSTATE and __EVENTVALIDATION values are also shown in
    /// txtViewState and txtEventValidation.
    /// </summary>
    /// <param name="html">HTML of the page returned by the GET.</param>
    /// <returns>
    /// A four-element array of URL-encoded values: event target, event argument (empty),
    /// view state, event validation. A hidden field that is not present yields an empty string.
    /// </returns>
    private string[] ParseHTML(string html)
    {
        string[] data = new string[4];

        //Set the EVENTTARGET control
        data[0] = GridEventTarget;

        //Set the EVENTARGUMENT, should be an empty string
        data[1] = "";

        //get the ViewState data
        string viewState = ExtractHiddenField(html, ViewStateRegex);
        txtViewState.Text = viewState;
        data[2] = HttpUtility.UrlEncode(viewState);

        //get the EVENTVALIDATION data
        string eventValidation = ExtractHiddenField(html, EventValidationRegex);
        txtEventValidation.Text = eventValidation;
        data[3] = HttpUtility.UrlEncode(eventValidation);

        return data;
    }

    /// <summary>
    /// Returns the HTML-decoded text captured by group 1 of <paramref name="pattern"/>,
    /// or an empty string when the pattern does not match.
    /// </summary>
    private static string ExtractHiddenField(string html, Regex pattern)
    {
        Match match = pattern.Match(html ?? "");
        if (!match.Success)
        {
            return "";
        }

        return HttpUtility.HtmlDecode(match.Groups[1].Value);
    }
}

And the front end is


<%@ Page Language="C#" AutoEventWireup="true" CodeFile="MHF2.aspx.cs" Inherits="MHF2" %>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title></title>
</head>
<body>
    <form id="form1" runat="server">
    <div>
        <%-- The code-behind class MHF2 fills these text boxes by ID when the button is clicked. --%>
        <%-- txtEventTarget / txtEventArgument: the __EVENTTARGET and __EVENTARGUMENT values used for the POST. --%>
        <asp:TextBox ID="txtEventTarget" runat="server"></asp:TextBox><br />
        <asp:TextBox ID="txtEventArgument" runat="server"></asp:TextBox><br />
        <%-- txtViewState: the __VIEWSTATE value extracted from the first response. --%>
        <asp:TextBox ID="txtViewState" runat="server"></asp:TextBox><br />
        <%-- txtResponse2: the body returned by the POST. --%>
        <asp:TextBox ID="txtResponse2" runat="server"></asp:TextBox><br />
        <%-- txtEventValidation: the __EVENTVALIDATION value extracted from the first response. --%>
        <asp:TextBox ID="txtEventValidation" runat="server"></asp:TextBox><br />
        <%-- txtPostBack: the exact form body that is sent in the POST. --%>
          <asp:TextBox ID="txtPostBack" runat="server"></asp:TextBox><br />
        <%-- Clicking the button runs btnSubmit_Click1 in the code-behind. --%>
        <asp:Button ID="btnSubmit" runat="server" Text="Submit"
            onclick="btnSubmit_Click1" />

    </div>
    </form>
</body>
</html>
ASKER CERTIFIED SOLUTION
selvol

Our community of experts have been thoroughly vetted for their expertise and industry experience.

Join our community to see this answer!
Unlock 1 Answer and 4 Comments.
Start Free Trial
Learn from the best

Network and collaborate with thousands of CTOs, CISOs, and IT Pros rooting for you and your success.

Andrew Hancock - VMware vExpert
See if this solution works for you by signing up for a 7 day free trial.
Unlock 1 Answer and 4 Comments.
Try for 7 days

“The time we save is the biggest benefit of E-E to our team. What could take multiple guys 2 hours or more each to find is accessed in around 15 minutes on Experts Exchange.”

-Mike Kapnisakis, Warner Bros