• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 194
  • Last Modified:

parsing from string

I currently have a bit of code that logs into a website, navigates to another page, then reads the source of that HTML page into a string called HTML; it uses IO.StreamReader to read the response.

I'm trying to parse the page to get a few bits of info, and put those into variables. Here is how the HTML source looks

<CODE>
      <link rel="stylesheet" href="/login/css/idnet.css" type="text/css" media="all" title="default" />
      <link rel="stylesheet" href="/login/css/idnetcp.css" type="text/css" media="all" title="default" />
      
      <script type="text/javascript" src="/js/idnet.js"></script>
      
      <script type="text/javascript" language="JavaScript">
      
      function clearDefault(el) {
            if (el.defaultValue == el.value) el.value = "";
      }
      
      function setFocus(f, message)
      {
            if( message != "")
            {
                  alert(message);
            }
            f.focus();
            return false;
      }
      
      </script>
      
</head>

<body>

<!-- // STANDARDS COMPLIANT TOOLBAR VERSION 1.1 -->
<!-- // CREATED: 16.11.04 -->
<!-- // LAST MODIFIED: 25.01.05 -->



<div id="container">

<!-- // HEADER START -->

<div id="headercontainer">

      <div id="logo">
      <a href="/default.jsp"><img src="/images/idnetlogo.gif" alt="IDNet - Web Solutions" width="231" height="136" /></a>
      </div>
      
      <div id="navcontainer">
      <div id="navone">
      <ul>
            <li><a href="/login/default.jsp">Home</a></li>
            <li><a href="/login/broadband/">Broadband</a></li>
            <li><a href="/login/email/">Email</a></li>
            <li><a href="/login/payment/">Payment</a></li>
            <li><a href="/login/logout.jsp">Log Out</a></li>
      </ul>
      </div>
      </div>

</div>

<!-- // HEADER END -->





<!-- // CONTENT START -->

<div id="contentcontainer">

      <div id="rhscontent">
      
      <h3>Related information</h3>

<ul>
      <li><a href="/login/broadband/">Broadband Details</a></li>
      <li><a href="/login/broadband/settings.jsp">Broadband Settings</a></li>
</ul>
      
      </div>
      
      <div id="lhscontent">
      
      <h1>Broadband Details</h1>
      
      
            
            
                                    <h2>Your IDNet Broadband was due to be activated on 04 April 2006</h2>
                                    
                  
            
            
            
            
            
            <h3>Your connection: Up to 8 Mbps  - BT IPStream Max</h3>
            
            
                  <h3>During the time period 01 November 2006 to 20 November 2006 your bandwidth use was:</h3>
                  <h3>1.21 GB Download</h3>
                  <h3>0.88 GB Upload</h3>
                  
                  
                        <br />
                        
                        <h4>These figures cover 19 days. If the same rate of usage continues for 30 days then the total for the month will be:</h4>
                        <h4>1.91 GB Download</h4>
                        <h4>1.39 GB Upload</h4>
                        
                  
                  <table border="0" cellpadding="0" cellspacing="0" summary="Broadband Daily Usage Details">
                  <tr class="nohover">
                        <th class="start">Day</th>
                        <th class="start">Download (GB)</th>
                        <th class="start">Upload (GB)</th>
                  </tr>
                  
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>1</th>
                                    <td>0.04</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>2</th>
                                    <td>0.44</td>
                                    <td>0.01</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>3</th>
                                    <td>0.02</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>4</th>
                                    <td>0.03</td>
                                    <td>0.01</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>5</th>
                                    <td>0.04</td>
                                    <td>0.01</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>6</th>
                                    <td>0.01</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>7</th>
                                    <td>0.01</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>8</th>
                                    <td>0.01</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>9</th>
                                    <td>0.02</td>
                                    <td>0.01</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>10</th>
                                    <td>0.01</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>11</th>
                                    <td>0.02</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>12</th>
                                    <td>0.05</td>
                                    <td>0.01</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>13</th>
                                    <td>0.02</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>14</th>
                                    <td>0.01</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>15</th>
                                    <td>0.02</td>
                                    <td>0.01</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>16</th>
                                    <td>0.01</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>17</th>
                                    <td>0.04</td>
                                    <td>0.01</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>18</th>
                                    <td>0.04</td>
                                    <td>0.0</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>19</th>
                                    <td>0.18</td>
                                    <td>0.77</td>
                              </tr>
                              
                              <tr onmouseover="this.bgColor='002345';" onmouseout="this.bgColor='003366';">
                                    <th>20</th>
                                    <td>0.19</td>
                                    <td>0.01</td>
                              </tr>
                              
                  </table>
                  
            
                  
            <br />
            
            <form id="period" name="period" method="post" action="default.jsp">
            
            <p>
                  <select name="period" tabindex="1">
                        
                                    <option value="2006-11" selected="true"
>November 2006</option>
                                    
                                    <option value="2006-10" >October 2006</option>
                                    
                  </select>
                  <input type="submit" id="submit" name="submit" value="submit" class="std" tabindex="2" />
            </p>
            
            </form>
            
            <br />
            
            
            
            <h3>Your broadband address:</h3>
            
            <p>
            
            4
Changed this<br />
            meh<br />
            meh<br />

            LE9 ABC
            </p>
            
            <p>
            Telephone: 077900000000<br />
            
            
            
            Email: abc@abc.com<br />

            </p>
            
             
      
      
      
      <!--<ul>
            <li><a href="/default.jsp">Home</a></li>
            <li><a href="/broadband/">Broadband</a></li>
            <li><a href="/email/">Email</a></li>
            <li><a href="/hosting/">Hosting</a></li>
            <li><a href="/logout.jsp">Log Out</a></li>
      </ul>-->
      
      </div>
      
</div>

<!-- // CONTENT END -->

<!-- // FOOTER START -->

<div id="footercontainer">
      
      <p>&copy; abc 1996-2006. All rights reserved.<br />
      ac, The Spirella Building, abc, Herts SG6 ABC. UK<br />
      T: 08000 267 237 | E: <a href="mailto:&#099;&#111;&#110;&#116;&#097;&#099;&#116;&#117;&#115;&#064;&#105;&#100;&#110;&#101;&#116;&#046;&#110;&#101;&#116;">&#099;&#111;&#110;&#116;&#097;&#099;&#116;&#117;&#115;&#064;&#105;&#100;&#110;&#101;&#116;&#046;&#110;&#101;&#116;</a></p>

      <p>Site managed by <a href="http://www.idnet.net/mobisxtnd/default.jsp">MOBIS XTnd CMS</a>. Page last modified: 13 February 2006</p>
      
</div>

<!-- // FOOTER END -->

</div>

</body>
</html>




</CODE>

The bits I'm interested in are the following three:


                                    <h2>Your IDNet Broadband was due to be activated on 04 April 2006</h2>

(would like 04 April 2006 to be in a variable)
and

<h3>During the time period 01 November 2006 to 20 November 2006 your bandwidth use was:</h3>
                  <h3>1.21 GB Download</h3>
                  <h3>0.88 GB Upload</h3>

(would like the 2 dates and both the download and upload GB figures)
And

<h4>These figures cover 19 days. If the same rate of usage continues for 30 days then the total for the month will be:</h4>
                        <h4>1.91 GB Download</h4>
                        <h4>1.39 GB Upload</h4>

(would like the "19 days" figure and the projected download and upload GB figures)


I know I need to use regular expressions, but this is proving very difficult as it's the first time I have used them.

Many thanks for your help

0
nabberuk
Asked:
nabberuk
  • 2
1 Solution
 
Fernando SotoRetiredCommented:
Hi nabberuk;

This should do what you want.

Imports System.Text.RegularExpressions


        Dim HTML As String = "The data to search"
        Dim pattern As String = "<h2>\s*Your\s+IDNet\s+Broadband\s+was\s+due\s+to\s+" _
            & "be\s+activated\s+on\s*(?<Date>\d{1,2}\s+[a-zA-Z]+\s+\d{4})\s*</h2>|" _
            & "<h3>During\s+the\s+time\speriod\s+(?<From>\d{1,2}\s+[a-zA-Z]+\s+" _
            & "\d{4})\s+to\s+(?<To>\d{1,2}\s+[a-zA-Z]+\s+\d{4})\s+your\s+" _
            & "bandwidth\s+use\s+was:\s*</h3>.*?<h3>\s*(?<Down>\d+\.\d+)\s+GB\s+" _
            & "Download\s*</h3>.*?<h3>(?<Up>\d+\.\d+)\s+GB\s+Upload</h3>|<h4>\s*" _
            & "These\s+figures\s+cover\s+(?<Days>\d+)\s+days\.\s+If\s+the\s+same\s+" _
            & "rate\s+of\s+usage\s+continues\s+for\s+\d+\s+days\s+then\s+the\s+" _
            & "total\s+for\s+the\s+month\s+will\s+be:\s*</h4>.*?<h4>\s*" _
            & "(?<TotalDown>\d+\.\d+)\s+GB\s+Download\s*</h4>.*?<h4>\s*" _
            & "(?<TotalUp>\d+\.\d+)\s+GB\s+Upload\s*</h4>"
        Dim RepDate As String
        Dim FromDate As String
        Dim ToDate As String
        Dim DownGB As String
        Dim UpGB As String
        Dim Days As String
        Dim TotalDown As String
        Dim TotalUp As String
        Dim mc As MatchCollection = Regex.Matches(HTML, pattern, RegexOptions.Singleline)

        For Each m As Match In mc
            If Not m.Groups("Date").Value = "" Then
                RepDate = m.Groups("Date").Value
            End If
            If Not m.Groups("From").Value = "" Then
                FromDate = m.Groups("From").Value
                ToDate = m.Groups("To").Value
                DownGB = m.Groups("Down").Value
                UpGB = m.Groups("Up").Value
            End If
            If Not m.Groups("Days").Value = "" Then
                Days = m.Groups("Days").Value
                TotalDown = m.Groups("TotalDown").Value
                TotalUp = m.Groups("TotalUp").Value
            End If
        Next


Fernando
0
 
nabberukAuthor Commented:
many thanks, works well!
0
 
Fernando SotoRetiredCommented:
Not a problem, glad I was able to help. ;=)
0
Question has a verified solution.

Are you experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

Join & Write a Comment

Featured Post

Cloud Class® Course: Python 3 Fundamentals

This course will teach participants about installing and configuring Python, syntax, importing, statements, types, strings, booleans, files, lists, tuples, comprehensions, functions, and classes.

  • 2
Tackle projects and never again get stuck behind a technical roadblock.
Join Now