Hard question about string search

I was just wondering if anyone knows how browsers extract the attribute values from tags and such...
I want to know how to take a tag like this.
<img src="hello.jpg" border=0 width="2" height=34 align="texttop" alt=hey>

Given a tag like this, how do I get its values? This must be very difficult because attributes can be written either as border=0 or as border="0".
how the heck does this work?
Is it possible to make a function that works like this: call StripTags(richtextbox.text, "<img src,border,width,align,height,alt", output), where output contains their values separated by ","? This is too hard to do in VB, right?
Who is Participating?
Hey, here is some cool code which can parse your string ... not only when you use double quotes, but also when there are single quotes ... but what will you do if your ALT parameter looks like this:
ALT=mama miya bambarabiya kerkudu
This code can handle that too. Create a new form, add Text1, Command1, List1 and List2, then copy and paste the code ...

' Lookup table of the tags and attributes the parser recognises.
' Each row describes one tag: column 0 holds the upper-case tag name and
' columns 1 onward hold that tag's upper-case attribute names.
' Chr(0) is used as the end marker, both after the last attribute in a row
' and in column 0 of the row following the last tag (see Form_Load).
Dim TagParamIndex(2, 9) As String

Private Sub Command1_Click()
' Parses the first <...> tag found in Text1 and lists its recognised
' attributes: attribute names are added to List1 and the matching values
' (with double and single quotes removed) are added to List2.
'
' The tag and its attributes are recognised via the TagParamIndex table:
' column 0 of each row is the upper-case tag name, the following columns
' are its upper-case attribute names, and Chr(0) terminates both the row
' list and each attribute list.
'
' A value is taken to be everything between one "=" and the word that
' precedes the next "=", so unquoted values may contain spaces
' (e.g. ALT=mama miya bambarabiya kerkudu).
' Limitation: a value that itself contains "=" is split incorrectly.
'
' Text1 is overwritten with the remaining unparsed text as parsing
' progresses, and the tag name and every attribute name are shown with
' MsgBox, as in the original sample.
Dim StartPos As Long
Dim StopPos As Long
Dim DefText As String
Dim TagArrayPos As Long
Dim ParamArrayPos As Long
Dim defTag As String
Dim defParam As String
Dim defValue As String
Dim bStillSeekForChar As Boolean
Dim bKnownParam As Boolean

DefText = ""

'isolate the text between the first "<" and the following ">"
StartPos = InStr(1, Text1.Text, "<")
If StartPos > 0 Then
   StopPos = InStr(StartPos, Text1.Text, ">")
   If StopPos > StartPos Then
      DefText = Trim(Mid(Text1.Text, StartPos + 1, StopPos - StartPos - 1))
   End If
End If

If DefText = "" Then
   MsgBox "Can't find <>"
   Exit Sub
End If

TagArrayPos = 0
ParamArrayPos = 0

Text1.Text = DefText

'find end of tag name; a tag without attributes has nothing to list
StopPos = InStr(1, DefText, Chr(32))
If StopPos <= 1 Then Exit Sub

defTag = Mid(DefText, 1, StopPos - 1)
MsgBox defTag

'locate the table row of this tag (the rows end with a Chr(0) entry)
Do While TagParamIndex(TagArrayPos, 0) <> Chr(0)
   If UCase(defTag) = TagParamIndex(TagArrayPos, 0) Then Exit Do
   TagArrayPos = TagArrayPos + 1
Loop
If TagParamIndex(TagArrayPos, 0) = Chr(0) Then Exit Sub    'unknown tag

If StopPos >= Len(DefText) - 1 Then Exit Sub               'nothing useful after the tag name

'drop the tag name, keep only the attribute text
DefText = Right(DefText, Len(DefText) - StopPos)

Do While DefText <> ""
   Text1.Text = DefText

   StartPos = InStr(1, DefText, "=")
   'no further name=value pair: stop instead of looping forever
   If StartPos <= 1 Then Exit Do

   defParam = Trim(Mid(DefText, 1, StartPos - 1))

   'determine whether the attribute is listed for this tag
   bKnownParam = False
   ParamArrayPos = 1
   Do While TagParamIndex(TagArrayPos, ParamArrayPos) <> Chr(0)
      If UCase(defParam) = TagParamIndex(TagArrayPos, ParamArrayPos) Then
         bKnownParam = True
         Exit Do
      End If
      ParamArrayPos = ParamArrayPos + 1
   Loop

   'consume the name and its "=" even when the attribute is unknown,
   'so that every pass makes progress
   DefText = Right(DefText, Len(DefText) - StartPos)
   Text1.Text = DefText

   'the value ends where the name of the next attribute begins: find the
   'next "=" (searching from the start of the remaining text) and step
   'back over the word in front of it
   StopPos = 0
   StartPos = InStr(1, DefText, "=")
   If StartPos > 0 Then
      bStillSeekForChar = True
      StopPos = StartPos - 1
      Do While StopPos > 0
         If Mid(DefText, StopPos, 1) <> Chr(32) Then
            'inside the next attribute's name
            bStillSeekForChar = False
         Else
            'a blank after the name has been seen marks the end of the value;
            'blanks between the name and "=" are skipped
            If Not bStillSeekForChar Then Exit Do
         End If
         StopPos = StopPos - 1
      Loop
   End If

   If StartPos = 0 Then
      'last attribute: everything that is left is its value
      defValue = DefText
      DefText = ""
   ElseIf StopPos > 0 Then
      defValue = Left(DefText, StopPos)
      DefText = Right(DefText, Len(DefText) - StopPos)
   Else
      'the next attribute follows immediately: the value is empty
      defValue = ""
   End If

   If bKnownParam Then
      defValue = Trim(Replace(defValue, Chr(34), Chr(32)))
      defValue = Trim(Replace(defValue, Chr(39), Chr(32)))
      List1.AddItem defParam
      'always add a value so List1 and List2 stay aligned
      List2.AddItem defValue
   End If

   MsgBox defParam
Loop

End Sub

Private Sub Form_Load()
' Puts a sample <img> tag into Text1 and fills the TagParamIndex table.
' Each table row receives a tag name in column 0, its attribute names in
' the following columns and a Chr(0) end marker; the row after the last
' tag receives Chr(0) in column 0.
Dim dq As String
Dim tagRows() As String
Dim rowNames() As String
Dim r As Long
Dim c As Long

dq = Chr(34)
Text1.Text = "<img src=" & dq & "hello.jpg" & dq & " border=0 width=" & dq & "2" & dq & " height=34 align=" & dq & "texttop" & dq & " alt=hey>"

'one "|"-separated entry per tag: the tag name first, then its attributes
tagRows = Split("IMG,SRC,BORDER,WIDTH,HEIGHT,ALIGN,ALT,NAME,ID|A,HREF,TARGET,NAME,ID", "|")

For r = 0 To UBound(tagRows)
   rowNames = Split(tagRows(r), ",")
   For c = 0 To UBound(rowNames)
      TagParamIndex(r, c) = rowNames(c)
   Next c
   'end marker of this row's attribute list
   TagParamIndex(r, UBound(rowNames) + 1) = Chr(0)
Next r

'end marker of the tag list
TagParamIndex(UBound(tagRows) + 1, 0) = Chr(0)

End Sub
you may get ideas from these two samples.  One strips all the hyperlinks out of the document and the other strips all the tags out of the document.



you will need to think methodically. Think to yourself how you would go about doing it manually, then automate that. Use commands such as Right, Left, Mid and InStr. You might want to keep a progress bar in mind, because VB's string manipulation is sssslllloooowww!
If you could make a function to return the specified Tag or word in the string, then that would be a great start.

I'll have a look for you, see if i can work out some functions.
Ultimate Tool Kit for Technology Solution Provider

Broken down into practical pointers and step-by-step instructions, the IT Service Excellence Tool Kit delivers expert advice for technology solution providers. Get your free copy now.

the way i would approach it is create an array of strings that holds all the possible tags.  then i would load the html source into a richtextbox and use its find method and perform a loop that finds the tags and go from there using string manipulation and such
It's very simple if you use the HTML object model instead of string parsing. Here is a small example:

' Walks every element of the document loaded in WebBrowser1 and prints the
' src, border and width attributes of each IMG element to the Immediate window.
Dim ob As Variant
For Each ob In WebBrowser1.document.All
    If ob.tagName = "IMG" Then
        Debug.Print "Src=" & ob.src & " Border=" & ob.border & " Width=" & ob.Width
    End If
Next ob

WebBrowser1 is a WebBrowser control :-)) or it may be created like this:

' Create an Internet Explorer instance through late binding, point it at a
' page and make its window visible.
Set WebBrowser1 = CreateObject("internetexplorer.application")
With WebBrowser1
    .Navigate "www....."
    .Visible = True
End With

try something like this

' Collects the attribute values of a tag into sResults; every value is
' followed by "," (so the result ends with a trailing comma).
' For the sample tag this prints: hello.jpg,0,2,34,texttop,hey,
' Limitation: the tag is split on blanks, so a value containing a space
' is not handled.
sStr = "<img src=""hello.jpg"" border=0 width=""2"" height=34 align=""texttop"" alt=hey>"

vparts = Split(sStr, " ")
sResults = ""

For p = LBound(vparts) To UBound(vparts)
   If InStr(vparts(p), "=") > 0 Then
      'everything after the first "=" is the raw value
      sTemp = Mid(vparts(p), InStr(vparts(p), "=") + 1)
      'the last part still carries the tag's closing ">"
      If Right(sTemp, 1) = ">" Then sTemp = Left(sTemp, Len(sTemp) - 1)
      'quoted and unquoted values are treated alike: drop the double quotes
      sTemp = Replace(sTemp, """", "")
      sResults = sResults & sTemp & ","
   End If
Next p
Debug.Print sResults

That should give you what you want without having to create a webbrowser object.
Geo24Author Commented:
This code is more than EXCELLENT!!

Thank u man!
Question has a verified solution.

Are you experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

All Courses

From novice to tech pro — start learning today.