Murray Brown
asked on
Word VBA Convert formatted text to markup
Hi
In Word VBA is it possible to convert formatted text into markup language like html?
Thanks
In Word VBA is it possible to convert formatted text into markup language like html?
Thanks
I'm going to say "yes", but it is contingent on what your markup looks like. Please post a representative example.
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
Thanks. That makes sense
If it's helpful, I use the VBA code below to convert Word to simple HTML. The code handles hyperlinks, bold, italic and underline formatting, standard Word heading styles (Heading1, Heading2 etc) plus bulleted and simple numbered lists.
It does not handle tables or images, though it could be extended to do so fairly easily. There are four routines and a function (which creates a closing HTML tag).
The main routine calls the others.
In essence it does the following:
Hope it is useful
It does not handle tables or images, though it could be extended to do so fairly easily. There are four routines and a function (which creates a closing HTML tag).
The main routine calls the others.
In essence it does the following:
- Works with your current word selection
- If nothing is selected it converts the whole document
- Converts Word hyperlinks to HTML setting the html target to "_blank"
- Runs through all paragraphs in the selection and
- - Converts lists to their HTML equivalent
- - converts ordinary paragraphs to <p> tagged HTML
- - converts Heading1 to <H1> etc
- Calls another routine to search for and convert bold, italic and underline Word formatting to HTML tags
Hope it is useful
Sub Word2HTML()
'''''''''''''''''''''''''''''''''''''''''''
' Converts Word selection to HTML '
' Converts whole document if no selection '
' Neil Fleming, 2018 '
'''''''''''''''''''''''''''''''''''''''''''
Dim ss As Selection, rr As Range, wPara As Paragraph
Dim sTag As String, sList As String, sHead As String
Dim inList As Boolean, iPos As Long
Set ss = Selection
'if nothing selected, select whole document
If ss.End = ss.start Then ActiveDocument.Range.Select
'work with range rather than selection
Set rr = ss.Range
'call ConvertHyperlinks to convert hyperlinks as target=_blank
ConvertHyperlinks2HTML rr
'iterate paragraphs
For Each wPara In rr.Paragraphs
'check for bulleted or simple numbered lists and set HTML tags
Select Case wPara.Range.ListFormat.ListType
Case WdListType.wdListBullet: sList = "<ul>": sTag = "<li>"
Case WdListType.wdListSimpleNumbering: sList = "<ol>": sTag = "<li>"
Case WdListType.wdListNoNumbering: sTag = "<p>"
End Select
'check for standard Word heading styles
sHead = wPara.Style
If Left(sHead, 7) = "Heading" Then sTag = "<h" & Right(sHead, 1) & ">"
Select Case sTag
Case "<li>"
'remove Word list format
wPara.Range.ListFormat.RemoveNumbers
'on first occurrence of list tag add the <ul> or <ol> opener
If Not inList Then
inList = True
wPara.Range.InsertBefore sList & Chr(13) & sTag
Else
wPara.Range.InsertBefore sTag
End If
'call AddEndTag routine, ensures tag is not accidentally formatted bold,italic, underline
AddEndTag wPara.Range, sTag
Case "<p>", "<h1>", "<h2>", "<h3>", "<h4>"
'at end of previous list, if one exists, insert closing tag for HTML list
If inList Then
inList = False
wPara.Range.InsertBefore eTag(sList) & Chr(13) & sTag
Else
wPara.Range.InsertBefore sTag
End If
AddEndTag wPara.Range, sTag
End Select
'check for blank paragraphs - adjacent html open and close tags - and add non-breaking space
iPos = InStr(wPara.Range, sTag & eTag(sTag))
If iPos > 0 Then
wPara.Range.Characters(iPos + (Len(sTag) - 1)).InsertAfter " "
End If
Next
'convert Word bold, italic, underline to html
FormatIBU2HTML rr
End Sub
Sub AddEndTag(rText As Range, sTag As String)
Dim iLen As Long, rFormat As Range
iLen = Len(rText.Text)
rText.Characters(Len(rText.Text) - 1).InsertAfter eTag(sTag)
'ensure html tag is not bold underline or italic
Set rFormat = rText
rFormat.start = rText.start + (iLen - 1)
rFormat.Font.Bold = False: rFormat.Font.Italic = False: rFormat.Font.Underline = False
End Sub
Function eTag(s As String) As String
'returns closing HTML tag for any given opening tag
Dim iPos As Long
iPos = InStr(s, " ")
If iPos = 0 Then
eTag = "</" & Right(s, Len(s) - 1)
Else
eTag = "</" & Mid(s, 2, iPos - 2) & ">"
End If
End Function
Sub FormatIBU2HTML(rr As Range)
'formats italic, bold, underline as HTML
Dim rFind As Range
Dim iStart As Long, iStop As Long, iFound As Long
Dim notFound As Boolean
Const cTags = "<i>,<b>,<u>"
Dim aTag() As String
Dim iTag As Long
'create array of tags
aTag = Split(cTags, ",")
iStart = rr.start
iStop = rr.End
Set rFind = rr
'set up rFind find parameters
With rFind.Find
.ClearFormatting
.Forward = True
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.MatchWildcards = True
.Text = "*"
.Wrap = wdFindStop
'find formatting for each tag
For iTag = 0 To 2
notFound = False
Do
rFind.start = iStart
rFind.End = iStop
'Set format for search
.ClearFormatting
Select Case iTag
Case 0: .Font.Italic = True
Case 1: .Font.Bold = True
Case 2: .Font.Underline = True
End Select
'search
If .Execute Then
iFound = rFind.start
'Set format to find end of formatting
Select Case iTag
Case 0: .Font.Italic = False
Case 1: .Font.Bold = False
Case 2: .Font.Underline = False
End Select
'find end of formatting
rFind.End = rr.End
If .Execute Then
rFind.End = rFind.start
rFind.start = iFound
Else
notFound = True
End If
'remove Word formatting
Select Case iTag
Case 0: rFind.Font.Italic = False
Case 1: rFind.Font.Bold = False
Case 2: rFind.Font.Underline = False
End Select
'add HTML formatting tags
rFind.InsertBefore aTag(iTag)
rFind.InsertAfter eTag(aTag(iTag))
Else
notFound = True
End If
Loop Until notFound
Next
End With
rFind.start = iStart
rFind.End = iStop
End Sub
Sub ConvertHyperlinks2HTML(rr As Range)
Dim hh As Hyperlink
For Each hh In rr.Hyperlinks
'only convert text hyperlinks
If hh.Type = msoHyperlinkRange Then
hh.Range.Font.Underline = False
hh.Range.InsertAfter "</a>"
hh.Range.InsertBefore "<a target='_blank' href='" & hh.Address & "'>"
End If
Next
End Sub