Sub Word2HTML()
'''''''''''''''''''''''''''''''''''''''''''
' Converts Word selection to HTML '
' Converts whole document if no selection '
' Neil Fleming, 2018 '
'''''''''''''''''''''''''''''''''''''''''''
Dim ss As Selection, rr As Range, wPara As Paragraph
Dim sTag As String, sList As String, sHead As String
Dim inList As Boolean, iPos As Long
Set ss = Selection
'if nothing selected, select whole document
If ss.End = ss.start Then ActiveDocument.Range.Select
'work with range rather than selection
Set rr = ss.Range
'call ConvertHyperlinks to convert hyperlinks as target=_blank
ConvertHyperlinks2HTML rr
'iterate paragraphs
For Each wPara In rr.Paragraphs
'check for bulleted or simple numbered lists and set HTML tags
Select Case wPara.Range.ListFormat.ListType
Case WdListType.wdListBullet: sList = "<ul>": sTag = "<li>"
Case WdListType.wdListSimpleNumbering: sList = "<ol>": sTag = "<li>"
Case WdListType.wdListNoNumbering: sTag = "<p>"
End Select
'check for standard Word heading styles
sHead = wPara.Style
If Left(sHead, 7) = "Heading" Then sTag = "<h" & Right(sHead, 1) & ">"
Select Case sTag
Case "<li>"
'remove Word list format
wPara.Range.ListFormat.RemoveNumbers
'on first occurrence of list tag add the <ul> or <ol> opener
If Not inList Then
inList = True
wPara.Range.InsertBefore sList & Chr(13) & sTag
Else
wPara.Range.InsertBefore sTag
End If
'call AddEndTag routine, ensures tag is not accidentally formatted bold,italic, underline
AddEndTag wPara.Range, sTag
Case "<p>", "<h1>", "<h2>", "<h3>", "<h4>"
'at end of previous list, if one exists, insert closing tag for HTML list
If inList Then
inList = False
wPara.Range.InsertBefore eTag(sList) & Chr(13) & sTag
Else
wPara.Range.InsertBefore sTag
End If
AddEndTag wPara.Range, sTag
End Select
'check for blank paragraphs - adjacent html open and close tags - and add non-breaking space
iPos = InStr(wPara.Range, sTag & eTag(sTag))
If iPos > 0 Then
wPara.Range.Characters(iPos + (Len(sTag) - 1)).InsertAfter " "
End If
Next
'convert Word bold, italic, underline to html
FormatIBU2HTML rr
End Sub
Sub AddEndTag(rText As Range, sTag As String)
Dim iLen As Long, rFormat As Range
iLen = Len(rText.Text)
rText.Characters(Len(rText.Text) - 1).InsertAfter eTag(sTag)
'ensure html tag is not bold underline or italic
Set rFormat = rText
rFormat.start = rText.start + (iLen - 1)
rFormat.Font.Bold = False: rFormat.Font.Italic = False: rFormat.Font.Underline = False
End Sub
Function eTag(s As String) As String
'returns closing HTML tag for any given opening tag
Dim iPos As Long
iPos = InStr(s, " ")
If iPos = 0 Then
eTag = "</" & Right(s, Len(s) - 1)
Else
eTag = "</" & Mid(s, 2, iPos - 2) & ">"
End If
End Function
Sub FormatIBU2HTML(rr As Range)
'formats italic, bold, underline as HTML
Dim rFind As Range
Dim iStart As Long, iStop As Long, iFound As Long
Dim notFound As Boolean
Const cTags = "<i>,<b>,<u>"
Dim aTag() As String
Dim iTag As Long
'create array of tags
aTag = Split(cTags, ",")
iStart = rr.start
iStop = rr.End
Set rFind = rr
'set up rFind find parameters
With rFind.Find
.ClearFormatting
.Forward = True
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.MatchWildcards = True
.Text = "*"
.Wrap = wdFindStop
'find formatting for each tag
For iTag = 0 To 2
notFound = False
Do
rFind.start = iStart
rFind.End = iStop
'Set format for search
.ClearFormatting
Select Case iTag
Case 0: .Font.Italic = True
Case 1: .Font.Bold = True
Case 2: .Font.Underline = True
End Select
'search
If .Execute Then
iFound = rFind.start
'Set format to find end of formatting
Select Case iTag
Case 0: .Font.Italic = False
Case 1: .Font.Bold = False
Case 2: .Font.Underline = False
End Select
'find end of formatting
rFind.End = rr.End
If .Execute Then
rFind.End = rFind.start
rFind.start = iFound
Else
notFound = True
End If
'remove Word formatting
Select Case iTag
Case 0: rFind.Font.Italic = False
Case 1: rFind.Font.Bold = False
Case 2: rFind.Font.Underline = False
End Select
'add HTML formatting tags
rFind.InsertBefore aTag(iTag)
rFind.InsertAfter eTag(aTag(iTag))
Else
notFound = True
End If
Loop Until notFound
Next
End With
rFind.start = iStart
rFind.End = iStop
End Sub
Sub ConvertHyperlinks2HTML(rr As Range)
Dim hh As Hyperlink
For Each hh In rr.Hyperlinks
'only convert text hyperlinks
If hh.Type = msoHyperlinkRange Then
hh.Range.Font.Underline = False
hh.Range.InsertAfter "</a>"
hh.Range.InsertBefore "<a target='_blank' href='" & hh.Address & "'>"
End If
Next
End Sub