Boxes with question marks from page breaks. I want to convert text in Word that contains HTML tags into Word-recognized formatting.
I am looking to modify the code below to include the following:
' ReformatHTML: runs a sequence of wildcard Find/Replace passes over the
' active document, replacing paired HTML tags (<p>, <u>, <strong>, <i>, <h>)
' with their inner text plus Word formatting, then replaces <br> tags and
' finally sets the font of the whole document.
' In every paired-tag pattern, group \1 is the tag name plus ">" and
' group \2 is the text between the opening and closing tag.
Sub ReformatHTML()
' Suppress screen redraws while the replacements run.
Application.ScreenUpdating = False
With ActiveDocument.Range.Find
' Search settings shared by all passes below.
.ClearFormatting
.Format = True
.Forward = True
.MatchWildcards = True
.Wrap = wdFindContinue
' Remove <p>...</p>, keeping only the inner text (\2).
.Replacement.Text = "\2"
.Replacement.ClearFormatting
.Text = "\<(p\>)(*)\</\1"
.Execute Replace:=wdReplaceAll
' <u>...</u>: keep the inner text and underline it.
.Replacement.Text = "\2"
.Replacement.ClearFormatting
.Text = "\<(u\>)(*)\</\1"
.Replacement.Font.Underline = True
.Replacement.Font.Name = "Calibri"
.Replacement.Font.Size = 11
.Execute Replace:=wdReplaceAll
' <strong>...</strong>: apply the "Strong" style.
' Replacement.Text is not reassigned from here on, so it is presumably
' still "\2" from the assignment above - confirm.
.Replacement.ClearFormatting
.Text = "\<(strong\>)(*)\</\1"
.Replacement.Style = "Strong"
.Replacement.Font.Name = "Calibri"
.Replacement.Font.Size = 11
.Execute Replace:=wdReplaceAll
' <i>...</i>: italicize the inner text.
.Replacement.ClearFormatting
.Text = "\<(i\>)(*)\</\1"
.Replacement.Font.Italic = True
.Replacement.Font.Name = "Calibri"
.Replacement.Font.Size = 11
.Execute Replace:=wdReplaceAll
' <h>...</h>: highlight the inner text.
.Replacement.ClearFormatting
.Text = "\<(h\>)(*)\</\1"
.Replacement.Highlight = True
.Replacement.Font.Name = "Calibri"
.Replacement.Font.Size = 11
.Execute Replace:=wdReplaceAll
' Replace each <br> tag with vbCrLf.
' NOTE(review): vbCrLf is a carriage return followed by a line feed; the
' extra line-feed character is the likely source of the boxes reported
' for this macro - confirm; vbCr alone may be what is wanted.
.Replacement.Text = vbCrLf
.Replacement.ClearFormatting
.Text = "\<br\>"
.Execute Replace:=wdReplaceAll
End With
' Set Calibri 11 pt on the whole document range.
With ActiveDocument.Range
.Font.Name = "Calibri"
.Font.Size = 11
End With
' Re-enable screen redraws.
Application.ScreenUpdating = True
End Sub
The code works, aside from the modifications. The post titled "Rendering text with HTML tags to Formatted text in a Word table using VBA" has helped.
Here is a sample of text I am trying to modify (I can't figure out how to do it with text and not have the HTML converted to formatting):
Please try the following.
' ReformatHTML: converts <u>...</u> spans in the active document into
' underlined Calibri 11 pt text and turns every <br> tag into vbCr,
' using two wildcard Find/Replace passes.
Sub ReformatHTML()
    Dim docRange As Range

    Application.ScreenUpdating = False
    Set docRange = ActiveDocument.Range

    With docRange.Find
        ' Search options shared by both passes.
        .ClearFormatting
        .Format = True
        .Forward = True
        .MatchWildcards = True
        .Wrap = wdFindContinue

        ' Pass 1: <u>...</u>. Group \2 is the text between the tags.
        With .Replacement
            .Text = "\2"
            .ClearFormatting
            .Font.Name = "Calibri"
            .Font.Size = 11
        End With
        .Text = "\<(u\>)(*)\</\1"
        .Replacement.Font.Underline = True
        .Execute Replace:=wdReplaceAll

        ' Pass 2: <br>, replaced by vbCr with no replacement formatting.
        With .Replacement
            .Text = vbCr
            .ClearFormatting
        End With
        .Text = "\<br\>"
        .Execute Replace:=wdReplaceAll
    End With

    Application.ScreenUpdating = True
End Sub
Update
Remove the <p> tag and integrate this with the code in the OP.
' ReformatHTML: converts simple HTML markup in the active document into
' Word formatting using a series of wildcard Find/Replace passes:
'   <p>...</p>            -> inner text only
'   <u>...</u>            -> underlined
'   <strong>...</strong>  -> "Strong" style
'   <i>...</i>            -> italic
'   <h>...</h>            -> highlighted
'   <br>                  -> vbCr
' In every paired-tag pattern, group \1 is the tag name plus ">" and
' group \2 is the text between the opening and closing tag.
Sub ReformatHTML()
    Application.ScreenUpdating = False
    With ActiveDocument.Range.Find
        ' Search settings shared by all passes.
        .ClearFormatting
        .Format = True
        .Forward = True
        .MatchWildcards = True
        .Wrap = wdFindContinue

        ' Remove <p>...</p>, keeping the inner text.
        .Replacement.Text = "\2"
        .Replacement.ClearFormatting
        .Text = "\<(p\>)(*)\</\1"
        .Execute Replace:=wdReplaceAll

        ' <u>...</u>: underline.
        .Replacement.Text = "\2"
        .Replacement.ClearFormatting
        .Text = "\<(u\>)(*)\</\1"
        .Replacement.Font.Underline = True
        .Replacement.Font.Name = "Calibri"
        .Replacement.Font.Size = 11
        .Execute Replace:=wdReplaceAll

        ' <strong>...</strong>: "Strong" style.
        .Replacement.ClearFormatting
        .Text = "\<(strong\>)(*)\</\1"
        .Replacement.Style = "Strong"
        .Replacement.Font.Name = "Calibri"
        .Replacement.Font.Size = 11
        .Execute Replace:=wdReplaceAll

        ' <i>...</i>: italic.
        .Replacement.ClearFormatting
        .Text = "\<(i\>)(*)\</\1"
        .Replacement.Font.Italic = True
        .Replacement.Font.Name = "Calibri"
        .Replacement.Font.Size = 11
        .Execute Replace:=wdReplaceAll

        ' <h>...</h>: highlight.
        .Replacement.ClearFormatting
        .Text = "\<(h\>)(*)\</\1"
        .Replacement.Highlight = True
        .Replacement.Font.Name = "Calibri"
        .Replacement.Font.Size = 11
        .Execute Replace:=wdReplaceAll

        ' <br>: replace with vbCr only. vbCrLf would add a line-feed
        ' character after the carriage return, which is left in the
        ' document as an extra character.
        .Replacement.Text = vbCr
        .Replacement.ClearFormatting
        .Text = "\<br\>"
        .Execute Replace:=wdReplaceAll
    End With
    Application.ScreenUpdating = True
End Sub
If you set the font for the whole document, then the .Replacement.Font.Name
and .Replacement.Font.Size
settings are not necessary.
' Set Calibri 11 pt on the whole document range in one step.
With ActiveDocument.Range
    .Font.Name = "Calibri"
    .Font.Size = 11
End With