(On MacOS) How can I format (or decode) HTML entities and tags in excel using vba?

HopefulDear 6 Reputation points
2021-03-12T19:10:55.347+00:00

Using VBA I have been able to attain and place strings like the below within an excel cell. But they currently still contain both HTML tags and entities. How can I show these in their appropriately formatted form, within an excel cell?

<sup>3</sup>&#8260;<sub>10</sub> is larger than <sup>3</sup>&#8260;<sub>100</sub>, <sup>33</sup>&#8260;<sub>1,000</sub> or <sup>3</sup>&#8260;<sub>1,000</sub>

This is what I'm trying to achieve:

gdvIp.png

Image originally from link.

I have attempted many of the approaches already suggested within different stack overflow threads, but as they all focus toward the Windows O.S most make use of libraries and browsers which the Mac version of excel simply doesn't have access to.

Errors such:

Run-time error '429:

ActiveX component can't create object

When attempting this approach:

Sub Sample()
    Dim Ie As Object
    Set Ie = CreateObject("InternetExplorer.Application")
    With Ie
        .Visible = False
        .Navigate "about:blank"
        .document.body.InnerHTML = Sheets("Sheet1").Range("I2").Value
             'update to the cell that contains HTML you want converted
        .ExecWB 17, 0
             'Select all contents in browser
        .ExecWB 12, 2
             'Copy them
        ActiveSheet.Paste Destination:=Sheets("Sheet1").Range("J2")
             'update to cell you want converted HTML pasted in
        .Quit
    End With
End Sub

Or this approach:

Function HtmlDecode(str As String) As String
    Dim dom As Object

    Set dom = CreateObject("htmlfile")
    dom.Open
    dom.Write str
    dom.Close
    HtmlDecode = dom.Body.innerText
End Function

Compiler Error: User-Defined Type not defined

When attempting this approach:

Option Explicit

Private Const ERROR_UNEXPECTED_ENTITY_COUNT As Long = vbObjectError Or 1
Private Const ERROR_UNEXPECTED_ENTITY_DUPLICATE As Long = vbObjectError Or 2
Private Const ERROR_UNEXPECTED_ENTITY_VALUE As Long = vbObjectError Or 3
Private Const ERROR_UNEXPECTED_ENTITY_FORMAT As Long = vbObjectError Or 4

Private Function CreateEntityDictionary() As Scripting.Dictionary

  Const PROC_NAME As String = "getEntityDictionary"

  Const ENTITY_NAMES = _
    "quot,amp,apos,lt,gt,nbsp,iexcl,cent,pound,curren,yen,brvbar,sect,uml,copy,ordf," & _
    "laquo,not,shy,reg,macr,deg,plusmn,sup2,sup3,acute,micro,para,middot,cedil,sup1,ordm," & _
    "raquo,frac14,frac12,frac34,iquest,Agrave,Aacute,Acirc,Atilde,Auml,Aring,AElig,Ccedil,Egrave,Eacute,Ecirc," & _
    "Euml,Igrave,Iacute,Icirc,Iuml,ETH,Ntilde,Ograve,Oacute,Ocirc,Otilde,Ouml,times,Oslash,Ugrave,Uacute," & _
    "Ucirc,Uuml,Yacute,THORN,szlig,agrave,aacute,acirc,atilde,auml,aring,aelig,ccedil,egrave,eacute,ecirc," & _
    "euml,igrave,iacute,icirc,iuml,eth,ntilde,ograve,oacute,ocirc,otilde,ouml,divide,oslash,ugrave,uacute," & _
    "ucirc,uuml,yacute,thorn,yuml,OElig,oelig,Scaron,scaron,Yuml,fnof,circ,tilde,Alpha,Beta,Gamma," & _
    "Delta,Epsilon,Zeta,Eta,Theta,Iota,Kappa,Lambda,Mu,Nu,Xi,Omicron,Pi,Rho,Sigma,Tau," & _
    "Upsilon,Phi,Chi,Psi,Omega,alpha,beta,gamma,delta,epsilon,zeta,eta,theta,iota,kappa,lambda," & _
    "mu,nu,xi,omicron,pi,rho,sigmaf,sigma,tau,upsilon,phi,chi,psi,omega,thetasym,upsih," & _
    "piv,ensp,emsp,thinsp,zwnj,zwj,lrm,rlm,ndash,mdash,lsquo,rsquo,sbquo,ldquo,rdquo,bdquo," & _
    "dagger,Dagger,bull,hellip,permil,prime,Prime,lsaquo,rsaquo,oline,frasl,euro,image,weierp,real,trade," & _
    "alefsym,larr,uarr,rarr,darr,harr,crarr,lArr,uArr,rArr,dArr,hArr,forall,part,exist,empty," & _
    "nabla,isin,notin,ni,prod,sum,minus,lowast,radic,prop,infin,ang,and,or,cap,cup," & _
    "int,there4,sim,cong,asymp,ne,equiv,le,ge,sub,sup,nsub,sube,supe,oplus,otimes," & _
    "perp,sdot,lceil,rceil,lfloor,rfloor,lang,rang,loz,spades,clubs,hearts,diams"

  Const ENTITY_VALUES = _
    "34,38,39,60,62,160,161,162,163,164,165,166,167,168,169,170," & _
    "171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186," & _
    "187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202," & _
    "203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218," & _
    "219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234," & _
    "235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250," & _
    "251,252,253,254,255,338,339,352,353,376,402,710,732,913,914,915," & _
    "916,917,918,919,920,921,922,923,924,925,926,927,928,929,931,932," & _
    "933,934,935,936,937,945,946,947,948,949,950,951,952,953,954,955," & _
    "956,957,958,959,960,961,962,963,964,965,966,967,968,969,977,978," & _
    "982,8194,8195,8201,8204,8205,8206,8207,8211,8212,8216,8217,8218,8220,8221,8222," & _
    "8224,8225,8226,8230,8240,8242,8243,8249,8250,8254,8260,8364,8465,8472,8476,8482," & _
    "8501,8592,8593,8594,8595,8596,8629,8656,8657,8658,8659,8660,8704,8706,8707,8709," & _
    "8711,8712,8713,8715,8719,8721,8722,8727,8730,8733,8734,8736,8743,8744,8745,8746," & _
    "8747,8756,8764,8773,8776,8800,8801,8804,8805,8834,8835,8836,8838,8839,8853,8855," & _
    "8869,8901,8968,8969,8970,8971,9001,9002,9674,9824,9827,9829,9830"

  Const ENTITY_DELIMITER As String = ","

  Set CreateEntityDictionary = New Scripting.Dictionary
  'Entity names must be case sensitive
  CreateEntityDictionary.CompareMode = BinaryCompare

  Dim entityNames() As String
  entityNames = Split(ENTITY_NAMES, ENTITY_DELIMITER)

  Dim entityValues() As String
  entityValues = Split(ENTITY_VALUES, ENTITY_DELIMITER)

  If UBound(entityNames) = UBound(entityValues) Then
    With CreateEntityDictionary
      Dim entityCounter As Long
      For entityCounter = LBound(entityNames) To UBound(entityNames)
        If Not .Exists(entityNames(entityCounter)) Then
          If IsNumeric(entityValues(entityCounter)) Then
            .Add entityNames(entityCounter), CLng(entityValues(entityCounter))
          Else
            Err.Raise ERROR_UNEXPECTED_ENTITY_VALUE, PROC_NAME, "Unexpected entity value: " & entityValues(entityCounter)
          End If
        Else
          Err.Raise ERROR_UNEXPECTED_ENTITY_DUPLICATE, PROC_NAME, "Unexpected duplicate entity name: " & entityNames(entityCounter)
        End If
      Next entityCounter
    End With
  Else
    Err.Raise ERROR_UNEXPECTED_ENTITY_COUNT, PROC_NAME, "Unexpected number of entity names/values"
  End If

End Function

Private Function ExtractCharacterReferences(ByRef sourceText As String) As VBScript_RegExp_55.MatchCollection

  Const REFERENCE_PATTERN As String = "&(#{0,1}[x]{0,1})(\w{1,8});"

  With New VBScript_RegExp_55.RegExp
    .Global = True
    .Pattern = REFERENCE_PATTERN
    Set ExtractCharacterReferences = .Execute(sourceText)
  End With

End Function

The closest I have gotten is finding this approach, to find and replace a list of html entities with their corresponding symbols:

Public Function HTMLEntititesDecode(p_strText As String) As String

Dim strTemp As String
    strTemp = p_strText
strTemp = Replace(strTemp, "&quot;", """")
strTemp = Replace(strTemp, "&amp;", "&")
strTemp = Replace(strTemp, "&apos;", "'")
strTemp = Replace(strTemp, "&lt;", "<")
strTemp = Replace(strTemp, "&gt;", ">")
strTemp = Replace(strTemp, "&nbsp;", " ")
strTemp = Replace(strTemp, "&iexcl;", "¡")
strTemp = Replace(strTemp, "&cent;", "¢")
strTemp = Replace(strTemp, "&pound;", "£")
strTemp = Replace(strTemp, "&curren;", "?")
strTemp = Replace(strTemp, "&yen;", "¥")
strTemp = Replace(strTemp, "&brvbar;", "?")
strTemp = Replace(strTemp, "&sect;", "§")
strTemp = Replace(strTemp, "&uml;", "¨")
strTemp = Replace(strTemp, "&copy;", "©")
strTemp = Replace(strTemp, "&ordf;", "ª")
strTemp = Replace(strTemp, "&laquo;", "«")
strTemp = Replace(strTemp, "&not;", "¬")
strTemp = Replace(strTemp, "*", "")
strTemp = Replace(strTemp, "&reg;", "®")
strTemp = Replace(strTemp, "&macr;", "¯")
strTemp = Replace(strTemp, "&deg;", "°")
strTemp = Replace(strTemp, "&plusmn;", "±")
strTemp = Replace(strTemp, "&sup2;", "?")
strTemp = Replace(strTemp, "&sup3;", "?")
strTemp = Replace(strTemp, "&acute;", "´")
strTemp = Replace(strTemp, "&micro;", "µ")
strTemp = Replace(strTemp, "&para;", "¶")
strTemp = Replace(strTemp, "&middot;", "·")
strTemp = Replace(strTemp, "&cedil;", "¸")
strTemp = Replace(strTemp, "&sup1;", "?")
strTemp = Replace(strTemp, "&ordm;", "º")
strTemp = Replace(strTemp, "&raquo;", "»")
strTemp = Replace(strTemp, "&frac14;", "?")
strTemp = Replace(strTemp, "&frac12;", "?")
strTemp = Replace(strTemp, "&frac34;", "?")
strTemp = Replace(strTemp, "&iquest;", "¿")
strTemp = Replace(strTemp, "&Agrave;", "À")
strTemp = Replace(strTemp, "&Aacute;", "Á")
strTemp = Replace(strTemp, "&Acirc;", "Â")
strTemp = Replace(strTemp, "&Atilde;", "Ã")
strTemp = Replace(strTemp, "&Auml;", "Ä")
strTemp = Replace(strTemp, "&Aring;", "Å")
strTemp = Replace(strTemp, "&AElig;", "Æ")
strTemp = Replace(strTemp, "&Ccedil;", "Ç")
strTemp = Replace(strTemp, "&Egrave;", "È")
strTemp = Replace(strTemp, "&Eacute;", "É")
strTemp = Replace(strTemp, "&Ecirc;", "Ê")
strTemp = Replace(strTemp, "&Euml;", "Ë")
strTemp = Replace(strTemp, "&Igrave;", "Ì")
strTemp = Replace(strTemp, "&Iacute;", "Í")
strTemp = Replace(strTemp, "&Icirc;", "Î")
strTemp = Replace(strTemp, "&Iuml;", "Ï")
strTemp = Replace(strTemp, "&ETH;", "?")
strTemp = Replace(strTemp, "&Ntilde;", "Ñ")
strTemp = Replace(strTemp, "&Ograve;", "Ò")
strTemp = Replace(strTemp, "&Oacute;", "Ó")
strTemp = Replace(strTemp, "&Ocirc;", "Ô")
strTemp = Replace(strTemp, "&Otilde;", "Õ")
strTemp = Replace(strTemp, "&Ouml;", "Ö")
strTemp = Replace(strTemp, "&times;", "?")
strTemp = Replace(strTemp, "&Oslash;", "Ø")
strTemp = Replace(strTemp, "&Ugrave;", "Ù")
strTemp = Replace(strTemp, "&Uacute;", "Ú")
strTemp = Replace(strTemp, "&Ucirc;", "Û")
strTemp = Replace(strTemp, "&Uuml;", "Ü")
strTemp = Replace(strTemp, "&Yacute;", "?")
strTemp = Replace(strTemp, "&THORN;", "?")
strTemp = Replace(strTemp, "&szlig;", "ß")
strTemp = Replace(strTemp, "&agrave;", "à")
strTemp = Replace(strTemp, "&aacute;", "á")
strTemp = Replace(strTemp, "&acirc;", "â")
strTemp = Replace(strTemp, "&atilde;", "ã")
strTemp = Replace(strTemp, "&auml;", "ä")
strTemp = Replace(strTemp, "&aring;", "å")
strTemp = Replace(strTemp, "&aelig;", "æ")
strTemp = Replace(strTemp, "&ccedil;", "ç")
strTemp = Replace(strTemp, "&egrave;", "è")
strTemp = Replace(strTemp, "&eacute;", "é")
strTemp = Replace(strTemp, "&ecirc;", "ê")
strTemp = Replace(strTemp, "&euml;", "ë")
strTemp = Replace(strTemp, "&igrave;", "ì")
strTemp = Replace(strTemp, "&iacute;", "í")
strTemp = Replace(strTemp, "&icirc;", "î")
strTemp = Replace(strTemp, "&iuml;", "ï")
strTemp = Replace(strTemp, "&eth;", "?")
strTemp = Replace(strTemp, "&ntilde;", "ñ")
strTemp = Replace(strTemp, "&ograve;", "ò")
strTemp = Replace(strTemp, "&oacute;", "ó")
strTemp = Replace(strTemp, "&ocirc;", "ô")
strTemp = Replace(strTemp, "&otilde;", "õ")
strTemp = Replace(strTemp, "&ouml;", "ö")
strTemp = Replace(strTemp, "&divide;", "÷")
strTemp = Replace(strTemp, "&oslash;", "ø")
strTemp = Replace(strTemp, "&ugrave;", "ù")
strTemp = Replace(strTemp, "&uacute;", "ú")
strTemp = Replace(strTemp, "&ucirc;", "û")
strTemp = Replace(strTemp, "&uuml;", "ü")
strTemp = Replace(strTemp, "&yacute;", "?")
strTemp = Replace(strTemp, "&thorn;", "?")
strTemp = Replace(strTemp, "&yuml;", "ÿ")
strTemp = Replace(strTemp, "&OElig;", "Œ")
strTemp = Replace(strTemp, "&oelig;", "œ")
strTemp = Replace(strTemp, "&Scaron;", "?")
strTemp = Replace(strTemp, "&scaron;", "?")
strTemp = Replace(strTemp, "&Yuml;", "Ÿ")
strTemp = Replace(strTemp, "&fnof;", "ƒ")
strTemp = Replace(strTemp, "&circ;", "ˆ")
strTemp = Replace(strTemp, "&tilde;", "˜")
strTemp = Replace(strTemp, "&thinsp;", "")
strTemp = Replace(strTemp, "&zwnj;", "")
strTemp = Replace(strTemp, "&zwj;", "")
strTemp = Replace(strTemp, "&lrm;", "")
strTemp = Replace(strTemp, "&rlm;", "")
strTemp = Replace(strTemp, "&ndash;", "–")
strTemp = Replace(strTemp, "&mdash;", "—")
strTemp = Replace(strTemp, "&lsquo;", "‘")
strTemp = Replace(strTemp, "&rsquo;", "’")
strTemp = Replace(strTemp, "&sbquo;", "‚")
strTemp = Replace(strTemp, "&ldquo;", "“")
strTemp = Replace(strTemp, "&rdquo;", "”")
strTemp = Replace(strTemp, "&bdquo;", "„")
strTemp = Replace(strTemp, "&dagger;", "†")
strTemp = Replace(strTemp, "&Dagger;", "‡")
strTemp = Replace(strTemp, "&bull;", "•")
strTemp = Replace(strTemp, "&hellip;", "…")
strTemp = Replace(strTemp, "&permil;", "‰")
strTemp = Replace(strTemp, "&lsaquo;", "‹")
strTemp = Replace(strTemp, "&rsaquo;", "›")
strTemp = Replace(strTemp, "&euro;", "€")
strTemp = Replace(strTemp, "&trade;", "™")
  HTMLEntititesDecode = strTemp
End Function

But this doesn't cover html codes such as &#8260; or apply appropriate styling as indicated by the presence of html tags such <sup> or <sub> etc.

Any suggestions would be thoroughly appreciated. Thank you.

0 comments No comments
{count} vote