Search code examples
c++htmlvisual-c++mshtml

How do I get the font color from a piece of HTML source code?


I have a piece of HTML source like this:

<FONT color=#5a6571>Beverly Mitchell</FONT> <FONT color=#5a6571>Shawnee Smith</FONT> <FONT color=#5a6571>Glenn Plummer</FONT> <NOBR>more &gt;&gt;</NOBR>

I tried to retrieve the "color" value, like this:

// Create an HTML document object and write the HTML source text into it.
MSHTML::IHTMLDocument2Ptr htmDoc1 = NULL;
// One-element SAFEARRAY of VARIANTs; it is the argument passed to write() below.
SAFEARRAY *psaStrings1 = SafeArrayCreateVector(VT_VARIANT, 0, 1);
// NOTE(review): the HRESULT returned by CoCreateInstance is not checked, so
// htmDoc1 is used below even if creation failed.
CoCreateInstance(CLSID_HTMLDocument, NULL, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, (void**) &htmDoc1);

// Fill the array's single element with a BSTR copy of varSrc1.bstrVal.
// varSrc1 is declared outside this snippet; presumably it holds the HTML
// source shown above -- confirm.
VARIANT *param1 = NULL;
HRESULT hr = SafeArrayAccessData(psaStrings1, (LPVOID*)&param1);
param1->vt = VT_BSTR;
param1->bstrVal = SysAllocString(varSrc1.bstrVal);

hr = SafeArrayUnaccessData(psaStrings1);
hr = htmDoc1->write(psaStrings1);
// NOTE(review): psaStrings1 is not destroyed anywhere in the visible snippet.

// Fetch the document's body element, query it for IHTMLDOMNode, and pass that
// node to ProcessDomNodeSmartWrapper together with ProcTgtTagStrVec (both are
// defined outside this snippet).
MSHTML::IHTMLElementPtr pElemBody1 = NULL;
MSHTML::IHTMLDOMNodePtr pHTMLBodyDOMNode1 =NULL;

hr = htmDoc1->get_body(&pElemBody1);
if(SUCCEEDED(hr))
{
    // NOTE(review): only hr is tested; pElemBody1 itself is not checked for
    // NULL before it is dereferenced here.
    hr = pElemBody1->QueryInterface(IID_IHTMLDOMNode,(void**)&pHTMLBodyDOMNode1);
    if(SUCCEEDED(hr))
    {
        ProcessDomNodeSmartWrapper(pHTMLBodyDOMNode1, ProcTgtTagStrVec);
    }
}    

// Walk every element of the document's "all" collection and try to record the
// color of each element's style object as a key in hmStyles1 (declared outside
// this snippet).
long lLength = 0;
MSHTML::IHTMLElementCollectionPtr pElemColl1 = NULL;
MSHTML::IHTMLElementPtr pChElem1 = NULL;
MSHTML::IHTMLStylePtr pStyle1 = NULL;
IDispatchPtr ppvdisp1 = NULL;

hr = htmDoc1->get_all(&pElemColl1);
// NOTE(review): the result of get_all is overwritten by the next call without
// being checked, and pElemColl1 is dereferenced unconditionally.
hr = pElemColl1->get_length(&lLength);
for(long i = 0; i < lLength; i++)
{
    // Both arguments to item() are built from the same loop counter.
    _variant_t name(i);
    _variant_t index(i);

    ppvdisp1 = pElemColl1->item(name, index);
    // hr is not assigned by the item() call above, so SUCCEEDED(hr) here tests
    // the value left over from an earlier call.
    if(ppvdisp1 && SUCCEEDED(hr))
    {
        hr = ppvdisp1->QueryInterface(IID_IHTMLElement, (void **)&pChElem1);

        if(pChElem1 && SUCCEEDED(hr))
        {
            BSTR bstrTagName = NULL;

            // The tag name is fetched and later freed, but never otherwise used.
            pChElem1->get_tagName(&bstrTagName);
            // NOTE(review): this reads the element's style object, whereas the
            // markup shown above sets the color through the FONT element's
            // "color" attribute.
            hr = pChElem1->get_style(&pStyle1);
            if(pStyle1 && SUCCEEDED(hr))
            {
                _variant_t varFtCol;

                hr = pStyle1->get_color(&varFtCol);
                // NOTE(review): `=` is an assignment, not `==`. Because `&&`
                // binds tighter than `=`, hr receives the value of
                // (S_OK && varFtCol); S_OK is 0, so that value is always false
                // and the block below is never entered. `hr == S_OK` was
                // presumably intended.
                if(hr = S_OK && varFtCol)
                {
                    // Stores the color string as a map key tagged L"FontColor".
                    hmStyles1[wstring(varFtCol.bstrVal)] = L"FontColor";
                }
            }
            if(bstrTagName)
               SysFreeString(bstrTagName);
        } // if pChElem1 && SUCCEEDED(hr)
    }//if ppvdisp1 && SUCCEEDED(hr)
}//for

But I can never get the "color" value: when I debug the program, varFtCol.bstrVal is a bad (null) pointer. This is what the debugger shows for varFtCol:

-       varFtCol    {???}   _variant_t
-       tagVARIANT  BSTR = 0x00000000  tagVARIANT
        vt  8   unsigned short
-       BSTR    0x00000000     wchar_t *
            CXX0030: Error: expression cannot be evaluated

#5a6571 is a hexadecimal color that represents the RGB value (90, 101, 113).

How can I get this color info?


Solution

  • You shouldn't be getting the style on pChElem1, because in your case the color is not part of the element's style. It is the "color" attribute of the FONT element.

    Instead you must call pChElem1->getAttribute("color" . . .)

    This will return #5a6571

    The following MFC helper converts that string to a COLORREF. You can easily convert it to regular Win32 if you are not using MFC.

    // Converts a hexadecimal color string into a Win32 COLORREF.
    //
    // Accepted forms (case-insensitive, surrounding whitespace ignored):
    //   "#RRGGBB" / "RRGGBB"  - two hex digits per channel, e.g. "#5a6571"
    //   "#RGB"    / "RGB"     - shorthand; each digit is doubled ("abc" -> "aabbcc")
    //
    // szColor - the color text to parse.
    // Returns RGB(red, green, blue) for a well-formed value, or RGB(0, 0, 0)
    // (black) when the text is not exactly 3 or 6 hexadecimal digits.
    COLORREF GetColorFromHexString( CString szColor )
    {
        CString strColor = szColor;

        // Ignore surrounding whitespace, then strip the '#' marker(s).
        strColor.TrimLeft();
        strColor.TrimRight();
        strColor.TrimLeft(_T('#'));
        strColor.TrimRight(_T('#'));

        // Expand the three-digit shorthand to the full six-digit form.
        if (strColor.GetLength() == 3) {
            CString strExpanded;
            for (int i = 0; i < 3; ++i) {
                strExpanded += strColor.GetAt(i);
                strExpanded += strColor.GetAt(i);
            }
            strColor = strExpanded;
        }

        //it should now be of the form RRGGBB
        if (strColor.GetLength() != 6)
            return RGB(0,0,0);

        // _tcstol alone would accept leading whitespace, a sign or a "0x"
        // prefix and would stop silently at the first bad character, so every
        // character is checked explicitly before parsing.
        for (int i = 0; i < 6; ++i) {
            const TCHAR ch = strColor.GetAt(i);
            const bool bIsHexDigit =
                (ch >= _T('0') && ch <= _T('9')) ||
                (ch >= _T('a') && ch <= _T('f')) ||
                (ch >= _T('A') && ch <= _T('F'));
            if (!bIsHexDigit)
                return RGB(0,0,0);
        }

        // Six validated hex digits always fit in a long (max 0xFFFFFF), so the
        // whole value is parsed once and split into its three channels.
        TCHAR *szScan;
        const long lRGB = _tcstol(LPCTSTR(strColor),&szScan,16);
        const long lRR = (lRGB >> 16) & 0xFF;
        const long lGG = (lRGB >> 8) & 0xFF;
        const long lBB = lRGB & 0xFF;

        return RGB(lRR,lGG,lBB);
    }