I'm trying to find a way to get the correct and full URL of a link.
Sometimes, with item.href
, I get the full url and sometimes only shows about:something
or about:..something
or about:../something
In this case with my current code, I iterate all links in this URL and want to get the full URL of the link related with text "VLOOKUP". The answer I get with my current code is
about:excel_vlookup.php
and if I want to simply concatenate the base_url (https://www.w3schools.com/excel/index.php) with Replace("about:excel_vlookup.php","about:","")
, I would get a wrong answer
like this https://www.w3schools.com/excel/index.phpexcel_vlookup.php
instead of https://www.w3schools.com/excel/excel_vlookup.php
that is the correct full URL when I put mouse over that link.
May someone know how to do this?
Sub full_url()
Dim htmlDoc As New HTMLDocument
Dim links As Object
Dim i As Integer
With New ServerXMLHTTP60
.Open "Get", "https://www.w3schools.com/excel/index.php", False
.send
htmlDoc.body.innerHTML = .responseText
End With
Set links = htmlDoc.body.getElementsByTagName("a")
With links
For i = 0 To .Length - 1
If .Item(i).innerText Like "*VLOOKUP*" Then
Debug.Print .Item(i).href
End If
Next
End With
End Sub
UPDATE Based on @taller's code, it works when href is like "about:something". Still need some tweak if href is a full url.
Sub full_url1()
Dim htmlDoc As New HTMLDocument
Dim links As Object
Dim i As Integer, sUrl As String, aTxt, iCnt As Long
Const PREFIX = "about:"
sUrl = "https://www.w3schools.com/excel/index.php"
aTxt = Split(sUrl, "/")
iCnt = UBound(aTxt)
With New ServerXMLHTTP60
.Open "Get", sUrl, False
.send
htmlDoc.body.innerHTML = .responseText
End With
Set links = htmlDoc.body.getElementsByTagName("a")
With links
For i = 0 To .Length - 1
If .Item(i).innerText Like "*Spaces*" Then
href = .Item(i).href
If InStr(1, href, "https://") = 1 Then
Debug.Print .Item(i).href
Else
aTxt(iCnt) = Mid(href, Len(PREFIX) + 1)
Debug.Print Join(aTxt, "/")
End If
Exit For
End If
Next
End With
End Sub
Microsoft documentation:
Sub full_url()
Dim htmlDoc As New HTMLDocument
Dim links As Object
Dim i As Integer, sUrl As String, aTxt, iCnt As Long
Const PREFIX = "about:"
sUrl = "https://www.w3schools.com/excel/index.php"
aTxt = Split(sUrl, "/")
iCnt = UBound(aTxt)
With New ServerXMLHTTP60
.Open "Get", sUrl, False
.send
htmlDoc.body.innerHTML = .responseText
End With
Set links = htmlDoc.body.getElementsByTagName("a")
With links
For i = 0 To .length - 1
If .Item(i).innerText Like "*Spaces*" Then
href = .Item(i).href
If InStr(1, href, "https://") = 1 Then
Debug.Print .Item(i).href
ElseIf InStr(1, href, PREFIX) = 1 Then
aTxt(iCnt) = Mid(href, Len(PREFIX) + 1)
If Left(aTxt(iCnt), 1) = "/" Then aTxt(iCnt) = Mid(aTxt(iCnt), 2)
Debug.Print Join(aTxt, "/")
End If
Exit For
End If
Next
End With
End Sub