I need to get links from HYPERLINK'd cell, but i really dont know how i can do it.
I need to get links from HYPERLINK'd cells [Pic. 1] and i cant find any documentation on this problem in GSpread's documentation. The result that i am getting on Pic. 2. Problem cell Output with different methods
There is my class that represents google sheet table
class GoogleSpreadSheet:
def __init__(self, link: str):
self.link: str = link
if self.link is None:
raise Exception("Bad link provided")
self.gc = gspread.service_account(filename="service_account.json")
self.sheet = self.gc.open_by_url(self.link).sheet1
def getAllValues(self, include_header: bool = True):
if include_header:
return self.sheet.get_values(value_render_option=ValueRenderOption.formula)
else:
return self.sheet.get_values()[1:]
def getColumnValues(self, column: int, include_header: bool = True):
""" Column A: Column = 1 """
if include_header:
return self.sheet.col_values(column)
else:
return self.sheet.col_values(column)[1:]
def load_links_from_file(filename: str):
with open(filename, "r") as f:
return [line.strip().replace("\n", "") for line in f.readlines()]
There is a main
function code that trying to get hyperlinks in cell.
def main():
# Install sheet
sheet = GoogleSpreadSheet("https://docs.google.com/spreadsheets/d/1GMyZAJkh3hGuNThGCJ4l6NODsqfMjRF2ddJrcpxlD0c")
# Print out different methods of getting values
print(sheet.sheet.cell(2, 1, value_render_option=ValueRenderOption.formula).value)
print(sheet.sheet.cell(2, 1, value_render_option=ValueRenderOption.formatted).value)
print(sheet.sheet.cell(2, 1, value_render_option=ValueRenderOption.unformatted).value)
print(sheet.sheet.cell(2, 1).value)
print(sheet.sheet.cell(2, 1))
# Outputs
# Kelli KL-5112 белая озон вб ям сбер
# Kelli KL-5112 белая озон вб ям сбер
# Kelli KL-5112 белая озон вб ям сбер
# Kelli KL-5112 белая озон вб ям сбер
# <Cell R2C1 'Kelli KL-5112 белая озон вб ям сбер'>
if __name__ == "__main__":
main()
sheet.sheet.cell()
uses "Method: spreadsheets.values.get". In this case, multiple hyperlinks cannot be retrieved from one cell. I thought that this might be the reason for your current issue.request
module with the access token retrieved from the client of gspread.The sample script is as follows.
In this sample script, from your showing script, the access token is retrieved from the service account. And, it supposes that your service account can access your Spreadsheet. Please be careful about this.
import google.auth.transport.requests
import gspread
import requests
import urllib.parse
client = gspread.service_account(filename="service_account.json"))
spreadsheetId = "###" # Please set your spreadsheet ID.
sheetName = "Sheet1" # Please set your sheet name.
# 1. Retrieve the access token.
request = google.auth.transport.requests.Request()
client.auth.refresh(request)
access_token = client.auth.token
# 2. Request to the method of spreadsheets.get in Sheets API using `requests` module.
fields = "sheets(data(rowData(values(formattedValue,hyperlink,textFormatRuns))))"
url = f"https://sheets.googleapis.com/v4/spreadsheets/{spreadsheetId}?ranges={urllib.parse.quote(sheetName)}&fields={urllib.parse.quote(fields)}"
res = requests.get(url, headers={"Authorization": "Bearer " + access_token})
# 3. Retrieve the hyperlink.
obj = res.json()
rows = obj["sheets"][0]['data'][0]['rowData']
res = []
for i, r in enumerate(rows):
temp1 = []
cols = r.get("values", [])
for j, c in enumerate(cols):
a1Notation = gspread.utils.rowcol_to_a1(i + 1, j + 1)
if "hyperlink" in c:
temp1.append({"cell": a1Notation, "hyperlinks": [{"text": c.get("formattedValue", ""), "hyperlink": c.get("hyperlink", "")}]})
elif "textFormatRuns" in c:
formattedValue = c.get("formattedValue", "")
textFormatRuns = c.get("textFormatRuns", [])
temp2 = {"cell": a1Notation, "hyperlinks": []}
for k, e in enumerate(textFormatRuns):
startIdx = e.get("startIndex", 0)
f = e.get("format", {})
if "link" in f:
temp2["hyperlinks"].append({"text": formattedValue[startIdx:textFormatRuns[k + 1]["startIndex"] if k + 1 < len(textFormatRuns) else len(formattedValue)], "hyperlink": f.get("link", {}).get("uri", "")})
if temp2["hyperlinks"] != []:
temp1.append(temp2)
if temp1 != []:
res.append(temp1)
print(res)
When the following sample Spreadsheet is used,
the following result is obtained. You can see that multiple hyperlinks are retrieved from each cell.
[
[
{
"cell": "A1",
"hyperlinks": [
{ "text": "sample2", "hyperlink": "https://www.google.com/" },
{ "text": "sample3", "hyperlink": "https://stackoverflow.com/" },
{ "text": "sample4", "hyperlink": "https://github.com/" },
{ "text": "sample5", "hyperlink": "https://medium.com/" }
]
}
],
[
{
"cell": "A2",
"hyperlinks": [
{ "text": "sample", "hyperlink": "https://www.google.com/" }
]
}
],
[
{
"cell": "A3",
"hyperlinks": [
{ "text": "sample2", "hyperlink": "https://www.google.com/" },
{ "text": "sample3", "hyperlink": "https://stackoverflow.com/" },
{ "text": "sample4", "hyperlink": "https://github.com/" },
{ "text": "sample5", "hyperlink": "https://medium.com/" }
]
},
{
"cell": "B3",
"hyperlinks": [
{ "text": "sample2", "hyperlink": "https://www.google.com/" },
{ "text": "sample3", "hyperlink": "https://stackoverflow.com/" },
{ "text": "sample4", "hyperlink": "https://github.com/" },
{ "text": "sample5", "hyperlink": "https://medium.com/" }
]
}
]
]