I'm trying to get the link to the .mp3 file of a TikTok sound.
The problem is that I can't extract it, because it is embedded inside a <script> tag.
I'm using pycurl instead of requests.
All I need is to extract the following fragment from the response and then take the URL out of "UrlList": "playUrl":{"Uri":"musically-maliva-obj/7038595527327419141.mp3","UrlList":["https://sf16-ies-music-va.tiktokcdn.com/obj/musically-maliva-obj/7038595527327419141.mp3"]}
import pycurl
from io import BytesIO
import certifi
from bs4 import BeautifulSoup
# Download a TikTok sound page with pycurl and look up one <script> element
# in the returned HTML with BeautifulSoup.
url = "https://vm.tiktok.com/ZML1t1vW7/"
buffer = BytesIO()
c = pycurl.Curl()
try:
    # Verify the server certificate against certifi's CA bundle. libcurl's
    # default peer/host verification is deliberately left enabled; turning it
    # off would make the CA bundle pointless and allow man-in-the-middle
    # tampering with the response.
    c.setopt(pycurl.CAINFO, certifi.where())
    c.setopt(c.URL, url)
    # Send an iPhone User-Agent string with the request.
    c.setopt(pycurl.HTTPHEADER, ["User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"])
    c.setopt(c.WRITEDATA, buffer)
    # Follow HTTP redirects (presumably the short vm.tiktok.com link
    # redirects to the full page -- confirm).
    c.setopt(c.FOLLOWLOCATION, True)
    c.perform()
finally:
    # Release the curl handle even when the transfer fails.
    c.close()
body = buffer.getvalue()
response = body.decode('utf-8')
# NOTE: splitting the raw response on quote characters only yields an
# unstructured list that is hard to search, so the HTML is parsed instead.
soup = BeautifulSoup(response, 'html.parser')  # response is a str
# find() returns None when no <script id="sigi-persisted-data"> element exists
# in the page, in which case "None" is printed.
link = soup.find("script", id="sigi-persisted-data")
print(link)
You can extract the JSON data from the script, parse it into a dictionary, and then navigate the dictionary to get the URL: json_data["/music/*-:id"]["musicData"]["playUrl"]["UrlList"][0]
import pycurl
from io import BytesIO
import certifi
from bs4 import BeautifulSoup
import re
import json
url = "https://vm.tiktok.com/ZML1t1vW7/"

# Matches an attribute-less inline <script> that assigns a value to
# window.__INIT_PROPS__ and captures the assigned text. DOTALL lets the
# captured text span several lines; an optional trailing ';' is excluded
# from the capture so the group can be handed to json.loads directly.
INIT_PROPS_RE = re.compile(
    r'<script>\s*window\.__INIT_PROPS__\s*=\s*(.*?)\s*;?\s*</script>',
    re.DOTALL,
)


def fetch_html(page_url):
    """Download *page_url* with pycurl and return the body decoded as UTF-8.

    Redirects are followed and the server certificate is verified against
    certifi's CA bundle (libcurl's default peer/host verification is left
    enabled).

    Raises:
        pycurl.error: if the transfer fails.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    buffer = BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.CAINFO, certifi.where())
        c.setopt(c.URL, page_url)
        # Send an iPhone User-Agent string with the request.
        c.setopt(pycurl.HTTPHEADER, ["User-Agent: Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"])
        c.setopt(c.WRITEDATA, buffer)
        c.setopt(c.FOLLOWLOCATION, True)
        c.perform()
    finally:
        # Release the curl handle even when the transfer fails.
        c.close()
    return buffer.getvalue().decode('utf-8')


def extract_play_url(script_html):
    """Return the first play URL found in one serialized <script> element.

    Args:
        script_html: the text of a single script element, including its
            ``<script>`` and ``</script>`` tags.

    Returns:
        ``json_data["/music/*-:id"]["musicData"]["playUrl"]["UrlList"][0]``
        from the JSON assigned to ``window.__INIT_PROPS__``, or ``None`` when
        the script does not contain that assignment or the JSON lacks that
        path.

    Raises:
        json.JSONDecodeError: if the assigned text is not valid JSON.
    """
    match = INIT_PROPS_RE.search(script_html)
    if match is None:
        return None
    json_data = json.loads(match.group(1))
    try:
        return json_data["/music/*-:id"]["musicData"]["playUrl"]["UrlList"][0]
    except (KeyError, IndexError, TypeError):
        # The expected path is absent or has a different shape.
        return None


def main():
    """Fetch the page and print every play URL found in its script tags."""
    soup = BeautifulSoup(fetch_html(url), 'html.parser')
    found = False
    for script in soup.find_all("script"):
        play_url = extract_play_url(str(script))
        if play_url is not None:
            print(play_url)
            found = True
    if not found:
        # Exit with a message and a non-zero status instead of printing nothing.
        raise SystemExit("play URL not found in the page's script tags")


if __name__ == "__main__":
    main()