Search code examples
python-3.xbeautifulsouppython-asyncio

TypeError: a bytes-like object is required, not 'coroutine'


I think the error occurs because bs is getting the coroutine from page.content, I'm unsure how to fix it. Any help is appreciated.

This is all the relevant code in the program:

async def find_song_on_youtube(song_name, session):
    # TODO: Make this fx async
    song_name_query = song_name.replace(" ", "+")
    page = await session.request(method='GET', url=
    f"https://www.youtube.com/results?search_query={song_name_query}")
    page = page.content
    return sort_return_final_result(page, song_name)

not sorting these because unsure how to check if it's the actual artist page, all this transformation is to grab the area of html where the result json is - could break very easily

def sort_return_final_result(page, song_name):
    page = bs(page, 'html5lib')
    page = str(page.find_all("script")[-3]).split("=", 1)[1].strip()[:-1].split("\n")[0][:-1]
    page = json.loads(page)
    # drilling down to where the video contents are
    full_page = (page["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]
    ["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"])
    # sometimes the video is not in the first position, this should drill down until it finds
    # the first video item, should be in the videoRenderer key
    first_two_results = []
    for item in full_page:
        if len(first_two_results) >= 2:
            break
        try:
            page = item["videoRenderer"]
            first_two_results.append(parse_video_info(page))
        except KeyError:
            continue

Sort by views first, then grab the highest viewed video by the official artist if it's available

    first_two_results.sort(key=itemgetter("Views"), reverse=True)
    first_two_results.sort(key=itemgetter("Official Artist"), reverse=True)
    final_result = {}
    for item in first_two_results:
        if fuzz.partial_ratio(item["Name"], song_name.split('+')[1]) > 50:
            final_result = item
            break
    print(final_result)
    return final_result


def parse_video_info(page):
    # name of video
    name = page["title"]["runs"][0]["text"]
    # url of video
    url = page["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
    url = f'https://youtube.com{url}'
    # views
    views = int(page["viewCountText"]["simpleText"].split()[0].replace(",", ""))
    # official artist check
    try:
        official_artist = page["ownerBadges"][0]["metadataBadgeRenderer"]["tooltip"]
        if official_artist != "Official Artist Channel":
            raise KeyError
        official_artist = True
    except KeyError:
        official_artist = False
    return {
        "Name": name,  # encoding issues here might be a problem later
        "Url": url,
        "Views": views,
        "Official Artist": official_artist
    }


async def get_song_urls(playlist_or_album, resource_id):
    song_list = []  # to get rid of pycharm error
    rsrc_name = ""
    if playlist_or_album == "Playlist":  # rsrc id could be playlist or album id
        song_list = return_songs_from_playlist(resource_id)
        rsrc_name = spotfy_obj.playlist(resource_id)['name']
    elif playlist_or_album == "Album":
        song_list = return_songs_from_album(resource_id)
        rsrc = spotfy_obj.album(resource_id)
        rsrc_name = rsrc['name']
        rsrc_name += f" - by {rsrc['artists'][0]['name']}"
    print("Collected Songs from Playlist")
    t1 = time()
    async with ClientSession() as session:
        playlist = await asyncio.gather(*[find_song_on_youtube(song, session)
                                          for song in song_list])
    t2 = time()
    print(t2 - t1)
    dump_playlist_to_db(playlist, rsrc_name)
    print(playlist)


asyncio.run(get_song_urls("Album", "6a4HHZe13SySfC50BGy8Hm"))

How do I get the actual content from the request? Instead of the coroutine object?


Solution

  • Just use your_file_object.file.read() instead of your_file_object.content.

    So, in your case it should be page.file.read().

    If you are using flask it will be:

    your_file_object.read()
    

    If you are using fastapi it will be:

    your_file_object.file.read()
    

    It works in my case.