
How to extract video titles with the YouTube API (Python)


I am making a small Python application for myself that downloads information from YouTube videos, and for this I am using the YouTube API.

Recently I watched this video to help me get comments and their replies from a YouTube video and export them to an Excel file, and everything works fine. The problem I have now is that I also want to extract the title of the YouTube video, and I can't seem to get my code working for it.

As I mentioned, I watched the linked video and also tried asking the video's author for help in a comment on their channel, but unfortunately I did not get a reply.

I also looked around YouTube for other videos covering my problem, but couldn't really find anything helpful.

Apart from asking there and looking for other videos, I also tried reading the documentation and puzzling the code out from that, which didn't work either. Below is the code I am using:

# CODE FROM HERE: https://github.com/analyticswithadam/Python/blob/main/Pull_all_Comments_and_Replies_for_YouTube_Playlists.ipynb

from googleapiclient.discovery import build
import pandas as pd
import getpass

api_key = "API key here"
playlist_ids = ['YouTube playlist ID here']

# Build the YouTube client
youtube = build('youtube', 'v3', developerKey=api_key)

def get_all_video_ids_from_playlists(youtube, playlist_ids):
    all_videos = []  # Initialize a single list to hold all video IDs

    for playlist_id in playlist_ids:
        next_page_token = None

        # Fetch videos from the current playlist
        while True:
            playlist_request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId=playlist_id,
                maxResults=50,
                pageToken=next_page_token)
            playlist_response = playlist_request.execute()

            all_videos += [item['contentDetails']['videoId'] for item in playlist_response['items']]

            next_page_token = playlist_response.get('nextPageToken')

            if next_page_token is None:
                break

    return all_videos

# Fetch all video IDs from the specified playlists
video_ids = get_all_video_ids_from_playlists(youtube, playlist_ids)

# Now you can pass video_ids to the next function
# next_function(video_ids)

'''
# Broken-ass title code

def get_vid_title(youtube, video_id):  # Added video_id as an argument
    all_titles = []
    next_page_token = None

    while True:
        title_request = youtube.channels().list(
            part="snippet",
            videoId=video_id,
            pageToken=next_page_token,
            textFormat="plainText",
            maxResults=100
        )
        title_response = title_request.execute()

        for item in title_response['items']:
            vid_title = item['snippet']['title']
            all_titles.append({
                'Title': vid_title['title']
            })
            print(vid_title['title'])

    return all_titles 
'''
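
# Note: channels().list() does not take a videoId parameter, which is why the
# attempt above fails. A single video's title could instead be read from
# videos().list (untested sketch):
#
#     title_response = youtube.videos().list(part="snippet", id=video_id).execute()
#     video_title = title_response["items"][0]["snippet"]["title"]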

# Function to get replies for a specific comment
def get_replies(youtube, parent_id, video_id):  # Added video_id as an argument
    replies = []
    next_page_token = None

    while True:
        reply_request = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            textFormat="plainText",
            maxResults=100,
            pageToken=next_page_token
        )
        reply_response = reply_request.execute()

        for item in reply_response['items']:
            comment = item['snippet']
            replies.append({
                'Timestamp': comment['publishedAt'],
                'Username': comment['authorDisplayName'],
                'VideoID': video_id,
                'Comment': comment['textDisplay'],
                'likeCount': comment['likeCount'],
                'Date': comment['updatedAt'] if 'updatedAt' in comment else comment['publishedAt']
            })

        next_page_token = reply_response.get('nextPageToken')
        if not next_page_token:
            break

    return replies


# Function to get all comments (including replies) for a single video
def get_comments_for_video(youtube, video_id):
    all_comments = []
    next_page_token = None

    while True:
        comment_request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            pageToken=next_page_token,
            textFormat="plainText",
            maxResults=100
        )
        comment_response = comment_request.execute()

        for item in comment_response['items']:
            top_comment = item['snippet']['topLevelComment']['snippet']
            all_comments.append({
                'Timestamp': top_comment['publishedAt'],
                'Username': top_comment['authorDisplayName'],
                'VideoID': video_id,  # Directly using video_id from function parameter
                'Comment': top_comment['textDisplay'],
                'likeCount': top_comment['likeCount'],
                'Date': top_comment['updatedAt'] if 'updatedAt' in top_comment else top_comment['publishedAt']
            })

            # Fetch replies if there are any
            if item['snippet']['totalReplyCount'] > 0:
                all_comments.extend(get_replies(youtube, item['snippet']['topLevelComment']['id'], video_id))

        next_page_token = comment_response.get('nextPageToken')
        if not next_page_token:
            break

    return all_comments

# List to hold all comments from all videos
all_comments = []


for video_id in video_ids:
    video_comments = get_comments_for_video(youtube, video_id)
    all_comments.extend(video_comments)

# Create DataFrame
comments_df = pd.DataFrame(all_comments)


# Export whole dataset to the local machine as CSV File
csv_file = 'comments_data.csv'  # Name your file
comments_df.to_csv(csv_file, index=False)

Any help would be greatly appreciated.

Edit

Thanks to user Mauricio Arias Olave, I got the code working to do exactly what I want.

Here is the completed code for anyone who is curious:

# CODE FROM HERE: https://github.com/analyticswithadam/Python/blob/main/Pull_all_Comments_and_Replies_for_YouTube_Playlists.ipynb

from googleapiclient.discovery import build
import pandas as pd
import getpass

api_key = "API Key Here"
playlist_ids = ['YouTube playlist ID here']

# Build the YouTube client
youtube = build('youtube', 'v3', developerKey=api_key)

def get_all_video_ids_from_playlists(youtube, playlist_ids):
    all_videos = []  # Initialize a single list to hold all video IDs

    for playlist_id in playlist_ids:
        next_page_token = None

        # Fetch videos from the current playlist
        while True:
            playlist_request = youtube.playlistItems().list(
                part='snippet,contentDetails',
                playlistId=playlist_id,
                maxResults=50,
                pageToken=next_page_token)
            playlist_response = playlist_request.execute()

            #all_videos += [item['contentDetails']['videoId'] for item in playlist_response['items']]

            for pl_item in playlist_response["items"]:
                all_videos.append({"Video_ID": pl_item['contentDetails']['videoId'], "Title" : pl_item['snippet']['title']})

            next_page_token = playlist_response.get('nextPageToken')

            if next_page_token is None:
                break

    return all_videos

# Fetch all video IDs from the specified playlists
video_ids = get_all_video_ids_from_playlists(youtube, playlist_ids)

# Function to get replies for a specific comment
def get_replies(youtube, parent_id, video_id):  # Added video_id as an argument
    replies = []
    next_page_token = None

    while True:
        reply_request = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            textFormat="plainText",
            maxResults=100,
            pageToken=next_page_token
        )
        reply_response = reply_request.execute()

        for item in reply_response['items']:
            comment = item['snippet']
            replies.append({
                'Timestamp': comment['publishedAt'],
                'Username': comment['authorDisplayName'],
                'VideoID': video_id,
                'Comment': comment['textDisplay'],
                'likeCount': comment['likeCount'],
                'Date': comment['updatedAt'] if 'updatedAt' in comment else comment['publishedAt']
            })

        next_page_token = reply_response.get('nextPageToken')
        if not next_page_token:
            break

    return replies


# Function to get all comments (including replies) for a single video
def get_comments_for_video(youtube, video_id):
    all_comments = []
    next_page_token = None

    while True:
        comment_request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id["Video_ID"],
            pageToken=next_page_token,
            textFormat="plainText",
            maxResults=100
        )
        comment_response = comment_request.execute()

        for item in comment_response['items']:
            top_comment = item['snippet']['topLevelComment']['snippet']
            all_comments.append({
                'Timestamp': top_comment['publishedAt'],
                'Username': top_comment['authorDisplayName'],
                'VideoID': video_id["Video_ID"],  # Directly using video_id from function parameter
                'Title': video_id["Title"], # The title of the video.
                'Comment': top_comment['textDisplay'],
                'likeCount': top_comment['likeCount'],
                'Date': top_comment['updatedAt'] if 'updatedAt' in top_comment else top_comment['publishedAt']
            })

            # Fetch replies if there are any
            if item['snippet']['totalReplyCount'] > 0:
                all_comments.extend(get_replies(youtube, item['snippet']['topLevelComment']['id'], video_id["Video_ID"]))  # pass just the ID string, since get_replies stores it directly as 'VideoID'

        next_page_token = comment_response.get('nextPageToken')
        if not next_page_token:
            break

    return all_comments

# List to hold all comments from all videos
all_comments = []


for video_id in video_ids:
    video_comments = get_comments_for_video(youtube, video_id)
    all_comments.extend(video_comments)

# Create DataFrame
comments_df = pd.DataFrame(all_comments)


# Export whole dataset to the local machine as CSV File
csv_file = 'comments_data.csv'  # Name your file
comments_df.to_csv(csv_file, index=False)
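
Since my original goal mentioned exporting to an Excel file, the same DataFrame can also be written as .xlsx with pandas' to_excel (this needs the openpyxl package installed, e.g. pip install openpyxl):

# Optional: write the same data to an Excel workbook instead of (or alongside) the CSV
excel_file = 'comments_data.xlsx'
comments_df.to_excel(excel_file, index=False)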

Solution

  • In get_all_video_ids_from_playlists, use:

    part='snippet,contentDetails'
    

    And on this line:

    all_videos += [item['contentDetails']['videoId'] for item in playlist_response['items']]
    

    Change it as follows:

    for pl_item in playlist_response["items"]:
        all_videos.append({"Video_ID": pl_item['contentDetails']['videoId'], "Title": pl_item['snippet']['title']})
    

    Then, in this loop:

    for video_id in video_ids:
        video_comments = get_comments_for_video(youtube, video_id)
    

    You're now passing the whole item - i.e. {"Video_ID": "xxxx", "Title": "xxxx"} - rather than just the ID string.

    Finally, in your get_comments_for_video function:

    # Function to get all comments (including replies) for a single video
    def get_comments_for_video(youtube, video_id):
    

    Use:

    videoId=video_id["Video_ID"]
    

    and:

    'VideoID': video_id["Video_ID"],  # Directly using video_id from function parameter
    'Title': video_id["Title"], # The title of the video.