Search code examples
next.js, youtube-api

How can I fetch YouTube transcriptions in Next.js?


"use client";

import { useSearchParams } from "next/navigation";

/**
 * Page component that reads the `link` query parameter of the current URL
 * and shows it underneath a "video created!" heading.
 */
function VideoCreado() {
    const link = useSearchParams().get("link");

    return (
        <div className="bg-gray-900 text-gray-100 min-h-screen flex flex-col items-center justify-center">
            <h1 className="text-3xl font-bold mb-4">video created!</h1>
            {/* The label says "transcript", but the value rendered below is the raw link. */}
            <p className="text-lg">transcript:</p>
            <p>{link}</p>
        </div>
    );
}

export default VideoCreado;

How can I use the YouTube API in Next.js to fetch the transcription of the YouTube video whose URL is {link}?

I know how to do this in Python, but not in TSX:

from youtube_transcript_api import YouTubeTranscriptApi

def get_transcription(video_url):
    """Return the transcript of a YouTube video as a single string.

    Args:
        video_url: URL of the video. Accepted shapes are watch URLs
            (``.../watch?v=<id>``), short links (``youtu.be/<id>``) and
            ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` or ``/v/<id>``
            paths. Any other string that contains ``v=`` is still handled by
            splitting on it, as a last resort.

    Returns:
        The ``text`` field of every transcript entry, joined by single spaces.

    Raises:
        ValueError: If no video ID can be found in ``video_url``.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(video_url)
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]
    query_ids = parse_qs(parsed.query).get("v")

    if query_ids:
        # Proper query parsing: ignores other parameters, their order, and
        # any "#fragment" that follows the query string.
        video_id = query_ids[0]
    elif host == "youtu.be" and path_parts:
        video_id = path_parts[0]
    elif len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
        video_id = path_parts[1]
    elif "v=" in video_url:
        # Fallback for inputs that are not well-formed URLs: take what follows
        # the first "v=" up to the next "&".
        video_id = video_url.split("v=")[1].split("&")[0]
    else:
        video_id = ""

    if not video_id:
        raise ValueError(f"Could not find a YouTube video ID in {video_url!r}")

    # Get the transcript
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # Combine the transcription into a single string
    return " ".join(entry["text"] for entry in transcript)

# Example usage: fetch the transcript of a sample video and print it.
video_url = "https://www.youtube.com/watch?v=8yZ-x-WuFw0&ab_channel=NamePointer"
transcription = get_transcription(video_url=video_url)
print(transcription)

Solution

  • You would need to use ytdl-core in order to get the transcript.

    See this entry on GitHub for a code example.

    With the code shared at the link above, I was able to create a minimal working example - see it working here on replit. You will see the captions in the console, but I wasn't able to render the string on the website, since I'm not familiar with that part; I bet you can modify the code and make it work as you need.

    Here is the code sample:

    "use client";
    
    import { useSearchParams } from "next/navigation";
    import ytdl from "ytdl-core";
    import axios from "axios";
    import { XMLParser, XMLBuilder, XMLValidator } from "fast-xml-parser";
    
    /**
     * Fetches the caption text of the first caption track of a YouTube video.
     *
     * Sources:
     * https://stackoverflow.com/a/56213291/4092887
     * https://github.com/fent/node-ytdl-core/discussions/1088
     *
     * @param url - URL of the video; passed unchanged to `ytdl.getInfo`.
     * @returns `null` when the player response has no `captions` entry, the
     *   string `"No tracks"` when the caption track list is missing or empty,
     *   the caption lines of the first track (each followed by `"\n"`) on
     *   success, or the error message when any step throws.
     */
    const getSubTitles = async (url: string): Promise<string | null> => {
      try {
        console.log("URL!!: " + url);

        const info = await ytdl.getInfo(url);
        const captions = info.player_response.captions;
        if (!captions) return null;

        const tracks = captions.playerCaptionsTracklistRenderer.captionTracks;
        if (!tracks || !tracks.length) {
          return "No tracks";
        }

        // Only the first track is returned, so download and parse just that
        // one instead of every available language.
        const [firstTrack] = tracks;
        const parser = new XMLParser();
        const response = await axios.get(firstTrack.baseUrl);
        const content = parser.parse(response.data, {});

        // The parser only produces an array for repeated tags: a transcript
        // with a single <text> element yields a bare value, and an empty one
        // yields nothing. Normalise all three shapes to an array of lines.
        const rawText = content?.transcript?.text;
        const lines: unknown[] =
          rawText == null ? [] : Array.isArray(rawText) ? rawText : [rawText];
        const transcript = lines.map((line) => String(line) + "\n").join("");

        console.log("Captions:\n");
        console.log(transcript);
        return transcript;
      } catch (e: unknown) {
        console.log("Exception happened:\n");
        console.log(e);
        // Keep the declared string contract: hand back the message rather
        // than the raw error object.
        return e instanceof Error ? e.message : String(e);
      }
    };
    
    /**
     * Demo page that starts a transcript download for a fixed video and
     * renders the video link.
     *
     * NOTE(review): the promise returned by `getSubTitles` is neither awaited
     * nor stored, so the transcript only shows up through that function's own
     * console logging. `transcript_full` is never assigned and the transcript
     * paragraph therefore renders empty. The download is also started on
     * every render of this component.
     */
    function VideoCreado() {
      const link = "https://www.youtube.com/watch?v=8yZ-x-WuFw0";
      // Placeholder for the transcript text; nothing writes to it.
      const transcript_full = "";

      // Fire-and-forget: the result is intentionally not used here.
      void getSubTitles(link);

      return (
        <div className="bg-gray-900 text-gray-100 min-h-screen flex flex-col items-center justify-center">
          <h1 className="text-3xl font-bold mb-4">video created!</h1>
          <p className="text-lg">Link:</p>
          <p>{link}</p>
          <br />
          <p className="text-lg">transcript:</p>
          <p>{transcript_full}</p>
        </div>
      );
    }
    

    Response - screenshot:

    Response in the console

    Response - sample in the console:

    > [email protected] dev
    > next dev --port 3000 --hostname 0.0.0.0
    
       ▲ Next.js 14.1.0
       - Local:        http://localhost:3000
       - Network:      http://0.0.0.0:3000
    
     ✓ Ready in 2.1s
     ○ Compiling / ...
     ✓ Compiled / in 1895ms (299 modules)
    URL!!: https://www.youtube.com/watch?v=8yZ-x-WuFw0
    Captions:
    
    as you could tell from my last video I
    started to add some tiles to my content
    so that everyone can understand me
    despite my terrible accent and as shown
    by the results of the poll I created you
    guys seem to like them however editing
    the subtitles in one by one is tedious
    and takes a lot of time so I wanted to
    automate the process by having the
    computer automatically create them using
    the transcripts I write for every video
    before recording the voiceover before
    ...