Download the transcript and return it as a string.
Args:
video_id (str): The YouTube video ID.
Returns:
str: The transcript text or an empty string if an error occurs.
Source code in src/gemini-cli/gemini/youtube_transcript.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 | @staticmethod
def download_transcript(video_id):
    """
    Fetch a video's transcript and hand it back as cleaned plain text.

    The transcript is looked up with ``find_generated_transcript(['en'])``,
    rendered through ``TextFormatter``, and then stripped of bracketed
    ``[d:d:d]`` markers and bare ``<word>`` tags.

    Args:
        video_id (str): The YouTube video ID.

    Returns:
        str: The cleaned transcript text, or an empty string if any step
        raises (the error is logged to the console instead of propagating).
    """
    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        generated = available.find_generated_transcript(['en'])
        text = TextFormatter().format_transcript(generated.fetch())
        # Strip timecode markers first, then angle-bracket tags; the order
        # mirrors the two substitutions this clean-up has always performed.
        for pattern in (r'\[\d+:\d+:\d+\]', r'<\w+>'):
            text = re.sub(pattern, '', text)
    except Exception as e:
        # Best-effort contract: report the failure and fall back to "".
        console.log(f"Error downloading transcript: {e}", style='bold red')
        return ""
    return text
|