from __future__ import unicode_literals
import youtube_dl
import pandas as pd
import csv
import re
# Options dict handed to youtube_dl.YoutubeDL() in run_scraper; left empty here.
ydl_opts = {}
# Load single.csv with pandas. Despite the name, the value is whatever
# pd.read_csv returns (the parsed table, not a row count), and nothing in the
# visible script reads it afterwards.
number_of_rows = pd.read_csv('single.csv')
# Scrape Online Product
def run_scraper():
    """Fetch each listed video's description and pass it to ``get_links``.

    Reads ``single.csv`` from the current working directory, skips the first
    (header) row, and treats the first column of every remaining row as a
    video URL. For each URL the metadata is extracted with youtube_dl using
    ``download=False`` and the ``description`` entry is handed to
    ``get_links``.

    Blank rows and rows whose first column is empty are skipped, and an
    empty CSV file results in no work instead of an exception.

    Returns:
        None.
    """
    # The csv module documentation asks for newline="" when a file object
    # is passed to csv.reader.
    with open("single.csv", "r", newline="") as f:
        csv_reader = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration here.
        next(csv_reader, None)

        # Scrape Data From Store
        for csv_line_entry in csv_reader:
            # csv.reader yields [] for blank lines, so indexing [0]
            # unguarded would raise IndexError.
            if not csv_line_entry:
                continue
            url = csv_line_entry[0].strip()
            if not url:
                continue

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                meta = ydl.extract_info(url, download=False)
            description = meta['description']
            # Function to Capture Timestamp Descriptions
            get_links(description)
def get_links(description):
    """Print the timestamp matches found in *description* for two layouts.

    Output order: the list of "timestamp followed by text" matches, an
    empty line, then the list of "text followed by timestamp" matches.
    Nothing is returned.
    """
    # Format: Timestamp + Text
    stamp_then_text = re.compile(r'(\d{2}:\d{2}?.*)')
    # Format: Text + Timestamp
    text_then_stamp = re.compile(r'(.*\d{2}:\d{2}?)')

    leading_hits = stamp_then_text.findall(description)
    print(leading_hits)
    print()

    trailing_hits = text_then_stamp.findall(description)
    print(trailing_hits)
# Top-level call: the scraper runs as soon as this file is executed.
run_scraper()
Videos, Format
https://www.youtube.com/watch?v=kqtD5dpn9C8, Format: Timestamp + Text
https://www.youtube.com/watch?v=pJ3IPRqiD2M, Format: Text + Timestamp
https://www.youtube.com/watch?v=rfscVS0vtbw, No Regex in code
https://www.youtube.com/watch?v=t8pPdKYpowI, No Regex in code
My script pulls YouTube URLs from a CSV file so that it can capture general YouTube description information such as the intro, links, timestamps, etc.
I want to capture the YouTube Timestamp Descriptions only as highlighted in the image below:
I understand that YouTube timestamp formatting is not consistent and for that reason I have included a few examples in the CSV file.
In my function get_links, I have partially managed to extract Timestamp + Text and Text + Timestamp for 2 of the 4 URLs listed in the CSV.
I need a way to show only the text (description) part of each timestamp line, irrespective of which of the formatting types used by the 4 CSV URLs it follows.
Any help would be much appreciated.
Try:
import youtube_dl
import pandas as pd
import csv
import re
# Options dict handed to youtube_dl.YoutubeDL() in run_scraper; left empty here.
ydl_opts = {}
# Detects a timestamp-like token (digits, colon, digits, e.g. "12:34")
# anywhere in a line; get_links uses it to decide which lines to keep.
r_pat = re.compile(r"\d+:\d+")
# Matches a timestamp (with an optional further colon and digits) together
# with any run of non-letter characters on either side, so substituting ""
# removes the timestamp plus its surrounding punctuation and whitespace.
# The flanking classes are "anything but an ASCII letter", so digits that
# sit next to the timestamp are removed as well.
r_pat2 = re.compile(r"[^A-Za-z]*\d+:\d+:?\d*?[^A-Za-z]*")
# Scrape Online Product
def run_scraper():
    """Print the timestamp captions of every video listed in ``single.csv``.

    Reads ``single.csv`` from the current working directory, skips the first
    (header) row, and treats the first column of every remaining row as a
    video URL. For each URL the metadata is extracted with youtube_dl using
    ``download=False``, the ``description`` entry is passed to
    ``get_links``, and the returned lines are printed one per line followed
    by an 80-character dashed separator.

    Blank rows and rows whose first column is empty are skipped, and an
    empty CSV file results in no output instead of an exception.

    Returns:
        None.
    """
    # The csv module documentation asks for newline="" when a file object
    # is passed to csv.reader.
    with open("single.csv", "r", newline="") as f:
        csv_reader = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration here.
        next(csv_reader, None)

        # Scrape Data From Store
        for csv_line_entry in csv_reader:
            # csv.reader yields [] for blank lines, so indexing [0]
            # unguarded would raise IndexError.
            if not csv_line_entry:
                continue
            url = csv_line_entry[0].strip()
            if not url:
                continue

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                meta = ydl.extract_info(url, download=False)
            description = meta["description"]

            out = get_links(description)
            print(*out, sep="\n")
            print("-" * 80)
def get_links(description):
    """Return the caption text of each timestamped line in *description*.

    A line is kept only if the module-level ``r_pat`` finds a match in it;
    for kept lines, every match of the module-level ``r_pat2`` is replaced
    with the empty string. Lines appear in the result in their original
    order.

    Returns:
        list of str, one entry per kept line.
    """
    return [
        r_pat2.sub("", text_line)
        for text_line in description.splitlines()
        if r_pat.search(text_line)
    ]
# Top-level call: the scraper runs as soon as this file is executed.
run_scraper()
Prints:
[youtube] kqtD5dpn9C8: Downloading webpage
Introduction
What You Can Do With Python
Your First Python Program
Variables
Receiving Input
Type Conversion
Strings
Arithmetic Operators
Operator Precedence
Comparison Operators
Logical Operators
If Statements
Exercise
While Loops
Lists
List Methods
For Loops
The range() Function
Tuples
--------------------------------------------------------------------------------
[youtube] pJ3IPRqiD2M: Downloading webpage
Python Course
What is Python
Why choose Python
Features of Python
Applications of Python
Salary Trends
Quiz
Installing Python
Python Variable
Python Tokens
...and so on.