Search code examples
python · django · urllib2 · urllib · beautifulsoup

Downloaded videos with urllib and bs4 do not play


I have a folder online that contains video files, and I need to download those videos to my local system. To do that I am using the bs4 and urllib modules, but the downloaded files are not playable.

Please have a look at my code.

from bs4 import BeautifulSoup
import urllib2
import random
import urllib
from urllib2 import urlopen
         
def download(url):
    """Fetch a listing page and save one file per ``<a>`` tag found on it.

    NOTE: this is the non-working version from the question. For every link
    on the page it saves ``url`` (the argument, i.e. the listing page itself)
    under a ``.mp4`` name, so the saved files are not the linked videos.
    """
    # The page address is hard-coded here; the ``url`` argument is not used
    # for this request.
    response = urllib.urlopen("http://ssdolutions/addadsfasdfulsdfaatadfae")
    doc  = response.read()
    soup = BeautifulSoup(doc)
    for link in soup.find_all('a'):
        # The link target is read into ``x`` but never used afterwards.
        x = (link.get('href'))
        # randrange(1, 10) yields 1..9, so names can repeat and a later
        # download can overwrite an earlier one.
        name = random.randrange(1,10)
        full_name = str(name) + ".mp4"
        # Retrieves ``url`` (the function argument), not the link in ``x``.
        urllib.urlretrieve(url, full_name)
download("http://ssdolutions/addadsfasdfulsdfaatadfae")


Solution

  • Pass each link's URL (stored in the variable x in the question's code) to the urlretrieve call, instead of the URL of the listing page:

    from bs4 import BeautifulSoup
    import urllib2
    import random
    import urllib
    import os
    from urllib2 import urlopen
    
    def download(url):
        """Download every ``.mp4`` file linked from the page at ``url``.

        The page is fetched and parsed, and each ``<a href=...>`` whose
        file name ends in ``.mp4`` is saved into the current working
        directory under that file name.

        :param url: address of the HTML page that lists the video links.
        """
        # Python 2 module; imported locally so the file's import block
        # does not need to change.
        from urlparse import urljoin

        # Fetch the page that was asked for rather than a hard-coded address.
        response = urllib.urlopen(url)
        try:
            doc = response.read()
        finally:
            response.close()

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(doc, "html.parser")
        for link in soup.find_all('a'):
            href = link.get('href')
            if not href:
                # <a> tags without an href (e.g. named anchors) have
                # nothing to download.
                continue
            # Resolve relative links against the listing page; absolute
            # links are returned unchanged by urljoin.
            video_url = urljoin(url, href)
            # Extract filename from link URL
            filename = os.path.basename(video_url)
            # this will allow you to download links with *.mp4 extension only
            if os.path.splitext(filename)[1] == ".mp4":
                urllib.urlretrieve(video_url, filename)

    download("http://ssdolutions/addadsfasdfulsdfaatadfae")