Search code examples
python, web-scraping, beautifulsoup, scrapy, urllib

Need to get the first image link with Python


I need to get the URL of the first photo on the page "https://www.balticshipping.com/vessel/imo/9127382" using Python.

I am testing with the BeautifulSoup library, but I cannot find a way to get it. From what I can see, the image is not in JPG or PNG format, so my search does not detect it.

from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

# Download the vessel page and parse it. The context manager closes the HTTP
# response as soon as BeautifulSoup has consumed it.
with urlopen('https://www.balticshipping.com/vessel/imo/9127382') as html:
    bs = BeautifulSoup(html, 'html.parser')

# Keep only <img> tags whose src contains a literal ".png". The dot is escaped
# because an unescaped "." is the regex wildcard and would also match e.g. "xpng".
images = bs.find_all('img', {'src': re.compile(r'\.png')})
for image in images:
    print(image['src'] + '\n')

Does anyone have any ideas how to do it?

Full loop code ("s" contains data for many ships: IMO, date, shipname, ...):

def create_geojson_features(s, timeout=30):
    """Build one GeoJSON ``Feature`` dict per row of ``s``.

    For every row, the vessel's first gallery photo is looked up on
    balticshipping.com by the row's IMO number and embedded in the HTML
    popup of the feature. Rows whose vessel has no photo still produce a
    feature; their popup simply has no image.

    Args:
        s: Object whose ``iterrows()`` yields ``(index, row)`` pairs. Each row
            is indexed by ``'IMO'``, ``'lon'``, ``'lat'``, ``'date'``,
            ``'shipname'``, ``'mmsi'``, ``'group'``, ``'speed'`` and
            ``'fillColor'``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of feature dicts, one per row, in row order.
    """
    import html

    features = []

    for _, row in s.iterrows():
        image = _first_gallery_image(row['IMO'], timeout)
        print(image)

        if image is None:
            # No photo available: leave the image (and its spacing) out
            # instead of aborting the whole loop.
            image_block = ''
        else:
            # Quote and escape the URL: it comes from an external service and
            # contains characters ("=", "&") that are unsafe in a bare attribute.
            image_block = (
                f"<img src='{html.escape(str(image))}' width = '250' height='200'/>"
                "<br><br>"
            )

        popup = (
            f"{image_block}"
            f"Shipname: {row['shipname']!s}<br>"
            f"MMSI: {row['mmsi']!s}<br>"
            f"Group: {row['group']!s}<br>"
            f"Speed: {row['speed']!s} knots"
        )

        feature = {
            'type': 'Feature',
            'geometry': {
                'type': 'Point',
                'coordinates': [row['lon'], row['lat']],
            },
            'properties': {
                'time': str(pd.to_datetime(row['date'])),
                'popup': popup,
                'style': {'color': ''},
                'icon': 'circle',
                'iconstyle': {
                    'fillColor': row['fillColor'],
                    'fillOpacity': 0.8,
                    'radius': 5,
                },
            },
        }
        features.append(feature)

    return features


def _first_gallery_image(vessel_id, timeout):
    """Return the ``file`` value of the vessel's first gallery entry, or ``None``.

    ``None`` is returned when the response has no ship, no gallery, or an
    empty gallery. Network errors and non-JSON responses still propagate.
    """
    payload = {
        "templates[]": [
            "modal_validation_errors:0",
            "modal_email_verificate:0",
            "r_vessel_types_multi:0",
            "r_positions_single:0",
            "vessel_profile:0",
        ],
        "request[0][module]": "ships",
        "request[0][action]": "list",
        "request[0][id]": "0",
        "request[0][data][0][name]": "imo",
        "request[0][data][0][value]": vessel_id,
        "request[0][sort]": "",
        "request[0][limit]": "1",
        "request[0][stamp]": "0",
        "request[1][module]": "top_stat",
        "request[1][action]": "list",
        "request[1][id]": "0",
        "request[1][data]": "",
        "request[1][sort]": "",
        "request[1][limit]": "",
        "request[1][stamp]": "0",
        "dictionary[]": ["countrys:0", "vessel_types:0", "positions:0"],
    }

    # Without a timeout a stalled connection would block the loop forever.
    body = requests.post(
        "https://www.balticshipping.com/", data=payload, timeout=timeout
    ).json()

    try:
        return body["data"]["request"][0]["ships"][0]["data"]["gallery"][0]["file"]
    except (KeyError, IndexError, TypeError):
        return None

Solution

  • The data you see is loaded via Ajax from an external source. You can use this example to get the picture URLs:

    import json
    import requests
    
    
    url = "https://www.balticshipping.com/vessel/imo/9127382"
    # The IMO number is the last path segment. Strip a trailing slash first so
    # that ".../9127382/" does not produce an empty id.
    vessel_id = url.rstrip("/").rsplit("/", 1)[-1]

    # Form fields for the POST request that asks the site for one vessel by IMO.
    payload = {
        "templates[]": [
            "modal_validation_errors:0",
            "modal_email_verificate:0",
            "r_vessel_types_multi:0",
            "r_positions_single:0",
            "vessel_profile:0",
        ],
        "request[0][module]": "ships",
        "request[0][action]": "list",
        "request[0][id]": "0",
        "request[0][data][0][name]": "imo",
        "request[0][data][0][value]": vessel_id,
        "request[0][sort]": "",
        "request[0][limit]": "1",
        "request[0][stamp]": "0",
        "request[1][module]": "top_stat",
        "request[1][action]": "list",
        "request[1][id]": "0",
        "request[1][data]": "",
        "request[1][sort]": "",
        "request[1][limit]": "",
        "request[1][stamp]": "0",
        "dictionary[]": ["countrys:0", "vessel_types:0", "positions:0"],
    }

    # A timeout keeps the script from hanging on a stalled connection, and
    # raise_for_status() reports HTTP errors directly instead of letting them
    # show up later as a JSON decoding failure.
    response = requests.post(
        "https://www.balticshipping.com/", data=payload, timeout=30
    )
    response.raise_for_status()
    data = response.json()

    # uncomment to print all data:
    # print(json.dumps(data, indent=4))

    # Each gallery entry carries the photo URL under "file".
    for g in data["data"]["request"][0]["ships"][0]["data"]["gallery"]:
        print(g["file"])
    

    Prints:

    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=2948097
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=2864147
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=2830344
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=2674783
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=2521379
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=2083722
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=2083721
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=1599301
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=1464102
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=1464099
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=1464093
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=1464089
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=1110349
    https://photos.marinetraffic.com/ais/showphoto.aspx?photoid=433106