Search code examples
python, beautifulsoup, python-requests, for-in-loop

How do I get all 'href' values with BeautifulSoup in Python? I have tried many times, but it does not work


How do I get all 'href' values with BeautifulSoup in Python? I have tried many times, but in vain. Whether I use the 'soup.find' or the 'soup.find_all' method to extract the 'href' values, it doesn't work.

python version:3.10

!pip install requests

import requests

import time
import pandas as pd
from bs4 import BeautifulSoup
# Collect the product links ('href' values) found on each store listing page.
productlink = []
headers = {'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Mobile Safari/537.36'}
for page in range(1, 2):
    # f-string so the page number is actually substituted into the URL;
    # a plain string would request the literal path ".../{page}".
    url = f"https://www.momomall.com.tw/s/103487/dcategory/all/3/{page}"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    # NOTE(review): if this prints an empty list, the product list is
    # presumably not present in the HTML returned for this URL (e.g. filled
    # in by JavaScript) -- confirm by inspecting r.text.
    for product_list in soup.find_all('ul', class_="searchItem Stype"):
        # Walk every anchor inside the list instead of only the first
        # <li>'s anchor, and skip anchors that carry no href attribute.
        for anchor in product_list.find_all('a', href=True):
            href = anchor.get('href')
            print(href)
            productlink.append(href)
print(productlink)


Solution

  • for page in range(1,2):
        url = "https://m.momomall.com.tw/m/store/DCategory.jsp?entp_code=103487&category_code=all&orderby=3&page={}".format(page)
        r = requests.get(url,headers = headers)
        soup = BeautifulSoup(r.text,'lxml')
        for goods_code in soup.select('a.nofollowBtn_star'):
            Goods_code = 'https://www.momomall.com.tw/s/103487/'+goods_code.get('goods_code')+'/'
            goodlink.append(Goods_code)
    
    for URL in goodlink:
        R = requests.get(URL, headers = headers)
        Soup = BeautifulSoup(R.text,"lxml")
        
        for dataprice in Soup.select('script'):
            import re
            discount_regex=re.compile('discountPrice = (\d{1,5})')
            print(re.search(discount_regex, dataprice).group(1))
        ```