Search code examples
python, python-3.x, beautifulsoup, bs4, dash

When exporting bs4-scraped data to Excel/CSV, only the last record appears in the output


When exporting bs4-scraped data to Excel/CSV, only the last record appears in the output

Guidance code, or an explanation with code and comments (#), would also be fine.

link of the website

import requests
import pandas as pd

# Authorization header sent with the search request (the token is hard-coded here).
headers = {'Authorization' : 'eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwOi8vc2NoZW1hcy54bWxzb2FwLm9yZy93cy8yMDA1LzA1L2lkZW50aXR5L2NsYWltcy9uYW1lIjoiYWRtaW4iLCJleHAiOjIxMjcwNDQ1MTcsImlzcyI6Imh0dHBzOi8vZGV2ZWxvcGVyLmhlYWx0aHBvc3QuY29tIiwiYXVkIjoiaHR0cHM6Ly9kZXZlbG9wZXIuaGVhbHRocG9zdC5jb20ifQ.zNvR3WpI17CCMC7rIrHQCrnJg_6qGM21BvTP_ed_Hj8'}    
# JSON body of the search; the filters select the "Gastroenterology" specialty.
# NOTE(review): "start"/"rows" look like paging controls (offset 0, 10 results) -- confirm against the API.
json_post = {"query":"","start":0,"rows":10,"selectedFilters":{"availability":[],"clinicalInterest":[],"distance":[20],"gender":["Both"],"hasOnlineScheduling":False,"insurance":[],"isMHMG":False,"language":[],"locationType":[],"lonlat":[-95.36,29.76],"onlineScheduling":["Any"],"specialty":["Gastroenterology"]}}
# POST the query as JSON and decode the JSON response body.
req = requests.post("https://api.memorialhermann.org/api/doctorsearch", json=json_post, headers=headers)
data = req.json()

# Visit every entry under the response's 'docs' key.
for doctor in data['docs']:
    # Pull the three fields of interest out of the current entry as strings.
    doctor_name = (f"{doctor['Name']}")
    specialty = (f"{doctor['PrimarySpecialty']}")
    facility = (f"{doctor['PrimaryFacility']}")
    info = [doctor_name, specialty, facility]
    #print(info)
    #info = (f"{doctor['Name']:30}  {doctor['PrimarySpecialty']:20}  {doctor['PrimaryFacility']}")
    #print(info)
    # NOTE: this frame holds only the current doctor, and to_csv() runs on every
    # pass of the loop, so 'products.csv' is rewritten each time and ends up
    # containing just the final entry.
    # The 'Price' and 'Rating' columns receive the specialty and facility values.
    df = pd.DataFrame({'Doctor Name':doctor_name,'Price':specialty,'Rating':facility}, index=['0'])
    df.to_csv('products.csv', index=['0'], encoding='utf-8')
    print(info)

1


Solution

  • The DataFrame is built and saved to CSV inside the loop, so each iteration overwrites the file written by the previous one and only the last row is kept. You actually don't need a loop:

    import requests
    import pandas as pd
    
    # Authorization header sent with the search request.
    # NOTE(review): the token is hard-coded in source; presumably it is the
    # site's public token -- confirm, and prefer loading it from configuration.
    headers = {'Authorization' : 'eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwOi8vc2NoZW1hcy54bWxzb2FwLm9yZy93cy8yMDA1LzA1L2lkZW50aXR5L2NsYWltcy9uYW1lIjoiYWRtaW4iLCJleHAiOjIxMjcwNDQ1MTcsImlzcyI6Imh0dHBzOi8vZGV2ZWxvcGVyLmhlYWx0aHBvc3QuY29tIiwiYXVkIjoiaHR0cHM6Ly9kZXZlbG9wZXIuaGVhbHRocG9zdC5jb20ifQ.zNvR3WpI17CCMC7rIrHQCrnJg_6qGM21BvTP_ed_Hj8'}
    # JSON body of the search; the filters select the "Gastroenterology" specialty.
    # NOTE(review): "start"/"rows" look like paging controls (offset 0, 10 results) -- confirm against the API.
    json_post = {"query":"","start":0,"rows":10,"selectedFilters":{"availability":[],"clinicalInterest":[],"distance":[20],"gender":["Both"],"hasOnlineScheduling":False,"insurance":[],"isMHMG":False,"language":[],"locationType":[],"lonlat":[-95.36,29.76],"onlineScheduling":["Any"],"specialty":["Gastroenterology"]}}
    # A timeout keeps the script from hanging forever on an unresponsive server.
    req = requests.post("https://api.memorialhermann.org/api/doctorsearch", json=json_post, headers=headers, timeout=30)
    # Stop with a clear HTTP error instead of failing later on a missing 'docs' key.
    req.raise_for_status()
    data = req.json()
    print(data['docs'])
    # Build the frame once from the whole list: one row per entry in 'docs',
    # so no per-row loop (and no repeated overwrite of the file) is needed.
    df = pd.DataFrame(data['docs'])

    # Single write of the complete table.
    df.to_csv('products.csv', encoding='utf-8')