I have an API with student data like this, for every student id there will be a corresponding API link with mark details.
for example:
https://api.school.com/2020/students.json
{
"Students": [
{
"StudentName": "AAA",
"Sid": 1020,
"Saddress": "st.aaa",
"Sdob": "10-11-1999"
},
{
"StudentName": "BBB",
"Sid": 1021,
"Saddress": "st.bbb",
"Sdob": "11-11-1999"
},
{
"StudentName": "CCC",
"Sid": 1022,
"Saddress": "st.fff",
"Sdob": "05-12-1999"
},
{
"StudentName": "DDD",
"Sid": 1023,
"Saddress": "st.ddd",
"Sdob": "15-09-1999"
},
{
"StudentName": "EEE",
"Sid": 1024,
"Saddress": "st.eee",
"Sdob": "10-11-1999"
},
{
"StudentName": "FFF",
"Sid": 1025,
"Saddress": "st.ddd",
"Sdob": "20-11-1999"
},
{
"StudentName": "GGG",
"Sid": 1026,
"Saddress": "st.ggg",
"Sdob": "25-11-1999"
},
{
"StudentName": "JJJ",
"Sid": 1019,
"Saddress": "st.aaa",
"Sdob": "18-11-1999"
}
]
}
https://api.school.com/2020/mark.json?sid=1020
{
"marks": [
{
"English": 11,
"Math": 12,
"Art": 13,
"Science": 14,
"History": 15,
"Geography": 16,
"Physical Education": 17,
"Chemistry": 18,
"Physics": 19,
"Biology": 20
}
]
}
https://api.school.com/2020/mark.json?sid=1021
{
"marks": [
{
"English": 21,
"Math": 22,
"Art": 23,
"Science": 24,
"History": 25,
"Geography": 26,
"Physical Education": 27,
"Chemistry": 28,
"Physics": 29,
"Biology": 30
}
]
}
https://api.school.com/2020/mark.json?sid=1022
{
"marks": [
{
"English": 31,
"Math": 32,
"Art": 33,
"Science": 34,
"History": 35,
"Geography": 36,
"Physical Education": 37,
"Chemistry": 38,
"Physics": 39,
"Biology": 40
}
]
}
https://api.school.com/2020/mark.json?sid=1023
{
"marks": [
{
"English": 41,
"Math": 42,
"Art": 43,
"Science": 44,
"History": 45,
"Geography": 46,
"Physical Education": 47,
"Chemistry": 48,
"Physics": 49,
"Biology": 50
}
]
}
https://api.school.com/2020/mark.json?sid=1024
{
"marks": [
{
"English": 51,
"Math": 52,
"Art": 53,
"Science": 54,
"History": 55,
"Geography": 56,
"Physical Education": 57,
"Chemistry": 58,
"Physics": 59,
"Biology": 60
}
]
}
https://api.school.com/2020/mark.json?sid=1025
{
"marks": [
{
"English": 61,
"Math": 62,
"Art": 63,
"Science": 64,
"History": 65,
"Geography": 66,
"Physical Education": 67,
"Chemistry": 68,
"Physics": 69,
"Biology": 70
}
]
}
https://api.school.com/2020/mark.json?sid=1026
{
"marks": [
{
"English": 71,
"Math": 72,
"Art": 73,
"Science": 74,
"History": 75,
"Geography": 76,
"Physical Education": 77,
"Chemistry": 78,
"Physics": 79,
"Biology": 80
}
]
}
https://api.school.com/2020/mark.json?sid=1019
{
"marks": [
{
"English": 1,
"Math": 2,
"Art": 3,
"Science": 4,
"History": 5,
"Geography": 6,
"Physical Education": 7,
"Chemistry": 8,
"Physics": 9,
"Biology": 10
}
]
}
I need to get an output like this in a csv file
My code(not completed):
import requests
import json
import pandas as pd
def getdata(link):
try:
response = s.get(link)
except requests.exceptions.RequestException as e:
print('Request Exception Found!')
json_data = response.json()
return json_data
s = requests.session()
json_data = getdata('https://api.school.com/2020/students.json')
print(json_data)
def normalize(json_data,record_path):
temp = json.dumps(json_data)
ar = json.loads(temp)
df = pd.json_normalize(ar[record_path])
return df
Student_df = normalize(json_data, 'Students')
Student_ids = Student_df["Sid"].tolist()
print(Student_ids)
links = []
for i, row in Student_df.groupby('Sid').size().items():
link = "https://api.school.com/2020/mark.json?sid="+str(i)
links.append(link)
for x in links:
normalize(getdata(x),'marks')
is it possible to insert the StudentName and Sid along with the mark? when the program fetches the mark from each API, it should be able to map and add the StudentName and Sid.
if i saves each mark api data to a json file in the local machine, then i can use this code and it works perfectly
Using concurrent.futures to asynchronously fetch mark data from api. (you can drop any columns you don't need in merge_frames() method.)
import concurrent.futures
import json
import os
import pandas as pd
import requests
class StudentsMarks:
def __init__(self):
self.root_url = "https://api.school.com/2020/"
self.students = self.get_students()
self.processors = os.cpu_count()
def get_students(self) -> pd.DataFrame:
url = "students.json"
return self.get_data(url=url, record="Students")
def process_marks(self) -> pd.DataFrame:
student_ids = self.students["Sid"].tolist()
with concurrent.futures.ProcessPoolExecutor(max_workers=self.processors) as executor:
grades = pd.concat(executor.map(self.get_data, student_ids)).reset_index(drop=True)
return self.merge_frames(grades)
def get_data(self, sid: int = "", url: str = "mark.json?sid=", record: str = "marks") -> pd.DataFrame:
url = f"{self.root_url}{url}{sid}"
with requests.Session() as request:
response = request.get(url, timeout=30)
if response.status_code != 200:
print(response.raise_for_status())
data = json.loads(response.text)
return pd.json_normalize(data=data, record_path=record).assign(id=sid)
def merge_frames(self, grades: pd.DataFrame) -> pd.DataFrame:
df = pd.merge(left=self.students, right=grades, left_on="Sid", right_on="id", suffixes=("_students", "_grades"))
return df[df.columns.drop(list(df.filter(regex="students|grades")))]
if __name__ == "__main__":
final = StudentsMarks().process_marks()
print(final)
Output:
Saddress Sdob Sid StudentName Art Biology Chemistry English Geography History Math Physical Education Physics Science
0 st.aaa 10-31-1999 1020 AAA 13 20 18 11 16 15 12 17 19 14
1 st.bbb 11-19-1999 1021 BBB 23 30 28 21 26 25 22 27 29 24
2 st.fff 12-22-1999 1022 CCC 33 40 38 31 36 35 32 37 39 34
3 st.ddd 09-15-1999 1023 DDD 43 50 48 41 46 45 42 47 49 44
4 st.eee 02-08-1999 1024 EEE 53 60 58 51 56 55 52 57 59 54
5 st.ddd 05-14-1999 1025 FFF 63 70 68 61 66 65 62 67 69 64
6 st.ggg 11-25-1999 1026 GGG 73 80 78 71 76 75 72 77 79 74
7 st.aaa 11-18-1999 1019 JJJ 3 10 8 1 6 5 2 7 9 4