skip to Main Content

I have an API with student data like this, for every student id there will be a corresponding API link with mark details.

for example:

https://api.school.com/2020/students.json

{
    "Students": [
        {
            "StudentName": "AAA",
            "Sid": 1020,
            "Saddress": "st.aaa",
            "Sdob": "10-11-1999"
        },
        {
            "StudentName": "BBB",
            "Sid": 1021,
            "Saddress": "st.bbb",
            "Sdob": "11-11-1999"
        },
        {
            "StudentName": "CCC",
            "Sid": 1022,
            "Saddress": "st.fff",
            "Sdob": "05-12-1999"
        },
        {
            "StudentName": "DDD",
            "Sid": 1023,
            "Saddress": "st.ddd",
            "Sdob": "15-09-1999"
        },
        {
            "StudentName": "EEE",
            "Sid": 1024,
            "Saddress": "st.eee",
            "Sdob": "10-11-1999"
        },
        {
            "StudentName": "FFF",
            "Sid": 1025,
            "Saddress": "st.ddd",
            "Sdob": "20-11-1999"
        },
        {
            "StudentName": "GGG",
            "Sid": 1026,
            "Saddress": "st.ggg",
            "Sdob": "25-11-1999"
        },
        {
            "StudentName": "JJJ",
            "Sid": 1019,
            "Saddress": "st.aaa",
            "Sdob": "18-11-1999"
        }
    ]
}

https://api.school.com/2020/mark.json?sid=1020

   {
    "marks": [
        {
            "English": 11,
            "Math": 12,
            "Art": 13,
            "Science": 14,
            "History": 15,
            "Geography": 16,
            "Physical Education": 17,
            "Chemistry": 18,
            "Physics": 19,
            "Biology": 20
        }
    ]
}

https://api.school.com/2020/mark.json?sid=1021

 {
    "marks": [
        {
            "English": 21,
            "Math": 22,
            "Art": 23,
            "Science": 24,
            "History": 25,
            "Geography": 26,
            "Physical Education": 27,
            "Chemistry": 28,
            "Physics": 29,
            "Biology": 30
        }
    ]
}

https://api.school.com/2020/mark.json?sid=1022

{
    "marks": [
        {
            "English": 31,
            "Math": 32,
            "Art": 33,
            "Science": 34,
            "History": 35,
            "Geography": 36,
            "Physical Education": 37,
            "Chemistry": 38,
            "Physics": 39,
            "Biology": 40
        }
    ]
}

https://api.school.com/2020/mark.json?sid=1023

{
    "marks": [
        {
            "English": 41,
            "Math": 42,
            "Art": 43,
            "Science": 44,
            "History": 45,
            "Geography": 46,
            "Physical Education": 47,
            "Chemistry": 48,
            "Physics": 49,
            "Biology": 50
        }
    ]
}

https://api.school.com/2020/mark.json?sid=1024

{
    "marks": [
        {
            "English": 51,
            "Math": 52,
            "Art": 53,
            "Science": 54,
            "History": 55,
            "Geography": 56,
            "Physical Education": 57,
            "Chemistry": 58,
            "Physics": 59,
            "Biology": 60
        }
    ]
}

https://api.school.com/2020/mark.json?sid=1025

{
    "marks": [
        {
            "English": 61,
            "Math": 62,
            "Art": 63,
            "Science": 64,
            "History": 65,
            "Geography": 66,
            "Physical Education": 67,
            "Chemistry": 68,
            "Physics": 69,
            "Biology": 70
        }
    ]
}

https://api.school.com/2020/mark.json?sid=1026

{
    "marks": [
        {
            "English": 71,
            "Math": 72,
            "Art": 73,
            "Science": 74,
            "History": 75,
            "Geography": 76,
            "Physical Education": 77,
            "Chemistry": 78,
            "Physics": 79,
            "Biology": 80
        }
    ]
}

https://api.school.com/2020/mark.json?sid=1019

   {
    "marks": [
        {
            "English": 1,
            "Math": 2,
            "Art": 3,
            "Science": 4,
            "History": 5,
            "Geography": 6,
            "Physical Education": 7,
            "Chemistry": 8,
            "Physics": 9,
            "Biology": 10
        }
    ]
}

I need to get an output like this in a csv file
what i want

My code(not completed):

import requests
import json
import pandas as pd

def getdata(link):
    try:
        response = s.get(link)
    except requests.exceptions.RequestException as e:
        print('Request Exception Found!')
    json_data = response.json()
    return json_data

s = requests.session()
json_data = getdata('https://api.school.com/2020/students.json')

print(json_data)

def normalize(json_data,record_path):
    temp = json.dumps(json_data)
    ar = json.loads(temp)
    df = pd.json_normalize(ar[record_path])
    return df

Student_df = normalize(json_data, 'Students')

Student_ids = Student_df["Sid"].tolist()

print(Student_ids)

links = []

for i, row in Student_df.groupby('Sid').size().items():
    link = "https://api.school.com/2020/mark.json?sid="+str(i)
    links.append(link)

for x in links:
    normalize(getdata(x),'marks')

is it possible to insert the StudentName and Sid along with the mark? when the program fetches the mark from each API, it should be able to map and add the StudentName and Sid.

if i saves each mark api data to a json file in the local machine, then i can use this code and it works perfectly

2

Answers


  1. Create a dict of records where the key is Sid so you can merge Student_df and Marks_df:

    link = 'https://api.school.com/2020/mark.json?sid={sid}'
    
    marks = {sid: normalize(getdata(link.format(sid=sid)), 'marks')
                  for sid in Student_df['Sid'].unique()}
    Marks_df = pd.concat(marks).droplevel(1)
    
    out = Student_df[['StudentName', 'Sid']].merge(Marks_df, left_on='Sid', right_index=True)
    

    Output:

    >>> out
      StudentName   Sid  English  Math  Art  Science  History  Geography  Physical Education  Chemistry  Physics  Biology
    0         AAA  1020       11    12   13       14       15         16                  17         18       19       20
    1         BBB  1021       21    22   23       24       25         26                  27         28       29       30
    2         CCC  1022       31    32   33       34       35         36                  37         38       39       40
    3         DDD  1023       41    42   43       44       45         46                  47         48       49       50
    4         EEE  1024       51    52   53       54       55         56                  57         58       59       60
    5         FFF  1025       61    62   63       64       65         66                  67         68       69       70
    6         GGG  1026       71    72   73       74       75         76                  77         78       79       80
    7         JJJ  1019        1     2    3        4        5          6                   7          8        9       10
    

    Reproducible example with FastAPI:

    # api.py
    from fastapi import FastAPI
    
    Students = [{'StudentName': 'AAA', 'Sid': 1020, 'Saddress': 'st.aaa', 'Sdob': '10-11-1999'}, {'StudentName': 'BBB', 'Sid': 1021, 'Saddress': 'st.bbb', 'Sdob': '11-11-1999'}, {'StudentName': 'CCC', 'Sid': 1022, 'Saddress': 'st.fff', 'Sdob': '05-12-1999'}, {'StudentName': 'DDD', 'Sid': 1023, 'Saddress': 'st.ddd', 'Sdob': '15-09-1999'}, {'StudentName': 'EEE', 'Sid': 1024, 'Saddress': 'st.eee', 'Sdob': '10-11-1999'}, {'StudentName': 'FFF', 'Sid': 1025, 'Saddress': 'st.ddd', 'Sdob': '20-11-1999'}, {'StudentName': 'GGG', 'Sid': 1026, 'Saddress': 'st.ggg', 'Sdob': '25-11-1999'}, {'StudentName': 'JJJ', 'Sid': 1019, 'Saddress': 'st.aaa', 'Sdob': '18-11-1999'}]
    
    Marks = {'1020': {'marks': [{'English': 11, 'Math': 12, 'Art': 13, 'Science': 14, 'History': 15, 'Geography': 16, 'Physical Education': 17, 'Chemistry': 18, 'Physics': 19, 'Biology': 20}]}, '1021': {'marks': [{'English': 21, 'Math': 22, 'Art': 23, 'Science': 24, 'History': 25, 'Geography': 26, 'Physical Education': 27, 'Chemistry': 28, 'Physics': 29, 'Biology': 30}]}, '1022': {'marks': [{'English': 31, 'Math': 32, 'Art': 33, 'Science': 34, 'History': 35, 'Geography': 36, 'Physical Education': 37, 'Chemistry': 38, 'Physics': 39, 'Biology': 40}]}, '1023': {'marks': [{'English': 41, 'Math': 42, 'Art': 43, 'Science': 44, 'History': 45, 'Geography': 46, 'Physical Education': 47, 'Chemistry': 48, 'Physics': 49, 'Biology': 50}]}, '1024': {'marks': [{'English': 51, 'Math': 52, 'Art': 53, 'Science': 54, 'History': 55, 'Geography': 56, 'Physical Education': 57, 'Chemistry': 58, 'Physics': 59, 'Biology': 60}]}, '1025': {'marks': [{'English': 61, 'Math': 62, 'Art': 63, 'Science': 64, 'History': 65, 'Geography': 66, 'Physical Education': 67, 'Chemistry': 68, 'Physics': 69, 'Biology': 70}]}, '1026': {'marks': [{'English': 71, 'Math': 72, 'Art': 73, 'Science': 74, 'History': 75, 'Geography': 76, 'Physical Education': 77, 'Chemistry': 78, 'Physics': 79, 'Biology': 80}]}, '1019': {'marks': [{'English': 1, 'Math': 2, 'Art': 3, 'Science': 4, 'History': 5, 'Geography': 6, 'Physical Education': 7, 'Chemistry': 8, 'Physics': 9, 'Biology': 10}]}}
    
    app = FastAPI()
    
    @app.get('/2020/students.json')
    def students():
        return Students
    
    @app.get('/2020/mark.json')
    def mark(sid):
        return Marks[sid]
    

    Usage:

    [...]$ uvicorn api:app --reload
    
    # requests.get('http://localhost:8000/2020/students.json')
    # requests.get('http://localhost:8000/2020/mark.json?sid=1019')
    
    Login or Signup to reply.
  2. Using concurrent.futures to asynchronously fetch mark data from api. (you can drop any columns you don’t need in merge_frames() method.)

    import concurrent.futures
    import json
    import os
    
    import pandas as pd
    import requests
    
    
    class StudentsMarks:
        def __init__(self):
            self.root_url = "https://api.school.com/2020/"
            self.students = self.get_students()
            self.processors = os.cpu_count()
    
        def get_students(self) -> pd.DataFrame:
            url = "students.json"
    
            return self.get_data(url=url, record="Students")
    
        def process_marks(self) -> pd.DataFrame:
            student_ids = self.students["Sid"].tolist()
    
            with concurrent.futures.ProcessPoolExecutor(max_workers=self.processors) as executor:
                grades = pd.concat(executor.map(self.get_data, student_ids)).reset_index(drop=True)
    
            return self.merge_frames(grades)
    
    
        def get_data(self, sid: int = "", url: str = "mark.json?sid=", record: str = "marks") -> pd.DataFrame:
            url = f"{self.root_url}{url}{sid}"
    
            with requests.Session() as request:
                response = request.get(url, timeout=30)
            if response.status_code != 200:
                print(response.raise_for_status())
    
            data = json.loads(response.text)
    
            return pd.json_normalize(data=data, record_path=record).assign(id=sid)
    
        def merge_frames(self, grades: pd.DataFrame) -> pd.DataFrame:
            df = pd.merge(left=self.students, right=grades, left_on="Sid", right_on="id", suffixes=("_students", "_grades"))
    
            return df[df.columns.drop(list(df.filter(regex="students|grades")))]
    
    
    if __name__ == "__main__":
        final = StudentsMarks().process_marks()
        print(final)
    

    Output:

      Saddress     Sdob     Sid StudentName  Art  Biology  Chemistry  English  Geography  History  Math  Physical Education  Physics  Science
    0  st.aaa  10-31-1999  1020     AAA      13       20         18       11         16       15    12                  17       19       14
    1  st.bbb  11-19-1999  1021     BBB      23       30         28       21         26       25    22                  27       29       24
    2  st.fff  12-22-1999  1022     CCC      33       40         38       31         36       35    32                  37       39       34
    3  st.ddd  09-15-1999  1023     DDD      43       50         48       41         46       45    42                  47       49       44
    4  st.eee  02-08-1999  1024     EEE      53       60         58       51         56       55    52                  57       59       54
    5  st.ddd  05-14-1999  1025     FFF      63       70         68       61         66       65    62                  67       69       64
    6  st.ggg  11-25-1999  1026     GGG      73       80         78       71         76       75    72                  77       79       74
    7  st.aaa  11-18-1999  1019     JJJ       3       10          8        1          6        5     2                   7        9        4
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search