skip to Main Content

I have a json pull using api and below is what the file looks like:

[
    {
        "id": 181,
        "emp_number": "527",
        "clock_id": "124",
        "organization_trackings": [
            {
                "title": "Division",
                "value": "200",
                "value_description": "Cons"
            },
            {
                "title": "Location",
                "value": "951",
                "value_description": "Jasp"
            },
            {
                "title": "Special",
                "value": "20",
                "value_description": "Remote"
            },
            {
                "title": "Primary Department",
                "value": "200",
                "value_description": "DPT1"
            },
            {
                "title": "Secondary Department",
                "value": "2000",
                "value_description": "DH"
            },
            {
                "title": "Function",
                "value": "0000",
                "value_description": "Resource"
            }
        ]
    },
    {
        "id": 181837,
        "emp_number": "649",
        "timeclock_id": "528",
        "organization_trackings": [
            {
                "title": "Division",
                "value": "200",
                "value_description": "Consultant"
            },
            {
                "title": "Location",
                "value": "001",
                "value_description": "Atlanta"
            },
            {
                "title": "Function",
                "value": "0000",
                "value_description": "Resource"
            }
        ]
    }
]

I need to convert this to 2 csv files

first csv fileis:  
id  emp_number  clock_id
181 527 124
181837  649 528

second csv file:

emp_number,title,value,value_description
527,Location,951,Jasp
527,Special,20,Remote
527,Primary Department,200,DPT1
527,Secondary Department,2000,DH
527,Function,0,Resource
649,Division,200,Consultant
649,Location,1,Atlanta
649,Function,0,Resource

I am new to python, can someone direct me how I can do it in python..?

2

Answers


  1. I guess the key "timeclock_id": "528" is a typo and should be "clock_id": "528".

    Try:

    import json
    
    with open("your_data.json", "r") as f_in:
        data = json.load(f_in)
    
    df1 = pd.DataFrame(data)
    df2 = pd.DataFrame(
        [
            {"emp_number": d["emp_number"], **dd}
            for d in data
            for dd in d["organization_trackings"]
        ]
    )
    
    df1.pop("organization_trackings")
    
    print(df1)
    print()
    print(df2)
    
    # df1.to_csv('df1.csv', index=False)
    # df2.to_csv('df2.csv', index=False)
    

    Prints:

           id emp_number clock_id
    0     181        527      124
    1  181837        649      528
    
      emp_number                 title value value_description
    0        527              Division   200              Cons
    1        527              Location   951              Jasp
    2        527               Special    20            Remote
    3        527    Primary Department   200              DPT1
    4        527  Secondary Department  2000                DH
    5        527              Function  0000          Resource
    6        649              Division   200        Consultant
    7        649              Location   001           Atlanta
    8        649              Function  0000          Resource
    
    Login or Signup to reply.
  2. this checks for clock_id and if that fails checks for timeclock_id.
    working sample: https://onlinegdb.com/WaR32RLl2

    from io import StringIO
    import csv, json
    
    json_file = """
    [ { "id": 181, "emp_number": "527", "clock_id": "124", "organization_trackings": [ { "title": "Division", "value": "200", "value_description": "Cons" }, { "title": "Location", "value": "951", "value_description": "Jasp" }, { "title": "Special", "value": "20", "value_description": "Remote" }, { "title": "Primary Department", "value": "200", "value_description": "DPT1" }, { "title": "Secondary Department", "value": "2000", "value_description": "DH" }, { "title": "Function", "value": "0000", "value_description": "Resource" } ] }, { "id": 181837, "emp_number": "649", "timeclock_id": "528", "organization_trackings": [ { "title": "Division", "value": "200", "value_description": "Consultant" }, { "title": "Location", "value": "001", "value_description": "Atlanta" }, { "title": "Function", "value": "0000", "value_description": "Resource" } ] } ]
    """
    
    with (
        StringIO(json_file) as file,  # replace with open(FILE_PATH, 'r')
        open('csv_file1.csv', 'w', newline='') as f1,
        open('csv_file2.csv', 'w', newline='') as f2):
        data = json.load(file)
        w1, w2 = csv.writer(f1), csv.writer(f2)
        w1.writerow(['id', 'emp_number', 'clock_id'])
        w2.writerow(['emp_number', 'title', 'value', 'value_description'])
        for i in data:
            _id, emp_number, clock_id = i.get('id'), i.get('emp_number'), i.get('clock_id', i.get('timeclock_id'))
            orgs = [[emp_number] + list(j.values()) for j in i['organization_trackings']]
            w1.writerow([_id, emp_number, clock_id])
            w2.writerows(orgs)
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search