skip to Main Content

I would like to convert the JSON file below:

    [
        {
            "userid": "5275800381",
            "status": "UserStatus.RECENTLY",
            "name": "Ah",
            "bot": false,
            "username": "None"
        },
        {
            "userid": "5824657725",
            "status": "UserStatus.LAST_MONTH",
            "name": "A45",
            "bot": false,
            "username": "None"
        },
        {
            "userid": "5160075986",
            "status": "UserStatus.RECENTLY",
            "name": "CTLA",
            "bot": false,
            "username": "james888"
        }
    ]

into a csv file with more columns and without duplicates as follows:

    username,user id,access hash,name,group,group id,is_bot,is_admin,dc_id,have_photo,phone,elaborated

The output file should be:

    username,user id,access hash,name,group,group id,is_bot,is_admin,dc_id,have_photo,phone,elaborated
    ,5275800381,False,False,False,False,False,False,False,False,False,False
    ,5824657725,False,False,False,False,False,False,False,False,False,False
    james888,5160075986,False,False,False,False,False,False,False,False,False,False

I have tried the following code:

    # First attempt: load the user list from target_user2.json and write a CSV
    # (header plus one line per user) to members2.csv.
    import json

    with open('target_user2.json', 'r', encoding='utf-8') as fp:
        target = json.load(fp) # this file contains the json

    with open('members2.csv', 'w', encoding='utf-8') as nf:    # target_userid2.txt or target_userid2.json
        # NOTE(review): 'n' appends the letter n, not a line break; a newline would be '\n'
        # (the backslash may have been lost when this snippet was copied).
        nf.write('username,user id,access hash,name,group,group id,is_bot,is_admin,dc_id,have_photo,phone,elaborated' + 'n')
        for item in target:
            # Always true: item is itself an element of target, so this test filters out nothing.
            if item['userid'] in [x['userid'] for x in target]:
                if item['username'] == "None":
                    # `==` is a comparison whose result is discarded; nothing is assigned here.
                    item['username'] == ""
                    # The next two lines are indented under the `if` above, so a line is
                    # written only for users whose username is "None".
                    record = item['username'] + ',' + item['userid'] + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False'
                    # json.dumps() wraps the string in double quotes; replace() then removes every '"'.
                    nf.write(json.dumps(record).replace('"', '') + 'n')       # write data without ""

It does not work because an error is generated by item['user id'] (the key with a space does not work), but item['userid'] works.

How can I fix this?

Based on advice from Barmar, it now works; my updated code is below:

    # Second attempt: same as before, but the record is now built and written
    # for every user rather than only inside the username check.
    import json

    with open('target_user2.json', 'r', encoding='utf-8') as fp:
        target = json.load(fp) # this file contains the json

    with open('members2.csv', 'w', encoding='utf-8') as nf:    # target_userid2.txt or target_userid2.json
        # NOTE(review): 'n' appends the letter n, not a line break; a newline would be '\n'
        # (the backslash may have been lost when this snippet was copied).
        nf.write('username,user id,access hash,name,group,group id,is_bot,is_admin,dc_id,have_photo,phone,elaborated' + 'n')
        for item in target:
            # Always true: item is itself an element of target, so this test filters out nothing.
            if item['userid'] in [x['userid'] for x in target]:
                if item['username'] == "None":
                    # `==` is a comparison whose result is discarded; the username is left unchanged.
                    item['username'] == ""
                record = item['username'] + ',' + item['userid'] + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False' + ',' + 'False'
                # json.dumps() wraps the string in double quotes; replace() then removes every '"'.
                nf.write(json.dumps(record).replace('"', '') + 'n')       # write data without ""

                

However, another, unintended issue has come up. When the username is "None" in target_user2.json, I tried to set it to an empty string:

    item['username'] == ""           
        

It does not work; the usernames are written unchanged, with the same values as in target_user2.json.

What is wrong with that?

3

Answers


  1. Chosen as BEST ANSWER

    Great help from Barmar. I corrected everything he advised, and it works very well:

        # Convert target_user2.json (a list of user dicts) into members2.csv:
        # a header line, then one line per distinct userid.
        import json

        with open('target_user2.json', 'r', encoding='utf-8') as fp:
            target = json.load(fp)  # list of user dicts parsed from the JSON file

        seen_userids = set()  # userids already written, so repeated entries are skipped

        with open('members2.csv', 'w', encoding='utf-8') as nf:
            nf.write('username,user id,access hash,name,group,group id,is_bot,is_admin,dc_id,have_photo,phone,elaborated' + '\n')
            for item in target:
                # The JSON key is 'userid' (no space); 'user id' is only the CSV column title.
                userid = item['userid']
                if userid in seen_userids:
                    continue
                seen_userids.add(userid)
                # A missing username is stored as the string "None"; write it as an empty field.
                username = '' if item['username'] == "None" else item['username']
                # The remaining ten columns have no counterpart in the JSON and are filled with 'False'.
                # Values are written verbatim; switch to the csv module if a field may
                # contain a comma or a quote.
                record = ','.join([username, userid] + ['False'] * 10)
                nf.write(record + '\n')
    

    1. Use the csv module to write CSV files, rather than formatting it yourself.

    2. Use a set to detect duplicate userids and skip them.

    3. Fix the logic when replacing None username with an empty string.

    4. When accessing the JSON, you have to use userid as the key, not user id.

    import csv

    # Assumes `target` has already been loaded with json.load(), as in the question's code.
    userids = set()  # userids already written to the CSV

    # newline='' is required by the csv module when opening a file for writing; the writer
    # emits its own line endings, and without it blank rows can appear on Windows.
    with open('members2.csv', 'w', encoding='utf-8', newline='') as nf:
        nf_csv = csv.writer(nf)
        nf_csv.writerow(['username', 'user id', 'access hash', 'name', 'group', 'group id', 'is_bot', 'is_admin', 'dc_id', 'have_photo', 'phone', 'elaborated'])
        for item in target:
            if item['userid'] not in userids: # prevent duplicate userids
                userids.add(item['userid'])
                # A missing username is stored as the string "None"; write it as an empty field.
                if item['username'] == "None":
                    item['username'] = ""
                # Only username and userid come from the JSON; the other ten columns are 'False'.
                record = [item['username'], item['userid'], 'False', 'False', 'False', 'False', 'False', 'False', 'False', 'False', 'False', 'False']
                nf_csv.writerow(record)
    
    Login or Signup to reply.
  2. Use the csv module to correctly handle CSV formatting, such as fields that contain quotes or the delimiter (a comma in this case). csv.DictWriter also helps with managing the columns and the default False value for missing fields:

    import csv
    import json

    # Name the encoding explicitly so reading the JSON does not depend on the platform default.
    with open('input.json', encoding='utf-8') as f:
        data = json.load(f)

    # newline='' is an open() requirement for writing csv files, per the csv documentation.
    with open('output.csv', 'w', newline='', encoding='utf8') as f:
        columns = 'username,user id,access hash,name,group,group id,is_bot,is_admin,dc_id,have_photo,phone,elaborated'.split(',')
        # restval is the value written for any column missing from a row dict.
        writer = csv.DictWriter(f, fieldnames=columns, restval=False)
        writer.writeheader()
        ids = set()  # Track unique IDs
        for user in data:
            user_id = user['userid']
            if user_id not in ids:
                # Build a dict holding only the columns that differ from the default.
                row = {'username': user['username'] if user['username'] != 'None' else '',
                       'user id': user_id,
                       #'name': user['name'],  # Always False in example, but if you want it...
                       'is_bot': user['bot']}
                writer.writerow(row)
                ids.add(user_id)
    

    Output:

    username,user id,access hash,name,group,group id,is_bot,is_admin,dc_id,have_photo,phone,elaborated
    ,5275800381,False,False,False,False,False,False,False,False,False,False
    ,5824657725,False,False,False,False,False,False,False,False,False,False
    james888,5160075986,False,False,False,False,False,False,False,False,False,False
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search