skip to Main Content

I need to merge file1.json and file2.json (there could be more files) into onefile.json.

The version is always the same value in all files. However, the vulnerabilities and dependency_files arrays have different values, and there might be duplicates, which I want to remove after the merge.

file1.json:

{
    "version": "x.x.x",
    "vulnerabilities": [
        {
            "id": "0000"
        },
        {
            "id": "11111"
        },
        {
            "id": "2222"
        }
    ],
    "dependency_files": [
        {
            "name": "name0000"
        },
        {
            "name": "name1111"
        },
        {
            "name": "name2222"
        }
        
    ]
}

file2.json:

{
    "version": "x.x.x",
    "vulnerabilities": [

        {
            "id": "2222"
        },
        {
            "id": "3333"
        }
    ],
    "dependency_files": [
        {
            "name": "name2222"
        },
        {
            "name": "name3333"
        }
    ]
}

onefile.json:

{
    "version": "x.x.x",
    "vulnerabilities": [
        {
            "id": "0000"
        },
        {
            "id": "11111"
        },
        {
            "id": "2222"
        },
        {
            "id": "3333"
        }
    ],
    "dependency_files": [
        {
            "name": "name0000"
        },
        {
            "name": "name1111"
        },
        {
            "name": "name2222"
        },
        {
            "name": "name3333"
        }
    ]
}

I tried a lot with no luck

2

Answers


  1. Using this python code

    import json

    def merge_dicts(*dicts):
        """Merge JSON-style dicts into one.

        The 'version' key is copied through unchanged (it is assumed to hold
        the same value in every input).  Every other key is treated as a list:
        the lists are concatenated across all inputs, then duplicates are
        removed while preserving first-seen order.

        Returns the merged dict.
        """
        merged = {}
        for item in dicts:
            for key, value in item.items():
                if key == "version":
                    merged[key] = value
                else:
                    merged.setdefault(key, []).extend(value)
        # Deduplicate each list ONCE, after all inputs are accumulated,
        # instead of rescanning the growing list after every file.
        # The entries are dicts (unhashable), so a set cannot be used;
        # the linear membership test keeps insertion order.
        for key, value in merged.items():
            if isinstance(value, list):
                unique = []
                for obj in value:
                    if obj not in unique:
                        unique.append(obj)
                merged[key] = unique
        return merged

    def main():
        """Merge the two fixed input files and write the combined result."""
        with open("file1.json") as file_1:
            data_1 = json.load(file_1)
        with open("file2.json") as file_2:
            data_2 = json.load(file_2)

        with open("data.json", "w") as merge_file:
            json.dump(merge_dicts(data_1, data_2), merge_file, indent=4)

    # Guard the file I/O so importing this module does not run the merge.
    if __name__ == "__main__":
        main()
    

    Result

    {
        "version": "x.x.x",
        "vulnerabilities": [
            {
                "id": "0000"
            },
            {
                "id": "11111"
            },
            {
                "id": "2222"
            },
            {
                "id": "3333"
            }
        ],
        "dependency_files": [
            {
                "name": "name0000"
            },
            {
                "name": "name1111"
            },
            {
                "name": "name2222"
            },
            {
                "name": "name3333"
            }
        ]
    }
    

    This code supports merging multiple JSON files

    import json
    import os

    # Name of the merged output; excluded from the inputs so a second run
    # does not merge the previous result back into itself.
    OUTPUT_FILE = "data.json"

    def merge_dicts(*dicts):
        """Merge any number of JSON-style dicts into one.

        The 'version' key is copied through unchanged (it is assumed to hold
        the same value in every input).  Every other key is treated as a list:
        the lists are concatenated across all inputs, then duplicates are
        removed while preserving first-seen order.

        Returns the merged dict.
        """
        merged = {}
        for item in dicts:
            for key, value in item.items():
                if key == "version":
                    merged[key] = value
                else:
                    merged.setdefault(key, []).extend(value)
        # Deduplicate each list ONCE after all inputs are accumulated.
        # Entries are dicts (unhashable), so a set cannot be used; the
        # linear membership test keeps insertion order.
        for key, value in merged.items():
            if isinstance(value, list):
                unique = []
                for obj in value:
                    if obj not in unique:
                        unique.append(obj)
                merged[key] = unique
        return merged

    def main():
        """Merge every .json file in the current directory into OUTPUT_FILE."""
        json_files = [
            name for name in os.listdir("./")
            if name.endswith(".json") and name != OUTPUT_FILE
        ]
        loaded = []
        for json_file in json_files:
            # 'with' closes the file automatically; no explicit close() needed.
            with open(json_file) as file_item:
                loaded.append(json.load(file_item))

        with open(OUTPUT_FILE, "w") as merge_file:
            json.dump(merge_dicts(*loaded), merge_file, indent=4)

    # Guard the file I/O so importing this module does not run the merge.
    if __name__ == "__main__":
        main()
    
    Login or Signup to reply.
  2. You could have a reduce on all files, initialized with the first, hence no need for the -n option:

    jq '
      # Fold every remaining input file into the first one (".").
      # Each input is destructured so only the two arrays of interest
      # are bound; the "version" field of the first file is kept as-is.
      reduce inputs as {$vulnerabilities, $dependency_files} (.;
        # Concatenate, then drop duplicates keyed on each array's
        # identifying field (unique_by also sorts by that key).
        .vulnerabilities = (.vulnerabilities + $vulnerabilities | unique_by(.id))
        | .dependency_files = (.dependency_files + $dependency_files | unique_by(.name))
      )
    ' file*.json
    
    {
      "version": "x.x.x",
      "vulnerabilities": [
        {
          "id": "0000"
        },
        {
          "id": "11111"
        },
        {
          "id": "2222"
        },
        {
          "id": "3333"
        }
      ],
      "dependency_files": [
        {
          "name": "name0000"
        },
        {
          "name": "name1111"
        },
        {
          "name": "name2222"
        },
        {
          "name": "name3333"
        }
      ]
    }
    

    Demo

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search