skip to Main Content

This is an example object that I got:

{ "query1" : [ { "name" : "John", "id" : 1234 }, { "name" : "Rose", "id" : 3214 }, { "name" : "John", "id" : 1234 } ] }

How can I remove the duplicates using group by and array navigation / unboxing?

I tried adding a group by clause after the where clause, but did not get the correct result.

3

Answers


  1. To remove duplicate items from a JSON object, you can use this code:

    from collections import OrderedDict

    # Sample payload: each top-level key maps to an array of objects that may
    # contain repeated entries.
    data = {
        "query1": [
            {"name": "John", "id": 1234},
            {"name": "Rose", "id": 3214},
            {"name": "John", "id": 1234},
        ]
    }

    # For every array, index the objects by their (name, id) pair. An
    # OrderedDict keeps each pair at the position where it first appeared,
    # while a later duplicate simply replaces the stored object, so the
    # resulting values are the de-duplicated objects in first-seen order.
    unique_data = {
        key: list(
            OrderedDict(
                ((record["name"], record["id"]), record) for record in records
            ).values()
        )
        for key, records in data.items()
    }

    print(unique_data)
    

    Result:

    {'query1': [{'name': 'John', 'id': 1234}, {'name': 'Rose', 'id': 3214}]}
    
    Login or Signup to reply.
  2. It’s also possible to do it this way (I found the answer here):

    data = {
        "query1": [
            {"name": "John", "id": 1234},
            {"name": "Rose", "id": 3214},
            {"name": "John", "id": 1234},
        ]
    }

    # Fall back to an empty list so a missing "query1" key produces an empty
    # result instead of a TypeError when iterating over None.
    query1 = data.get('query1', [])

    # Identify each object by its sorted (key, value) pairs, so two objects
    # with the same fields written in a different order count as duplicates.
    # A dict (insertion-ordered since Python 3.7) is used instead of a set so
    # the first occurrence of each object keeps its original position.
    # Note: the field values must be hashable for this to work.
    first_seen = {}
    for d in query1:
        first_seen.setdefault(tuple(sorted(d.items())), d)

    # Bind and print the result; a bare expression is only echoed in a REPL
    # and would be silently discarded when run as a script.
    unique = list(first_seen.values())
    print(unique)
    
    Login or Signup to reply.
  3. In JSONiq, you can indeed remove duplicates with a group by and array unboxing, like so:

    let $data := {
      "query1" : [
        { "name" : "John", "id" : 1234 },
        { "name" : "Rose", "id" : 3214 },
        { "name" : "John", "id" : 1234 }
      ]
    }
    (: Unbox the array once so the objects can be iterated as a sequence. :)
    let $entries := $data.query1[]
    return {
      "query1" : [
        for $entry in $entries
        (: Objects sharing the same name and id end up in the same group. :)
        group by $name := $entry.name, $id := $entry.id
        (: After grouping, $entry is the sequence of all objects in the group;
           keep only the first one as the group's representative. :)
        return $entry[1]
      ]
    }
    

    There is also a generic approach that will work even with unknown fields and more nested values:

    let $data := {
      "query1" : [
        { "name" : "John", "id" : 1234 },
        { "name" : "Rose", "id" : 3214 },
        { "name" : "John", "id" : 1234 }
      ]
    }
    (: Unbox the array once; the same sequence is reused for the look-back below. :)
    let $entries := $data.query1[]
    return {
      "query1" : [
        for $candidate at $index in $entries
        (: Keep an object only if no object before it in the sequence is
           deep-equal to it, i.e. only its first occurrence survives. :)
        where not (
          some $earlier in $entries[position() lt $index]
          satisfies deep-equal($candidate, $earlier)
        )
        return $candidate
      ]
    }
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search