MongoDB aggregation to group by multiple fields

user3518008
August 11, 2023
274 views
0 votes
2 Answers

I have the following data in my collection:

{
  "name": "test",
  "data": {
    "statusOne": "enabled",
    "statusTwo": "active"
  }
}
{
  "name": "test",
  "data": {
    "statusOne": "disabled",
    "statusTwo": "active"
  }
}
{
  "name": "another-test",
  "data": {
    "statusOne": "disabled",
    "statusTwo": "active"
  }
}

How do I write an aggregation query that displays the data as shown below? I need to group by name, statusOne, and statusTwo, but the result should first be segregated by the name field. Then, for each name, I need to calculate the number of occurrences of each statusOne and statusTwo value. Both sets of counts need to go into the same output field, "data":

"output": [
{
    "name": "test",
    "data": [
        {
            "status": "active",
            "count": 2
        },
        {
            "status": "disabled",
            "count": 1
        },
        {
            "status": "enabled",
            "count": 1
        }
    ]
},
{
    "name": "another-test",
    "data": [
        {
            "status": "active",
            "count": 1
        },
        {
            "status": "disabled",
            "count": 1
        }
    ]
}
]

I tried to use a sequence of $group stages, as mentioned here, but had no luck:

// Attempted pipeline: count every (name, statusOne, statusTwo) combination,
// regroup the combinations per name, and merge both status lists into "items".
//
// NOTE: the first stage groups on the *combination* of both statuses, so the
// "count" attached to each status below is a per-combination count, not the
// number of times that status value occurs on its own.
[
  {
    $group: {
      _id: {
        appName: "$name",
        cs: "$data.statusOne",
        ps: "$data.statusTwo",
      },
      total: {
        $sum: 1,
      },
    },
  },
  {
    $group: {
      // Must reference the key name used in the first stage's _id
      // ("appName"); "$_id.name" does not exist and would collapse
      // every document into a single null group.
      _id: "$_id.appName",
      total: { $sum: "$total" },
      ps: {
        $addToSet: {
          name: "$_id.ps",
          count: "$total",
        },
      },
      cs: {
        $addToSet: {
          name: "$_id.cs",
          count: "$total",
        },
      },
    },
  },
  {
    $project: {
      _id: 0,
      appName: "$_id",
      // Both status lists end up in one output array.
      items: {
        $concatArrays: ["$ps", "$cs"],
      },
    },
  },
]

Answers

// Counts, per name, how often each status value occurs across BOTH
// data.statusOne and data.statusTwo, and returns one document per name:
//   { name: <name>, data: [ { status: <value>, count: <n> }, ... ] }
db.collection('tablename').aggregate([
  {
    // Put both status fields into one array so they can be counted together.
    // A field that is missing from a document shows up as null in the array.
    $project: {
      name: 1,
      status: ["$data.statusOne", "$data.statusTwo"]
    }
  },
  // One document per individual status value.
  { $unwind: "$status" },
  // Drop the null placeholders produced by missing status fields.
  { $match: { status: { $ne: null } } },
  {
    // Count occurrences of each status value within each name.
    $group: {
      _id: { name: "$name", status: "$status" },
      count: { $sum: 1 }
    }
  },
  {
    // Collect the per-status counts of a name into a single "data" array.
    $group: {
      _id: "$_id.name",
      data: {
        $push: {
          status: "$_id.status",
          count: "$count"
        }
      }
    }
  },
  {
    // Expose the grouping key as "name" instead of "_id".
    $project: {
      _id: 0,
      name: "$_id",
      data: 1
    }
  }
])

Use this, replacing 'tablename' with the name of your collection.

Here is a generalized solution:

db.foo.aggregate([
    // Stage 1: keep the name and convert the "data" subdocument into an
    // array of {k: <field name>, v: <field value>} pairs.
    //
    // The pairs could be narrowed down before counting, for example to
    // just statusOne and statusTwo:
    //
    //    kv: {$filter: {
    //        input: {$objectToArray: "$data"},
    //        cond: {$in: ['$$this.k', ['statusOne','statusTwo']]}
    //    }}
    //
    // or to every key that starts with 'status':
    //
    //    kv: {$filter: {
    //        input: {$objectToArray: "$data"},
    //        cond: {$eq: ['status', {$substr:['$$this.k',0,6]}]}
    //    }}
    //
    // To keep things simple, the whole subdocument is taken here
    // without any filtering.
    {
        $project: {
            name: true,
            kv: {$objectToArray: "$data"}
        }
    },

    // Stage 2: emit one document per key/value pair, e.g.
    //   {
    //     _id: ObjectId("64d651283d8bc34d3928366d"),
    //     name: 'another-test',
    //     kv: { k: 'statusOne', v: 'disabled' }
    //   }
    {$unwind: "$kv"},

    // Stage 3: count how often each value occurs for each name.
    {
        $group: {
            _id: {"name": "$name", "v": "$kv.v"},
            n: {$sum: 1}
        }
    },

    // Stage 4: fold the per-value counts back into one document per name.
    {
        $group: {
            _id: "$_id.name",
            data: {$push: {status: "$_id.v", count: "$n"}}
        }
    }
]);

yields

{
  _id: 'test',
  data: [
    {
      status: 'active',
      count: 2
    },
    {
      status: 'enabled',
      count: 1
    },
    {
      status: 'disabled',
      count: 1
    }
  ]
}
{
  _id: 'another-test',
  data: [
    {
      status: 'disabled',
      count: 1
    },
    {
      status: 'active',
      count: 1
    }
  ]
}

What if there are thousands of tests and we don’t want to $unwind?

You must be careful when throwing $unwind into a pipeline. If the average number of statuses per test gets large (say, 100) then there will be a LOT of docs in the pipeline. Below is an alternate solution that exploits $reduce:

c=db.foo.aggregate([
    // Convert the "data" subdocument into an array of {k, v} pairs.
    {$project: {
        name: true,
        Z: {$objectToArray: "$data"}
    }}

    /*
     Instead of $unwind and using $group-$sum to count things, let's
     do it ourselves.  We will overwrite Z with a new Z.
     Also, since this is an interim step, use shorter variable names for
     clarity.

     This if-then-else construction basically says:
         For each status type, status_type_count += 1
     Values other than "active", "disabled" and "enabled" are ignored.
     This permits the input to have more than 1 status of the same type, e.g.

       "name": "test",
       "data": {
         "statusOne": "enabled",
         "statusTwo": "enabled"
       }

     In MQL in a $reduce loop, we don't say
         object.key = object.key + 1
     Instead we say:
         {$mergeObjects: [ "$$value", {key: {$add:["$$value.key",1]}} ]}
    */
    ,{$addFields: {Z: {$reduce: {
        input: "$Z",
        initialValue: {"A":0,"E":0,"D":0},
        in: {$cond: {
            if: {$eq:["$$this.v","active"]},
            then: {$mergeObjects: [ "$$value", {"A": {$add:["$$value.A",1]}} ]},
            else: {$cond: {
                if: {$eq:["$$this.v","disabled"]},
                then: {$mergeObjects: [ "$$value", {"D": {$add:["$$value.D",1]}} ]},
                else: {$cond: {
                    if: {$eq:["$$this.v","enabled"]},
                    then: {$mergeObjects: [ "$$value", {"E": {$add:["$$value.E",1]}} ]},
                    else: "$$value"
                }}
            }}
        }}
    }}}}

    // Bring the names together and collect the per-document counts:
    ,{$group: {
        _id: "$name", X: {$push: "$Z"}
    }}

    // Now, run a $reduce again to sum the counts AND put back the
    // big variable names:
    ,{$project: {data: {$reduce: {
        input: "$X",
        initialValue: {"active":0,"enabled":0,"disabled":0},
        in: {
            "active": {$add:["$$value.active","$$this.A"]},
            "disabled": {$add:["$$value.disabled","$$this.D"]},
            "enabled": {$add:["$$value.enabled","$$this.E"]}
        }
    }}}}

    // At this point we are "done" information-wise but the OP was
    // looking for an array of status as an RVAL not a key (e.g. "A")
    // so post-process.  Statuses that never occurred for a name still
    // carry their initial count of 0; filter those out so that only
    // statuses actually seen appear in the output.
    ,{$project: {
        _id:0,
        name:"$_id",
        data: {$map: {
            input: {$filter: {
                input: {$objectToArray: "$data"},
                cond: {$gt: ["$$this.v", 0]}
            }},
            in: {
                "status":"$$this.k",
                "count":"$$this.v"
            }
        }}
    }}

]);

Please signup or login to give your own answer.

Click here to cancel reply.