skip to Main Content

I’ve been trying to create a query on documents like these:

[
    {
        "timestamp": new ISODate('2020-01-01T00:00:00'),
        "objectId": "Id_A",
        "locationId": "Location_A"
    },
    {
        "timestamp": new ISODate('2021-01-01T00:00:00'),
        "objectId": "Id_A",
        "locationId": "Location_A"
    },
    {
        "timestamp": new ISODate('2022-01-01T00:00:00'),
        "objectId": "Id_A",
        "locationId": "Location_B"
    },
    {
        "timestamp": new ISODate('2021-01-01T00:00:00'),
        "objectId": "Id_B",
        "locationId": "Location_B"
    },
    {
        "timestamp": new ISODate('2022-01-01T00:00:00'),
        "objectId": "Id_A",
        "locationId": "Location_A"
    }
]

given multiple "and" queries, I want to count the matching documents per range

[$or: [
    { $and: [{
        "timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
                      $lt: new ISODate('2020-12-31T00:00:00'),
        },
        "objectId": "Id_A",
        "locationId": "Location_A"}]},
    { $and: [{
        "timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
                      $lt: new ISODate('2022-12-31T00:00:00'),
        },
        "objectId": "Id_A",
        "locationId": "Location_A"}]},
    { $and: [{
        "timestamp": {$gte: new ISODate('2022-01-01T00:00:00'),
                      $lt: new ISODate('2022-12-31T00:00:00'),
        },
        "objectId": "Id_A",
        "locationId": "Location_B"}]}
    ]
]

I want to map the counts to a result structure that looks like this

[
    {"objectId": "Id_A", "locationId": "Location_A", "rangeStart:": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2020-12-31T00:00:00'), "count": 1},
    {"objectId": "Id_A", "locationId": "Location_A", "rangeStart:": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 3},
    {"objectId": "Id_A", "locationId": "Location_B", "rangeStart:": new ISODate('2022-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 1},
]

Aggregations I looked at so far:

  • bucket
  • facet
  • group

But I still can’t figure it out.
What would your approach be?

Update 1
The rangeKeys are not assigned correctly with the solution I derived from @Takis suggestion:

I get the following result:

{ 
    "_id" : {
        "objectId" : "objectA", 
        "locationId" : "locationA", 
        "rangeKey" : "UUID2", 
        "count" : 1.0
    }
}

while I expect the following result:

{ 
    "_id" : {
        "objectId" : "objectA", 
        "locationId" : "locationA", 
        "rangeKey" : "UUID1",
        "count" : 1.0
    }
}, {
    
    "_id" : {
        "objectId" : "objectB", 
        "locationId" : "locationA", 
        "rangeKey" : "UUID2",
        "count" : 0.0
    }
}

This is the query I built

db.createCollection("object_location_tracking")
db.getCollection("object_location_tracking").insertMany([
    {
        _id: "1",
        locationId: "locationA",
        objectId: "objectA",
        timestamp: ISODate("2020-01-01T00:00:00Z")
    },
    {
        _id: "2",
        locationId: "locationB",
        objectId: "objectA",
        timestamp: ISODate("2020-01-01T00:00:00Z")
    },
    {
        _id: "3",
        locationId: "locationA",
        objectId: "objectB",
        timestamp: ISODate("2019-01-01T00:00:00Z")
    },
    {
        _id: "4",
        locationId: "locationB",
        objectId: "objectB",
        timestamp: ISODate("2020-01-01T00:00:00Z")
    }
]);
db.getCollection("object_location_tracking").aggregate(
    [
        { 
            "$match" : { 
                "locationId" : "locationA", 
                "$or" : [
                    { 
                        "$and" : [
                            { 
                                "objectId" : "objectA"
                            }, 
                            { 
                                "timestamp" : { 
                                    "$gte" : ISODate("2020-01-01T00:00:00.000+0000")
                                }
                            }, 
                            { 
                                "timestamp" : { 
                                    "$lt" : ISODate("2022-01-01T00:00:00.000+0000")
                                }
                            }
                        ]
                    }, 
                    { 
                        "$and" : [
                            { 
                                "objectId" : "objectB"
                            }, 
                            { 
                                "timestamp" : { 
                                    "$gte" : ISODate("2020-01-01T00:00:00.000+0000")
                                }
                            }, 
                            { 
                                "timestamp" : { 
                                    "$lt" : ISODate("2022-01-01T00:00:00.000+0000")
                                }
                            }
                        ]
                    }
                ]
            }
        }, 
        { 
            "$group" : { 
                "_id" : { 
                    "objectId" : "$objectId", 
                    "locationId" : "$locationId", 
                    "rangeKey" : { 
                        "$switch" : { 
                            "branches" : [
                                { 
                                    "case" : { 
                                        "$and" : [
                                            { 
                                                "$gte" : [
                                                    "$timestamp", 
                                                    ISODate("2020-01-01T00:00:00.000+0000")
                                                ]
                                            }, 
                                            { 
                                                "$lt" : [
                                                    "$timestamp", 
                                                    ISODate("2022-01-01T00:00:00.000+0000")
                                                ]
                                            }, 
                                            { 
                                                "objectId" : "objectB"
                                            }, 
                                            { 
                                                "locationId" : "locationA"
                                            }
                                        ]
                                    }, 
                                    "then" : "UUDI2"
                                }, 
                                { 
                                    "case" : { 
                                        "$and" : [
                                            { 
                                                "$gte" : [
                                                    "$timestamp", 
                                                    ISODate("2020-01-01T00:00:00.000+0000")
                                                ]
                                            }, 
                                            { 
                                                "$lt" : [
                                                    "$timestamp", 
                                                    ISODate("2022-01-01T00:00:00.000+0000")
                                                ]
                                            }, 
                                            { 
                                                "objectId" : "objectA"
                                            }, 
                                            { 
                                                "locationId" : "locationA"
                                            }
                                        ]
                                    }, 
                                    "then" : "UUID2"
                                }
                            ], 
                            "default" : "0"
                        }
                    }, 
                    "count" : { 
                        "$sum" : 1.0
                    }
                }
            }
        }
    ], 
    { 
        "allowDiskUse" : true
    }
);

2

Answers


  1. As hinted at in the comments, $facet will do the trick. Note the use of year-only constructors for ISODate for simplicity. The $project and $unwind are not absolutely necessary because they are a formatting convenience only per the OP. There will only be a single doc coming out of $facet which will be converted into three and only three discrete docs for the ranges so it is not a performance hit.

    db.foo.aggregate([
        {$facet: {
            "first_bucket": [
                {$match: {"objectId":"Id_A",
                          "locationId":"Location_A",
                          "timestamp": {$gte: new ISODate('2020-01-01'),
                                        $lt: new ISODate('2020-12-31')}
                         }},
                {$count: "N"}
            ],
    
            "second_bucket": [
                {$match: {"objectId":"Id_A",
                          "locationId":"Location_A",
                          "timestamp": {$gte: new ISODate('2020-01-01'),
                                        $lt: new ISODate('2022-12-31')}
                         }},
                {$count: "N"}
            ],
    
            "third_bucket": [
                {$match: {"objectId":"Id_A",
                          "locationId":"Location_B",
                          "timestamp": {$gte: new ISODate('2022-01-01'),
                                        $lt: new ISODate('2022-12-31')}
                         }},
                {$count: "N"}
            ]
        }},
        {$project: {X: [
            {"objectId":"Id_A",
             "locationId":"Location_A",
             "rangeStart": new ISODate('2020-01-01'),
             "rangeEnd": new ISODate('2020-12-31'),
             "count": {$first: '$first_bucket.N'}
            },
            {"objectId":"Id_A",
             "locationId":"Location_A",
             "rangeStart": new ISODate('2020-01-01'),
             "rangeEnd": new ISODate('2022-12-31'),
             "count": {$first: '$second_bucket.N'}
            },
            {"objectId":"Id_A",
             "locationId":"Location_B",
             "rangeStart": new ISODate('2022-01-01'),
             "rangeEnd": new ISODate('2022-12-31'),
             "count": {$first: '$third_bucket.N'}
        }
        ]
        }},
        {$unwind: '$X'},
        {$replaceRoot: {newRoot: '$X'}}
    ]);
    

    UPDATE

    $first is an operator available on v>=4.4. To make this solution work v<4.4, change the count expression in the $project from $first to:

     "count": {$arrayElemAt:['$the_bucket.N',0]}
    

    A slightly more interesting variation is to use $group in the $facet expression. This will produce more entries in the buckets but has the advantage of hardcoding only the date ranges.

    db.foo.aggregate([
        {$facet: {
            "first_bucket": [
                {$match: {"timestamp": {$gte: new ISODate('2020-01-01'),
                                        $lt: new ISODate('2020-12-31')}
                         }},
                {$group: {_id: {objectId: "$objectId", locationId:"$locationId"},
                          N: {$sum:1}}}
            ],
            "second_bucket": [
                {$match: {"timestamp": {$gte: new ISODate('2020-01-01'),
                                        $lt: new ISODate('2022-12-31')}
                         }},
                {$group: {_id: {objectId: "$objectId", locationId:"$locationId"},
                          N: {$sum:1}}}
            ],
            "third_bucket": [
                {$match: {"timestamp": {$gte: new ISODate('2022-01-01'),
                                        $lt: new ISODate('2022-12-31')}
                         }},
                {$group: {_id: {objectId: "$objectId", locationId:"$locationId"},
                          N: {$sum:1}}}
            ]
    
        }}
    ]);
    
    Login or Signup to reply.
  2. Query

    • you can add one $match as first stage to keep only the valid ranges
      (this can use index also)
    • group on objectId and locationId and conditional range
    • i didn’t tested the bellow query because i dont have sample data, if doesn’t work and you are stuck, if you can add sample data and expected output

    *$facet could be used but facet has those problems (test it to see what is better for your query)

    • doesn’t use index (even if match is the first stage)
    • runs the pipeline multiple times, 1 time per field
    aggregate(
    [{"$group":
       {"_id":
         {"objectId":"$objectId",
          "locationId":"$locationId",
          "range":
           {"$switch":
             {"branches":
               [{"case":
                   {"$and":
                     [{"$gte":["$timestamp", ISODate("2020-01-01T00:00:00Z")]},
                       {"$lt":["$timestamp", ISODate("2020-12-31T00:00:00Z")]}]},
                  "then":
                   {"rangeStart":ISODate("2020-01-01T00:00:00Z"),
                    "rangeEnd":ISODate("2020-12-31T00:00:00Z")}},
                 {"case":
                   {"$and":
                     [{"$gte":["$timestamp", ISODate("2020-01-01T00:00:00Z")]},
                       {"$lt":["$timestamp", ISODate("2020-12-31T00:00:00Z")]}]},
                  "then":
                   {"rangeStart":ISODate("2020-01-01T00:00:00Z"),
                    "rangeEnd":ISODate("2020-12-31T00:00:00Z")}},
                 {"case":
                   {"$and":
                     [{"$gte":["$timestamp", ISODate("2020-01-01T00:00:00Z")]},
                       {"$lt":["$timestamp", ISODate("2020-12-31T00:00:00Z")]}]},
                  "then":
                   {"rangeStart":ISODate("2020-01-01T00:00:00Z"),
                    "rangeEnd":ISODate("2020-12-31T00:00:00Z")}}],
              "default":"out-of-range"}}},
        "count":{"$sum":1}}},
     {"$project":
       {"_id":0,
        "count":1,
        "objectId":"$_id.objectId",
        "locationId":"$_id.locationId",
        "range":"$_id.range"}}])
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search