skip to Main Content

I have set of documents in my mongoDB collection. I am looking to get datewise aggregate count of document if date range is more than a day and hourly aggregate count for same column if date query is for single day. The data may have documents with same conversationId, hence it is necessary to group with conversationId as well.Below is sample of data for reference

   [
   {
      "_id":"c438a671-2391-4b85-815c-ecfcb3d2bb54",
      "status":"INTERNAL_UPDATE",
      "conversationId":"ac44781d-caab-4410-a708-9d6db8480fc3",
      "messageIds":[],
      "messageId":"4dc02026-ac06-4eb1-aa59-e385fcce4a36",
      "responseId":"0c00c83d-61c5-4937-846c-2e6a46aae857",
      "conversation":{},
      "message":{},
      "params":{},
      "timestamp":"2021-05-04T11:40:06.552Z",
      "source":{}
   },
   {
      "_id":"98370ddf-9ff8-4347-bab7-1f7777ab9e9d",
      "status":"NEW",
      "conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dca580",
      "messageIds":[],
      "messageId":"ba94b839-f795-44f2-aea0-173d26006f14",
      "responseId":"a2b75364-447b-4345-8008-2beccd6cbb34",
      "conversation":{},
      "message":{},
      "params":{},
      "timestamp":"2021-05-05T11:40:30.897Z",
      "source":{}
   },
   {
      "_id":"db1eae2b-62d9-455c-ab46-dbfc5baf8b67",
      "status":"INTERNAL_UPDATE",
      "conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dcb584",
      "messageIds":[],
      "messageId":"b83c743b-d36e-4fdd-9c03-21988af47263",
      "responseId":"97198c09-0130-48dc-a225-6d0faeff3116",
      "conversation":{},
      "message":{},
      "params":{},
      "timestamp":"2021-05-05T11:40:31.418Z",
      "source":{}
   },
   {
      "_id":"12a21495-f857-4f18-a06e-f8ba0b951ade",
      "status":"NEW",
      "conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
      "messageIds":[],
      "messageId":"51a48362-545c-4f9f-930b-42e4841fc974",
      "responseId":"4691468b-a43b-41d1-83df-1349fb554bfa",
      "conversation":{},
      "message":{},
      "params":{},
      "timestamp":"2021-05-06T11:43:58.174Z",
      "source":{}
   },
   {
      "_id":"4afaa735-4618-40cf-8b4f-00ee83b2c3c5",
      "status":"INTERNAL_UPDATE",
      "conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
      "messageIds":[],
      "messageId":"7c860126-bf1e-41b2-a7d3-6bcec3e8d5fb",
      "responseId":"09cec9a1-2621-481d-b527-d98b007ef5be",
      "conversation":{},
      "message":{},
      "params":{},
      "timestamp":"2021-05-06T11:43:58.736Z",
      "source":{}
   },
   {
      "_id":"cf8deeca-2cfd-497e-b92b-03204c84217a",
      "status":"NEW",
      "conversationId":"3c6870b5-88d6-4e21-8629-28137dea3fee",
      "messageIds":[],
      "messageId":"da84e414-2269-4812-8ddd-e2cd6c9be4fd",
      "responseId":"ae1014b2-0cc1-41f0-9990-cf724ed67ab7",
      "conversation":{},
      "message":{},
      "params":{},
      "timestamp":"2021-05-06T13:37:55.060Z",
      "source":{}
   }
]

Presently I am able to group by conversationId, but unable to get data aggregated datewise or on hourly basis if date range is on single date.

Below is the query for same

db.documentName.aggregate([
  {
    '$match': {
      '$and': [
        {
          timestamp: {
            '$gte': ISODate('2021-05-01T00:00:00.000Z'),
            '$lte': ISODate('2021-05-10T23:59:59.999Z')
          }
        },
        { 'source.author': { '$regex': 'user', '$options': 'i' } },
        {},
        {}
      ]
    }
  },
  { '$group': { 
      _id: {'conversationId': '$conversationId'} },
  { '$count': 'document_count' }
])

I have tried adding something like, $hour: ‘$timestamp’ with comma separation beside conversationId in $group, but its of no use and is giving error.

The desired result I am trying to get for above data is, something like this

    [{"date": "2021-05-04", "doc_count": 1},
    {"date": "2021-05-05", "doc_count": 2},
    {"date": "2021-05-06", "doc_count": 2}]

As for 2021-05-05 there are 2 docs with different conversationId, and for 2021-05-06 there are 3 docs in total but 2 documents have same conversationId hence aggregate count for 2021-05-06 is also 2. Hope this clarifies my quesiton.

2

Answers


  1. Chosen as BEST ANSWER

    As suggested by @nimrodserok, for mongo version 4.2.9 the query would be

        db.collection.aggregate([
      {
        $match: {
          timestamp: {
            $gte: ISODate("2021-05-01T00:00:00.000Z"),
            $lte: ISODate("2021-05-07T23:59:59.999Z")
          }
        }
      },
      {
        $project: {
          conversationId: 1,
          groupId: {
            $dateFromParts: {
              year: {
                $year: "$timestamp"
              },
              month: {
                $month: "$timestamp"
              },
              day: {
                $dayOfMonth: "$timestamp"
              },
              hour: {
                $cond: [
                  {
                    $gte: [
                      {
                        $subtract: [
                          {
                            $toLong: ISODate("2021-05-07T23:59:59.999Z")
                          },
                          {
                            $toLong: ISODate("2021-05-01T00:00:00.000Z")
                          }
                        ]
                      },
                      86400000
                    ]
                  },
                  0,
                  {
                    $hour: "$timestamp"
                  }
                ]
              }
            }
          }
        }
      },
      {
        $group: {
          _id: {
            conversationId: "$conversationId",
            groupId: "$groupId"
          }
        }
      },
      {
        $group: {
          _id: "$_id.groupId",
          doc_count: {
            $sum: 1
          }
        }
      },
      {
        $project: {
          date: {
            $toString: "$_id"
          },
          doc_count: 1,
          _id: 0
        }
      }
    ])
    

  2. The question is not entirely clear to me, but it sounds like you want something like this:

    The groupId is a field to rebuild the date including the hour, or not, according to your condition:

    EDIT:

    db.collection.aggregate([
      {$match: {
          timestamp: {
            $gte: ISODate("2021-05-01T00:00:00.000Z"),
            $lte: ISODate("2021-05-07T23:59:59.999Z")
          }
        }
      },
      {$project: {
          conversationId: 1,
          groupId: {
            $dateFromParts: {
              year: {$year: "$timestamp"},
              month: {$month: "$timestamp"},
              day: {$dayOfMonth: "$timestamp"},
              hour: {$cond: [
                  {$gte: [
                      {$dateDiff: {
                          startDate: ISODate("2021-05-01T00:00:00.000Z"),
                          endDate: ISODate("2021-05-07T23:59:59.999Z"),
                          unit: "day"}}, 1]},
                  0,
                  {$hour: "$timestamp"}]}
            }
          }
        }
      },
      {$group: {_id: {conversationId: "$conversationId", groupId: "$groupId"}}},
      {$group: {_id: "$_id.groupId", doc_count: {$sum: 1}}},
      {$project: {date: {$toString: "$_id"}, doc_count: 1, _id: 0}}
    ])
    

    See how it works on the playground example

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search