skip to Main Content

I have a document that looks like this

  {
    "_id": {
      "$oid": "6187e5fa4ebcc2db6c86081f"
    },
    "publicId": "S23DCL",
    "flights": [
      {
        "name": "f1",
        "_id": {
          "$oid": "620026293fc350024da614dd"
        },
        "wines": [
          {
            "id": "1",
            "name": "wine1"
          },
          {
            "id": "2",
            "name": "wine2"
          }
        ]
      },
      {
        "name": "f2",
        "_id": {
          "$oid": "6200263c3fc350024da614de"
        },
        "wines": [
          {
            "id": "3",
            "name": "leVin"
          },
          {
            "id": "4",
            "name": "theWine"
          }
        ]
      }
    ],
    "title": "Pinot Noir 2022",
    "scores": [
      {
        "userId": "f8Dv",
        "userName": "gugus",
        "scores": [
          {
            "score": 95,
            "wineId": "1"
          },
          {
            "score": 88,
            "wineId": "2"
          }
        ]
      },
      {
        "userId": "yLjh",
        "userName": "test",
        "scores": [
          {
            "score": 92,
            "wineId": "1"
          },
          {
            "score": 87,
            "wineId": "2"
          }
        ]
      },
      {
        "userId": "B6em",
        "userName": "jklsdf",
        "scores": [
          {
            "score": 88,
            "wineId": "1"
          },
          {
            "score": 90,
            "wineId": "2"
          },
          {
            "score": 92,
            "wineId": "3"
          },
          {
            "score": 86,
            "wineId": "4"
          }
        ],
        "isFinished": false
      }
    ],
  }

I if it’s too long for the question I can shorten it.
The whole document and query can also be found in mongoplayground

What I want to do is to create a scoreboard for all wines, meaning caluclating avg etc. for every wine, while still having just one document.

Something like this

{
  "title": "Pinot Noir 2022"
  "results": [
    {
       "wine": {
          "flightName": "f1",
           "wineIndex": 1,
           "name": "wine1",
           "wineId": "1"
        },
        "avg": 90,
        "scores": [
           {
        "scores": {
          "score": 87,
          "wineId": "1"
        },
        "userId": "yLjh",
        "userName": "test"
      },
        ] 
    }
   ]
}

I’m almost there but struggle with the part were I want a single result document after I grouped it.

the wineId within scores coresponds to the id in flights.wines

Here is what I have so far

db.collection.aggregate([
  {
    "$match": {
      "publicId": "S23DCL"
    }
  },
  {
    "$project": {
      "scores": "$scores",
      "publicId": "$publicId"
    }
  },
  {
    "$unwind": "$scores"
  },
  {
    "$unwind": "$scores.scores"
  },
  {
    $lookup: {
      from: "collection",
      let: {
        wId: "$scores.scores.wineId",
        "tastingId": "$_id"
      },
      pipeline: [
        {
          "$unwind": "$flights"
        },
        {
          "$unwind": {
            "path": "$flights.wines",
            "includeArrayIndex": "index"
          }
        },
        {
          $match: {
            $expr: {
              "$and": [
                {
                  "$eq": [
                    "$flights.wines.id",
                    "$$wId"
                  ]
                },
                {
                  "$eq": [
                    "$_id",
                    "$$tastingId"
                  ]
                }
              ]
            },
            
          }
        },
        {
          $project: {
            _id: 0,
            "name": "$flights.wines.name",
            "flight": "$flights.name",
            "wineId": "$flights.wines.id",
            "index": "$index"
          }
        }
      ],
      as: "wine"
    }
  },
  {
    "$replaceRoot": {
      "newRoot": {
        "$mergeObjects": [
          {
            "_id": "$_id",
            "scores": "$scores"
          },
          {
            "wine": {
              "$arrayElemAt": [
                "$wine",
                0
              ]
            }
          }
        ]
      }
    }
  },
  {
    "$group": {
      "_id": "$wine",
      "avg": {
        "$avg": "$scores.scores.score"
      },
      "min": {
        "$min": "$scores.scores.score"
      },
      "max": {
        "$max": "$scores.scores.score"
      },
      "stddev": {
        "$stdDevPop": "$scores.scores.score"
      },
      "tmp": {
        "$max": 1
      },
      "scores": {
        "$addToSet": "$scores"
      }
    }
  }
])

This gives me data as seen in results above. But data like "title" is missing.
Note the tmp field. I tried to add this dummy field and group on it in later stage.
Since there will always only be one group (because tmp is static).
Like

// query from abive
,{
 "$group": {
  "$_id": "tmp"
  // add my grouped results from before into an array
}
}

So how can I add my grouped results into an array, so I don’t get (n) groups back but one documents where the grouped results are in an array?

Hope it’s clear what I mean 🙂

2

Answers


  1. Maybe something like this:

     {
      $group: {
      _id: "tmp",
       array: {
        $push: "$$ROOT"
       }
      }
    }
    

    playground

    Login or Signup to reply.
  2. when you are replacing the root using replaceRoot your title gets missing . first you have to add it to the new root level.
    this returns your title .

    [
      {
        '$match': {
          'publicId': 'S23DCL'
        }
      }, {
        '$project': {
          'scores': '$scores', 
          'publicId': '$publicId', 
          'title': '$title', 
          'flights': '$flights'
        }
      }, {
        '$unwind': '$scores'
      }, {
        '$unwind': '$scores.scores'
      }, {
        '$lookup': {
          'from': 'collection', 
          'let': {
            'wId': '$scores.scores.wineId', 
            'tastingId': '$_id'
          }, 
          'pipeline': [
            {
              '$unwind': '$flights'
            }, {
              '$unwind': {
                'path': '$flights.wines', 
                'includeArrayIndex': 'index'
              }
            }, {
              '$match': {
                '$expr': {
                  '$and': [
                    {
                      '$eq': [
                        '$flights.wines.id', '$$wId'
                      ]
                    }, {
                      '$eq': [
                        '$_id', '$$tastingId'
                      ]
                    }
                  ]
                }
              }
            }, {
              '$project': {
                '_id': 0, 
                'name': '$flights.wines.name', 
                'flight': '$flights.name', 
                'wineId': '$flights.wines.id', 
                'index': '$index'
              }
            }
          ], 
          'as': 'wine'
        }
      }, {
        '$addFields': {
          'scores.title': '$title'
        }
      }, {
        '$replaceRoot': {
          'newRoot': {
            '$mergeObjects': [
              {
                '_id': '$_id', 
                'scores': '$scores'
              }, {
                'wine': {
                  '$arrayElemAt': [
                    '$wine', 0
                  ]
                }
              }
            ]
          }
        }
      }, {
        '$group': {
          '_id': '$wine', 
          'title': {
            '$last': '$scores.title'
          }, 
          'avg': {
            '$avg': '$scores.scores.score'
          }, 
          'min': {
            '$min': '$scores.scores.score'
          }, 
          'max': {
            '$max': '$scores.scores.score'
          }, 
          'stddev': {
            '$stdDevPop': '$scores.scores.score'
          }, 
          'tmp': {
            '$max': 1
          }, 
          'scores': {
            '$addToSet': '$scores'
          }
        }
      }
    ]
    
    

    but your current aggregation pipeline has too many stages. if you an explain what exactly you need from your data source may be it can be little optimised

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search