skip to Main Content

This is a bit long, but I am at a loss as to how to glue together multiple jq queries to filter out nested objects in a multi-level structure.

I am starting with JSON that looks like this.

{
  "image": "test",
  "duration": 40,
  "image_size": 2758644359,
  "os": "rhel",
  "version": "9.2",
  "resources": [
    {
      "resource": {
        "format": "rpm",
        "name": "binutils",
        "version": "2.35.2-37.el9",
        "src_version": "2.35.2-37.el9"
      },
      "installed": true,
      "checks": [
        {
          "name": "check1",
          "description": "first check",
          "severity": "moderate",
          "depth": 5.5
        },
        {
          "description": "second check",
          "severity": "moderate",
          "depth": 4.7
        },
        {
          "name": "check3",
          "description": "third check",
          "severity": "low",
          "depth": 4.2
        }
      ]
    },
    {
      "resource": {
        "format": "rpm",
        "name": "dbus",
        "version": "1:1.12.20-7.el9_1",
        "src_version": "1:1.12.20-7.el9_1"
      },
      "installed": true,
      "checks": [
        {
          "description": "second check",
          "severity": "moderate",
          "depth": 6.2
        }
      ]
    },
    {
      "resource": {
        "format": "rpm",
        "name": "dmidecode",
        "version": "1:3.3-7.el9",
        "src_version": "1:3.3-7.el9"
      },
      "installed": true,
      "checks": [
        {
          "name": "check1",
          "description": "first check",
          "severity": "moderate",
          "depth": 5.5
        },
        {
          "description": "second check",
          "severity": "moderate",
          "depth": 7.1
        }
      ]
    },
    {
      "resource": {
        "format": "rpm",
        "name": "gcc",
        "version": "11.3.1-4.3.el9",
        "src_version": "11.3.1-4.3.el9"
      },
      "installed": true,
      "checks": [
      ]
    }
  ],
  "image_assurance_results": {},
  "check_summary": {
    "total": 6,
    "low": 1,
    "moderate": 5
  },
  "initiating_user": "test_user"
}

I want to preserve the overall structure and pick off selected elements from the top level as well as the two nested levels in the arrays. This is my desired result.

{
  "image": "test",
  "os": "rhel",
  "resources": [
    {
      "resource": {
        "name": "binutils",
        "version": "2.35.2-37.el9"
      },
      "checks": [
        {
          "name": "check1",
          "description": "first check"
        },
        {
          "description": "second check"
        },
        {
          "name": "check3",
          "description": "third check"
        }
      ]
    },
    {
      "resource": {
        "name": "dbus",
        "version": "1:1.12.20-7.el9_1"
      },
      "checks": [
        {
          "description": "second check"
        }
      ]
    },
    {
      "resource": {
        "name": "dmidecode",
        "version": "1:3.3-7.el9"
      },
      "checks": [
        {
          "name": "check1",
          "description": "first check"
        },
        {
          "description": "second check"
        }
      ]
    },
    {
      "resource": {
        "name": "gcc",
        "version": "11.3.1-4.3.el9"
      },
      "checks": [
      ]
    }
  ],
  "check_summary": {
    "total": 6,
    "low": 1,
    "moderate": 5
  }
}

With this query I can eliminate the top level elements that I don’t want, retaining the full element set under resources:
jq '{image, os, resources, summary}'
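And get this result (note that "summary" comes out as null because the key in the input is actually named check_summary, not summary):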

{
  "image": "test",
  "os": "rhel",
  "resources": [
    {
      "resource": {
        "format": "rpm",
        "name": "binutils",
        "version": "2.35.2-37.el9",
        "src_version": "2.35.2-37.el9"
      },
      "installed": true,
      "checks": [
        {
          "name": "check1",
          "description": "first check",
          "severity": "moderate",
          "depth": 5.5
        },
        {
          "description": "second check",
          "severity": "moderate",
          "depth": 4.7
        },
        {
          "name": "check3",
          "description": "third check",
          "severity": "low",
          "depth": 4.2
        }
      ]
    },
    {
      "resource": {
        "format": "rpm",
        "name": "dbus",
        "version": "1:1.12.20-7.el9_1",
        "src_version": "1:1.12.20-7.el9_1"
      },
      "installed": true,
      "checks": [
        {
          "description": "second check",
          "severity": "moderate",
          "depth": 6.2
        }
      ]
    },
    {
      "resource": {
        "format": "rpm",
        "name": "dmidecode",
        "version": "1:3.3-7.el9",
        "src_version": "1:3.3-7.el9"
      },
      "installed": true,
      "checks": [
        {
          "name": "check1",
          "description": "first check",
          "severity": "moderate",
          "depth": 5.5
        },
        {
          "description": "second check",
          "severity": "moderate",
          "depth": 7.1
        }
      ]
    },
    {
      "resource": {
        "format": "rpm",
        "name": "gcc",
        "version": "11.3.1-4.3.el9",
        "src_version": "11.3.1-4.3.el9"
      },
      "installed": true,
      "checks": []
    }
  ],
  "summary": null
}

And with this query I can filter out the elements I don’t want down in the nested "checks" objects
jq '.resources[].checks[] | with_entries(select(.key | in({"name":1, "description":1})))'

{
  "name": "check1",
  "description": "first check"
}
{
  "description": "second check"
}
{
  "name": "check3",
  "description": "third check"
}
{
  "description": "second check"
}
{
  "name": "check1",
  "description": "first check"
}
{
  "description": "second check"
}

But I can’t figure out how to pipe these two queries together to get the desired end result and add in one more operation to eliminate some of the top level elements under resource. How do I apply the with_entries filter to the initial down select of the top level?

2

Answers


  1. Use |= instead of | to update without losing the overall structure. Also, use IN (instead of in) to test if a value is contained in a given set. All put together:

    {image, os, resources, summary}
    | .resources[].resource |= with_entries(select(IN(.key; "name", "version")))
    | .resources[].checks[] |= with_entries(select(IN(.key; "name", "description")))
    
    {
      "image": "test",
      "os": "rhel",
      "resources": [
        {
          "resource": {
            "name": "binutils",
            "version": "2.35.2-37.el9"
          },
          "installed": true,
          "checks": [
            {
              "name": "check1",
              "description": "first check"
            },
            {
              "description": "second check"
            },
            {
              "name": "check3",
              "description": "third check"
            }
          ]
        },
        {
          "resource": {
            "name": "dbus",
            "version": "1:1.12.20-7.el9_1"
          },
          "installed": true,
          "checks": [
            {
              "description": "second check"
            }
          ]
        },
        {
          "resource": {
            "name": "dmidecode",
            "version": "1:3.3-7.el9"
          },
          "installed": true,
          "checks": [
            {
              "name": "check1",
              "description": "first check"
            },
            {
              "description": "second check"
            }
          ]
        },
        {
          "resource": {
            "name": "gcc",
            "version": "11.3.1-4.3.el9"
          },
          "installed": true,
          "checks": []
        }
      ],
      "summary": null
    }
    

    Demo

    Now, you could further factor out .resources[] from the loops to prevent double iteration, and, apparently, therein you also want to filter for {resource, checks} (to remove .installed):

    {image, os, resources, summary} | .resources[] |= (
      {resource, checks}
      | .resource |= with_entries(select(IN(.key; "name", "version")))
      | .checks[] |= with_entries(select(IN(.key; "name", "description")))
    )
    
    {
      "image": "test",
      "os": "rhel",
      "resources": [
        {
          "resource": {
            "name": "binutils",
            "version": "2.35.2-37.el9"
          },
          "checks": [
            {
              "name": "check1",
              "description": "first check"
            },
            {
              "description": "second check"
            },
            {
              "name": "check3",
              "description": "third check"
            }
          ]
        },
        {
          "resource": {
            "name": "dbus",
            "version": "1:1.12.20-7.el9_1"
          },
          "checks": [
            {
              "description": "second check"
            }
          ]
        },
        {
          "resource": {
            "name": "dmidecode",
            "version": "1:3.3-7.el9"
          },
          "checks": [
            {
              "name": "check1",
              "description": "first check"
            },
            {
              "description": "second check"
            }
          ]
        },
        {
          "resource": {
            "name": "gcc",
            "version": "11.3.1-4.3.el9"
          },
          "checks": []
        }
      ],
      "summary": null
    }
    

    Demo


    Alternatively (instead of updating with |=), use a constructive approach and directly build an object containing only what you want to keep (using map on the arrays):

    {
      image, os, "resources": .resources | map({
        "resource": .resource | with_entries(select(IN(.key; "name", "version"))),
        "checks": .checks | map(with_entries(select(IN(.key; "name", "description"))))
      }), summary
    }
    

    Demo

    Login or Signup to reply.
  2. As an alternative, you could construct the objects by picking out the properties you want to keep. Then filter down at each level.

    {image, os, resources, check_summary}
      | .resources[] |= ({resource, checks}
        | .resource |= {name, version}
        | .checks[] |= {name, description}
      )
    

    Or more directly, construct your result picking out the properties at the start.

    {
        image,
        os,
        resources: .resources | map({
            resource: .resource | {name, version},
            checks: .checks | map({name, description})
        }),
        check_summary
    }
    

    These approaches do have the disadvantage, however, that they will retain non-existent properties and set them to null. You could remove them at the end by deleting those properties that are null.

    | del(.. | select(. == null))
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search