skip to Main Content

I have a JSON object that looks like this:

# Sample input. Bound to `json_object` (the name the traversal code reads)
# rather than `dict`, which would shadow the builtin type.
json_object = { "a1": { "b1" : 1 , "b2" : { "c1" : 24, "c2" : 25}, "b3" : { "c3" : 45, "c4" : 1, "c5" : 4} }, "a2" : 4}

I want to supply lists of keys, one list per nesting level, like so:
# Keys to look up at the first, second and third nesting level respectively.
FIRSTS = ["a1"]
SECONDS = ["b1", "b3"]
THIRDS = ["c3"]

which would print this :
[b1 : 1], [c3 : 45]

i have written this code

# Walk up to three levels of json_object, one key list per level, and gather
# one "[...], " fragment per requested key. Fragments are joined once at the
# end instead of growing a string inside the loops.
fragments = []
for first in FIRSTS:
    if first not in json_object:
        fragments.append(f"[{first} not found], ")
        continue

    level_one = json_object[first]
    if not isinstance(level_one, dict):
        # Plain value at the first level: report it and go no deeper.
        fragments.append(f"[{first} : {level_one}], ")
        continue

    for second in SECONDS:
        if second not in level_one:
            fragments.append(f"[{second} not found], ")
            continue

        level_two = level_one[second]
        if not isinstance(level_two, dict):
            # Plain value at the second level.
            fragments.append(f"[{second} : {level_two}], ")
            continue

        for third in THIRDS:
            if third in level_two:
                # Third-level values are reported as-is, whatever their type.
                fragments.append(f"[{third} : {level_two[third]}], ")
            else:
                fragments.append(f"[{third} not found], ")

message = "".join(fragments)

# Every fragment ends with ", "; slice off the last separator before printing.
print(message[:-2])

But I’d like a better way to do it

EDIT:
Hey, I’m editing for clarification: I want to print the key-value pairs whose value is not a nested JSON object. So in my code, for every key in FIRSTS, I check whether its value is a JSON object; if it is, I check whether it has a key that is equal to a key in SECONDS, and repeat with THIRDS; and if the value of the key is not a JSON object, I print the key-value pair.

EDIT2:
Someone asked for an edit on the input, so I want to make clear that the input could be anything, even keys that might not appear in the JSON; that’s why I do all the checking in my code.

2

Answers


  1. USE CASE

    inputs:
    FIRSTS = ["a1", "a2"]
    SECONDS = ["b1", "b2", "b3"]
    THIRDS = ["c5", "c1"]
    json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
    
    output:
    [b1 : 1], [c5 not found], [c1 : 24], [c5 : 4], [c1 not found], [a2 : 4]
    
    
    
    inputs:
    FIRSTS = ["a1", "a2"]
    SECONDS = ["b1", "b3"]
    THIRDS = ["c5", "c1"]
    json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
    
    output:
    [b1 : 1], [c5 : 4], [c1 not found], [a2 : 4]
    
    # Getting a whole sub-dict as the value by passing an empty key list
    inputs:
    FIRSTS = ["a1", "a2"]
    SECONDS = ["b1", "b3"]
    THIRDS = []
    json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
    
    output:
    [b1 : 1], [b3 : {'c3': 45, 'c4': 1, 'c5': 4}], [a2 : 4]
    
    inputs:
    FIRSTS = ["a1", "a2"]
    SECONDS = []
    THIRDS = []
    json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": 4}}, "a2": 4}
    
    output:
    [a1 : {'b1': 1, 'b2': {'c1': 24, 'c2': 25}, 'b3': {'c3': 45, 'c4': 1, 'c5': 4}}], [a2 : 4]
    
    
    # You can pass more levels; the function accepts any number of key lists.
    print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS, FOURTH)))
    inputs:
    FIRSTS = ["a1", "a2"]
    SECONDS = ["b1", "b3"]
    THIRDS = ["c5"]
    FOURTH = ["f1"]
    json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
    output:
    [b1 : 1], [f1 : 99], [a2 : 4]
    
    print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS, FOURTH)))
    inputs:
    FIRSTS = ["a1", "a2"]
    SECONDS = ["b1", "b3"]
    THIRDS = [""]
    FOURTH = ["f1"]
    json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
    
    output:
    [b1 : 1], [ not found], [a2 : 4]
    
    
    
    print(", ".join(get_values(json_object, FIRSTS)))
    When no further key lists are passed (args is empty):
    inputs:
    FIRSTS = ["a1", "a2"]
    SECONDS = ["b1", "b3"]
    THIRDS = [""]
    FOURTH = ["f1"]
    json_object = {"a1": {"b1": 1, "b2": {"c1": 24, "c2": 25}, "b3": {"c3": 45, "c4": 1, "c5": {"f1":99}}}, "a2": 4}
    
    output:
    [a2 : 4]
    
    

    CODING

    def get_values(json_object, firsts, *args):
        """Collect "[key : value]" strings by walking a nested dict level by level.

        Args:
            json_object: The dict to look keys up in at the current level.
            firsts: Keys to look up in ``json_object``.
            *args: One key list per deeper level; ``args[0]`` holds the keys
                looked up inside dict values found at this level, ``args[1]``
                the keys one level further down, and so on.

        Returns:
            A list of strings in lookup order. A key that is absent yields
            ``"[key not found]"``. A key whose value is not a dict yields
            ``"[key : value]"``. A key whose value is a dict is descended into
            using the next key list; it is emitted whole as ``"[key : {...}]"``
            only when the next key list is an explicitly passed empty list.
            When no further key list is passed at all, a dict value
            contributes nothing.
        """
        # True only when the caller explicitly passed [] for the next level.
        now_lowest_level = bool(args) and args[0] == []
        seconds = args[0] if args else []
        next_args = args[1:]

        result = []
        for first in firsts:
            # Test membership rather than truthiness so that present-but-falsy
            # values (0, "", False, None, {}) are not reported as missing.
            if first not in json_object:
                result.append(f"[{first} not found]")
                continue

            value = json_object[first]
            if not isinstance(value, dict):
                result.append(f"[{first} : {value}]")
                continue

            # if want not to get the lowest level node, Please write :
            # if not seconds and not now_lowest_level:
            if not seconds and now_lowest_level:
                result.append(f"[{first} : {value}]")
                continue

            # Descend one key at a time so results keep the order of `seconds`.
            for second in seconds:
                result.extend(get_values(value, [second], *next_args))

        return result
    
    
    # Join with ", " so the printed line has the "[k : v], [k : v]" shape.
    print(", ".join(get_values(json_object, FIRSTS, SECONDS, THIRDS)))
    
    Login or Signup to reply.
  2. Try to break down your code into separate tasks that are easy to understand and easy to test. In this case, I went with prune_dict and get_leaf_nodes

    from pprint import pprint
    
    def prune_dict(data, *args):
        """
        Return a filtered copy of a nested dict.

        args[N] is the collection of keys allowed at depth N. A key that is
        not allowed at its depth is dropped. A nested dict is pruned with the
        remaining filters and kept only if something survives inside it;
        any other value under an allowed key is kept unchanged.
        With no filters left, nothing is allowed and the result is empty.
        """
        # A set gives constant-time membership tests on large filter lists.
        allowed = set(args[0]) if args else set()
        deeper_filters = args[1:]

        kept = {}
        for name, item in data.items():
            if name not in allowed:
                continue
            if not isinstance(item, dict):
                kept[name] = item
                continue
            # Recurse with the filters for the levels below this one.
            subtree = prune_dict(item, *deeper_filters)
            if subtree:
                kept[name] = subtree
        return kept
    
    def get_leaf_nodes(data: dict):
        """
        Walk a nested dict depth-first, in key order.

        Yields a (key, value) tuple for every value that is not itself a
        dict; dict values are descended into and never yielded themselves.
        """
        for name, item in data.items():
            if not isinstance(item, dict):
                yield name, item
                continue
            # Hand through every leaf found beneath this nested dict.
            for leaf in get_leaf_nodes(item):
                yield leaf
    
    # Sample nested structure used by the demo below.
    data = {
        "a1": {
            "b1": 1,
            "b2": {"c1": 24, "c2": 25},
            "b3": {"c3": 45, "c4": 1, "c5": 4},
        },
        "a2": 4,
    }

    # One key collection per depth; 'xx' does not occur anywhere in data.
    pruned = prune_dict(
        data,
        ('a1', 'xx'),
        ('b1', 'b3', 'xx'),
        ('c3', 'xx'),
    )
    print('Just the pruned data:')
    pprint(pruned)
    print()

    # get_leaf_nodes hands back bare (key, value) pairs, so the caller
    # decides how each pair is rendered.
    print('Custom format of the pruned leaf nodes:')
    messages = [f'[{k} : {v}]' for k, v in get_leaf_nodes(pruned)]
    print(', '.join(messages))
    print()

    # Be advised that leaf node keys are NOT guaranteed to be unique.
    # Uniqueness holds only within one dict, not across nested dicts, so
    # collecting the pairs into a dict can overwrite earlier entries.
    my_filters = [
        ['a1', 'xx'],
        ['b1', 'b3', 'xx'],
        ['c3', 'xx'],
    ]

    print('Another way to return or print the requested data:')
    leaf_pairs = get_leaf_nodes(prune_dict(data, *my_filters))
    pprint(dict(leaf_pairs))
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search