skip to Main Content

I have spent hours trying to convert space-indented text to JSON. In my raw text data, nested values are indented by 4 spaces and key: value pairs are indented by 2 spaces. I am trying to build a JSON document from them.

My attempt

import json
from pprint import pprint

# Split the raw text into individual lines (newline-separated).
lines = raw_text.strip().split('\n')
result = {}
# Stack of (indent_level, container) pairs; the root container sits at level 0.
stack = [(0, result)]

for line in lines:
    # `indent` is everything before the first colon (leading spaces + name),
    # `content` is everything after it.
    indent, content = line.split(':', 1)
    indent_level = len(indent) - len(indent.lstrip())
    key = content.strip()
    value = None if key == "null" else {}

    print(indent_level, stack[-1][0])

    # NOTE(review): for a line at indent_level 0 this also pops the root
    # entry (0 <= 0), leaving the stack empty, so the next stack[-1] access
    # raises IndexError.
    while indent_level <= stack[-1][0]:
        stack.pop()

    parent = stack[-1][1] # i am getting index error
    parent[key] = value
    stack.append((indent_level, value))

j = json.dumps(result, indent=4)

Raw data

text = r"""
h0: 
    h1: de8
    h1b: null
    h1c: 
        h2: 
          h3: A
          h3a: S
          h3b: 
              h4: 
                h5: 81
    h1b:
      h1bi: null
      h1bii: null
"""

Required

j = {
    "h0": {
        "h1a": "de8",
        "h1b": "null",
        "h1c": {
            "h2a": {
                "h3a": "A",
                "h3b": "S",
                "h3c": {
                    "h4a": {
                        "h5a": "81"
                    }
                }
            }
        },
        "h1d": {
            "h1di": "null",
            "h1dii": {
                "h2di": "hello",
                "h2dii": "hi"
            }
        }
    }
}

3

Answers


  1. Chosen as BEST ANSWER

    As per the suggestion of @DasaniT, this worked for me

    text = r"""
    h0: 
        h1a: de8
        h1b: null
        h1c: 
            h2a: 
              h3a: A
              h3b: S
              h3c: 
                  h4a: 
                    h5a: 81
        h1d:
          h1di: null
          h1dii:
            h2di: hello
            h2dii: hi
    """
    
    import yaml
    
    # The indented "key: value" text is handed to the YAML parser as-is;
    # safe_load returns the parsed result as plain Python objects.
    j = yaml.safe_load(text)
    
    print(j)
    

  2. You can use the yaml package: https://pypi.org/project/PyYAML/

    The code will look something like this:

    import yaml
    
    text = "<the_text_you_want_to_convert_to_json>"
    # safe_load returns plain Python objects (see the printed output below),
    # not a JSON string; the result is named `data` so it does not shadow the
    # standard-library `json` module.
    data = yaml.safe_load(text)
    print(data)
    

    and the output is:

    {'h0': {'h1': 'de8', 'h1b': {'h1bi': None, 'h1bii': None}, 'h1c': {'h2': {'h3': 'A', 'h3a': 'S', 'h3b': {'h4': {'h5': 81}}}}}}
    
    Login or Signup to reply.
  3. Here you go (without external lib)

    import pprint
    
    text = r"""
    h0: 
        h1: de8
        h1b: null
        h1c: 
            h2: 
              h3: A
              h3a: S
              h3b: 
                  h4: 
                    h5: 81
        h1b:
          h1bi: null
          h1bii: null
    """
    
    
    def add_inner_dict(the_dict, keys):
        """Create an empty dict at the nested location named by ``keys``.

        All keys except the last are followed through the already-existing
        nested dicts, starting from ``the_dict``. A new empty dict is then
        stored under the last key (replacing whatever was there) and
        returned so the caller can fill it in.
        """
        parent = the_dict
        # Descend through every key but the final one.
        for name in keys[:-1]:
            parent = parent[name]
        child = parent[keys[-1]] = {}
        return child
    
    
    # Build a nested dict from the "key: value" lines in `text`.
    data = {}
    # Dict that plain "key: value" lines are currently written into.
    current_dict = None
    # Split on real newlines so each entry is handled on its own.
    lines = text.split('\n')
    key = None
    # Keys of every section header seen so far, outermost first.
    path = []
    for line in lines:
        stripped_line = line.strip()
        entries = stripped_line.split(':')
        if len(entries) == 2 and entries[1]:
            # "key: value" line -> store the stripped value in the current section.
            current_dict[entries[0]] = entries[1].strip()
        else:
            if entries[0].strip():
                # "key:" line with no value -> open a new nested section.
                # NOTE: `path` is only ever appended to, so every new section
                # is nested under the previously opened one regardless of its
                # indentation (visible in the sample output, where the second
                # 'h1b' ends up under 'h4').
                key = entries[0]
                path.append(key)
                current_dict = add_inner_dict(data, path)

    pprint.pprint(data)
    

    output

    {'h0': {'h1': 'de8',
            'h1b': 'null',
            'h1c': {'h2': {'h3': 'A',
                           'h3a': 'S',
                           'h3b': {'h4': {'h1b': {'h1bi': 'null', 'h1bii': 'null'},
                                          'h5': '81'}}}}}}
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search