skip to Main Content

Existing Code

import json
filename = 'thunar-volman/debian/control'
dict1 = {}  # maps each field name (text before ': ') to its value
with open(filename) as fh:
    for line in fh:
        print(line)
        command, description = line.strip().split(': ')  # raises ValueError unless the line splits into exactly two parts
        print(command)
        print(description)
        dict1[command.strip()] = description.strip()

with open("test.json", "w") as out_file:
    json.dump(dict1, out_file, indent=4, sort_keys = False)


Error

Build-Depends
debhelper-compat (= 13),
               intltool,

Traceback (most recent call last):
  File "read.py", line 7, in <module>
    command, description = line.strip().split(': ')
ValueError: not enough values to unpack (expected 2, got 1)

The text file I am intending to process to json is here – https://salsa.debian.org/xfce-team/goodies/thunar-volman/-/blob/debian/master/debian/control

How can I process the file so that everything after the colon of Build-Depends, including the indented lines that follow it, is treated as the description for the Build-Depends command?

Any help would be very much appreciated as I am very new to json.

6

Answers


  1. Your file is YAML

    To work with YAML files you need the ruamel.yaml library

    Install

    pip install ruamel.yaml
    

    Load file and convert to json and write json

    from ruamel.yaml import YAML
    import json

    # Convert the control file to JSON by loading it as YAML.
    # NOTE(review): debian/control is a Debian control file, not YAML by
    # design; this approach only works while the content happens to parse
    # as valid YAML -- confirm on your input.
    yaml = YAML(typ='safe')   # default, if not specified, is 'rt' (round-trip)
    filename = 'thunar-volman/debian/control'
    with open(filename) as fh:
        # Pass the open stream: a plain string argument is parsed as YAML
        # text itself rather than being treated as a path to read.
        dict1 = yaml.load(fh)
    with open("test.json", "w") as out_file:
        json.dump(dict1, out_file, indent=4, sort_keys=False)
    
    Login or Signup to reply.
  2. You can do this:

    import json

    # Parse "Field: value" lines into a dict and dump it as JSON.
    # Lines that start with whitespace continue the previous field's value.
    filename = 'thunar-volman/debian/control'
    dict1 = {}
    command = ""
    with open(filename) as fh:
        for line in fh:
            print(line)
            if not line.strip():
                # Blank lines only separate paragraphs; they carry no field.
                continue
            if line[0].isspace():
                if not command:
                    raise ValueError(
                        f'Continuation line before any field: "{line.rstrip()}"'
                    )
                # Values are strings, so extend the text (str has no .append()).
                dict1[command] += " " + line.strip()
            else:
                # Split on the first colon only so colons inside the value
                # (URLs, "${misc:Depends}") stay part of the value.
                command, description = line.strip().split(':', 1)
                command = command.strip()
                dict1[command] = description.strip()

    with open("test.json", "w") as out_file:
        json.dump(dict1, out_file, indent=4, sort_keys=False)
    
    
    

    So basically, if a line starts with whitespace, it is treated as a continuation and is appended to the value of the previous command.

    Login or Signup to reply.
  3. Use the maxsplit argument of Python's str.split() method.

    from pathlib import Path
    import json


    # Build {field: value} from "Field: value" lines; indented lines are
    # folded into the previous field's value, separated by a single space.
    commands, command = {}, None
    with Path('control').open() as file:   # be sure to adjust your path
        for line in file:
            if not line.strip():
                continue  # blank lines only separate paragraphs
            if not line[0].isspace() and ':' in line:
                # use 'maxsplit=1' to split only at the first occurrence of ':'
                command, description = map(str.strip, line.split(':', maxsplit=1))
                commands[command] = description
            elif command is not None:
                # continuation line (it may itself contain ':'): append it
                commands[command] = f'{commands[command]} {line.strip()}'.strip()
            else:
                raise ValueError(f'Continuation line before any field: "{line.rstrip()}"')

    # Path objects should not be used as context managers; write_text()
    # opens, writes and closes the file on its own.
    Path('test.json').write_text(json.dumps(commands, indent=4))
    

    output:

    {
        "Source": "thunar-volman",
        "Section": "xfce",
        "Priority": "optional",
        "Maintainer": "Debian Xfce Maintainers <[email protected]>",
        "Uploaders": "Yves-Alexis Perez <[email protected]>",
        "Build-Depends": "debhelper-compat (= 13), intltool, libexo-2-dev, libgtk-3-dev, libgudev-1.0-dev, libnotify-dev, libxfce4ui-2-dev, libxfce4util-dev, libxfconf-0-dev, xfce4-dev-tools (>= 4.16)",
        "Rules-Requires-Root": "no",
        "Standards-Version": "4.6.1",
        "Vcs-Git": "https://salsa.debian.org/xfce-team/goodies/thunar-volman.git",
        "Vcs-Browser": "https://salsa.debian.org/xfce-team/goodies/thunar-volman",
        "Homepage": "https://docs.xfce.org/xfce/thunar/thunar-volman",
        "Package": "thunar-volman",
        "Architecture": "linux-any",
        "Depends": "exo-utils, thunar, ${misc:Depends}, ${shlibs:Depends}",
        "Description": "Thunar extension for volumes management The Thunar Volume Manager is an extension for the Thunar file manager, which enables automatic management of removable drives and media."
    }
    
    Login or Signup to reply.
  4. Open the file and read it line by line. Ignore blank lines. Split on colon checking number of tokens. Ensure sanity of the input data

    from json import dumps

    CONTROL = '/Volumes/G-Drive/control'

    jdata = {}

    # Read "Key: value" fields line by line. A line with leading whitespace
    # continues the value of the most recent key; anything else that does not
    # split into key and value is rejected.
    with open(CONTROL) as control:
        previous_key = None
        for line in control:
            if not (sline := line.strip()):
                continue  # make sure to skip blank lines
            if line[0].isspace():
                if previous_key is None:
                    raise ValueError('Line has leading whitespace but no previous keyword')
                # you may not want the newline prefix
                jdata[previous_key] += '\n' + sline
            elif len(tokens := sline.split(':', 1)) == 2: # note second argument to split()
                # looks like a normal keyword and value
                key, value = tokens
                jdata[key] = value.lstrip()
                previous_key = key
            else:
                raise ValueError(f'Cannot understand "{line.rstrip()}"')

    print(dumps(jdata, indent=2))
    

    Output:

    {
      "Source": "thunar-volman",
      "Section": "xfce",
      "Priority": "optional",
      "Maintainer": "Debian Xfce Maintainers <[email protected]>",
      "Uploaders": "Yves-Alexis Perez <[email protected]>",
      "Build-Depends": "debhelper-compat (= 13),\nintltool,\nlibexo-2-dev,\nlibgtk-3-dev,\nlibgudev-1.0-dev,\nlibnotify-dev,\nlibxfce4ui-2-dev,\nlibxfce4util-dev,\nlibxfconf-0-dev,\nxfce4-dev-tools (>= 4.16)",
      "Rules-Requires-Root": "no",
      "Standards-Version": "4.6.1",
      "Vcs-Git": "https://salsa.debian.org/xfce-team/goodies/thunar-volman.git",
      "Vcs-Browser": "https://salsa.debian.org/xfce-team/goodies/thunar-volman",
      "Homepage": "https://docs.xfce.org/xfce/thunar/thunar-volman",
      "Package": "thunar-volman",
      "Architecture": "linux-any",
      "Depends": "exo-utils, thunar, ${misc:Depends}, ${shlibs:Depends}",
      "Description": "Thunar extension for volumes management\nThe Thunar Volume Manager is an extension for the Thunar file manager, which\nenables automatic management of removable drives and media."
    }
    
    Login or Signup to reply.
  5. Your problem is very simple, I wrote working code for it in under five minutes, in one go.

    You have some lines representing a mapping, the lines can contain colons and a line that contains a colon indicates the start of a new key value pair.

    The key is on the left side of the colon and the value can span multiple lines.

    We can assign a variable named key, set it to initially None. We then loop through the lines, for each line, if we found a colon and the first character is not space, we have found a new key value pair.

    We add the previous key value pair to the dictionary if key is not None. We then set the current key value pair to be remembered, and use them in later iterations.

    And then if the line is not empty and not the start of a new pair, it is the continuation of the previous value, we add it to the previous value.

    In this way we can process all items correctly, but we will miss the last item.

    We can add it later.

    Code:

    import json

    lines = """Source: thunar-volman
    Section: xfce
    Priority: optional
    Maintainer: Debian Xfce Maintainers <[email protected]>
    Uploaders: Yves-Alexis Perez <[email protected]>
    Build-Depends: debhelper-compat (= 13),
                   intltool,
                   libexo-2-dev,
                   libgtk-3-dev,
                   libgudev-1.0-dev,
                   libnotify-dev,
                   libxfce4ui-2-dev,
                   libxfce4util-dev,
                   libxfconf-0-dev,
                   xfce4-dev-tools (>= 4.16)
    Rules-Requires-Root: no
    Standards-Version: 4.6.1
    Vcs-Git: https://salsa.debian.org/xfce-team/goodies/thunar-volman.git
    Vcs-Browser: https://salsa.debian.org/xfce-team/goodies/thunar-volman
    Homepage: https://docs.xfce.org/xfce/thunar/thunar-volman
    
    Package: thunar-volman
    Architecture: linux-any
    Depends: exo-utils, thunar, ${misc:Depends}, ${shlibs:Depends}
    Description: Thunar extension for volumes management
     The Thunar Volume Manager is an extension for the Thunar file manager, which
     enables automatic management of removable drives and media.
    """.splitlines()


    def parse_fields(lines):
        """Parse "Key: value" lines into a dict.

        A line that does not start with whitespace and contains a colon opens
        a new field. Every other non-blank line is a continuation of the
        current field. A field with a single line maps to a string; a field
        with continuation lines maps to a list of its stripped lines.

        Raises:
            ValueError: if a continuation line appears before any field.
        """
        dic = {}
        key = None
        cache = []
        for raw in lines:
            line = raw.strip()
            if not line:
                continue
            # Test the unstripped line: after strip() the first character can
            # never be whitespace, so the continuation check would be lost.
            if not raw[0].isspace() and ':' in line:
                if key is not None:
                    # Flush the field collected so far.
                    dic[key] = cache[0] if len(cache) == 1 else cache
                key, value = line.split(':', 1)
                key, cache = key.strip(), [value.strip()]
            elif key is not None:
                cache.append(line)
            else:
                raise ValueError(f'Continuation line before any field: "{raw}"')

        # The loop only flushes a field when the next one starts, so the last
        # field is stored here.
        if key is not None:
            dic[key] = cache[0] if len(cache) == 1 else cache
        return dic


    dic = parse_fields(lines)
    print(json.dumps(dic, indent=4, ensure_ascii=False))
    
    {
        "Source": "thunar-volman",
        "Section": "xfce",
        "Priority": "optional",
        "Maintainer": "Debian Xfce Maintainers <[email protected]>",
        "Uploaders": "Yves-Alexis Perez <[email protected]>",
        "Build-Depends": [
            "debhelper-compat (= 13),",
            "intltool,",
            "libexo-2-dev,",
            "libgtk-3-dev,",
            "libgudev-1.0-dev,",
            "libnotify-dev,",
            "libxfce4ui-2-dev,",
            "libxfce4util-dev,",
            "libxfconf-0-dev,",
            "xfce4-dev-tools (>= 4.16)"
        ],
        "Rules-Requires-Root": "no",
        "Standards-Version": "4.6.1",
        "Vcs-Git": "https://salsa.debian.org/xfce-team/goodies/thunar-volman.git",
        "Vcs-Browser": "https://salsa.debian.org/xfce-team/goodies/thunar-volman",
        "Homepage": "https://docs.xfce.org/xfce/thunar/thunar-volman",
        "Package": "thunar-volman",
        "Architecture": "linux-any",
        "Depends": "exo-utils, thunar, ${misc:Depends}, ${shlibs:Depends}",
        "Description": [
            "Thunar extension for volumes management",
            "The Thunar Volume Manager is an extension for the Thunar file manager, which",
            "enables automatic management of removable drives and media."
        ]
    }
    
    Login or Signup to reply.
  6. You can use re (text contains the string from your question) (regex101):

    import re

    # Each match is a (field name, value) pair. With re.S the lazy value may
    # span several lines; it stops at the next line that begins a "name:"
    # field (re.M lets '^' match at every line start) or at the end of text.
    out = dict(re.findall(r"^(\S+):\s*(.*?)(?=^\S*:|\Z)", text, flags=re.M | re.S))
    print(out)
    

    Prints:

    {
        "Source": "thunar-volman\n",
        "Section": "xfce\n",
        "Priority": "optional\n",
        "Maintainer": "Debian Xfce Maintainers <[email protected]>\n",
        "Uploaders": "Yves-Alexis Perez <[email protected]>\n",
        "Build-Depends": "debhelper-compat (= 13),\n               intltool,\n               libexo-2-dev,\n               libgtk-3-dev,\n               libgudev-1.0-dev,\n               libnotify-dev,\n               libxfce4ui-2-dev,\n               libxfce4util-dev,\n               libxfconf-0-dev,\n               xfce4-dev-tools (>= 4.16)\n",
        "Rules-Requires-Root": "no\n",
        "Standards-Version": "4.6.1\n",
        "Vcs-Git": "https://salsa.debian.org/xfce-team/goodies/thunar-volman.git\n",
        "Vcs-Browser": "https://salsa.debian.org/xfce-team/goodies/thunar-volman\n",
        "Homepage": "https://docs.xfce.org/xfce/thunar/thunar-volman\n\n",
        "Package": "thunar-volman\n",
        "Architecture": "linux-any\n",
        "Depends": "exo-utils, thunar, ${misc:Depends}, ${shlibs:Depends}\n",
        "Description": "Thunar extension for volumes management\n The Thunar Volume Manager is an extension for the Thunar file manager, which\n enables automatic management of removable drives and media.",
    }
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search