
I’d like to import the output of:

kubectl get pods -o json

into a Python pandas DataFrame. It should also contain all containers together with their resource requests and limits.

My code starts as follows:

import json
import numpy as np
import pandas as pd
import os
pods_raw = os.popen('kubectl get pods -o json').read()
pods_json = json.loads(pods_raw)['items']

From here on I struggle to get the data into the DataFrame in the right shape; in particular, 'spec.containers' should be split into one row per container when a pod has multiple containers.
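
For reference, each item in pods_json is nested roughly like this (heavily simplified; the values are just examples), and I'd like to end up with one row per container:

# simplified shape of one entry in pods_json (one pod, possibly several containers)
{
    "metadata": {"name": "example-pod", "namespace": "default"},
    "status": {"phase": "Running"},
    "spec": {
        "containers": [
            {
                "name": "app",
                "image": "httpd",
                "resources": {
                    "requests": {"cpu": "100m", "memory": "128Mi"},
                    "limits": {"cpu": "500m", "memory": "256Mi"},
                },
            },
            # ...possibly more containers...
        ]
    },
}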

2 Answers


  1. Chosen as BEST ANSWER

    Currently I have the following code to solve this:

    #!/usr/bin/env python
    
    import json
    import pandas as pd
    import os
    
    # binary multipliers for Ki / Mi / Gi / Ti memory quantities
    kb = 1024
    mb = kb * kb
    gb = mb * kb
    tb = gb * kb
    
    def main():
        pods_raw = os.popen('kubectl get pods -A -o json').read()
        pods_json = json.loads(pods_raw)['items']
        first_split = ['status','metadata','spec']
        second_split = ['spec.containers',
                        'spec.containers.resources',
                        'spec.containers.resources.limits',
                        'spec.containers.resources.requests']
        df_pods = pd.DataFrame.from_dict(pods_json)
    
        df_pods = concat_data(df_pods, first_split)
    
        df_pods = expand_data(df_pods, ['spec.containers'])
    
        df_pods = concat_data(df_pods, second_split)
        df_pods.index.name = 'index'
        col_to_normalize = ['spec.containers.resources.limits.cpu',
                            'spec.containers.resources.limits.memory',
                            'spec.containers.resources.requests.cpu',
                            'spec.containers.resources.requests.memory']
    
        for col_name in col_to_normalize:
            df_pods[col_name] = df_pods[col_name].map(normalize_values)
        df_pods[col_to_normalize] = df_pods.groupby('index')[col_to_normalize].sum()
        df_pods = df_pods.drop_duplicates(['metadata.name'])
        df_pods = df_pods[df_pods['status.phase'] == 'Running']
    
        print(df_pods)
    
    
    def concat_data(df: pd.DataFrame, expands: list) -> pd.DataFrame:
        # flatten each dict column into separate, prefixed columns
        for expansion in expands:
            df = pd.concat([df, df.pop(expansion).apply(pd.Series).add_prefix(f"{expansion}.")], axis=1)
        return df
    
    def expand_data(df: pd.DataFrame, expands: list) -> pd.DataFrame:
        # explode each list column (e.g. spec.containers) into one row per element
        for expansion in expands:
            s = df[expansion].apply(pd.Series).stack()
            s.index = s.index.droplevel(-1)  # align the exploded rows with the original pod index
            df.index = [x for x in df.index]
            del df[expansion]
            s.name = expansion
            df = df.join(s)
        return df
    
    def normalize_values(val: str) -> float:
        # convert Kubernetes quantities: CPU millicores to cores, Ki/Mi/Gi/Ti memory to bytes
        try:
            if val[-1] == 'm':
                return int(val[:-1]) / 1000
            if val[-2].lower() == "k":
                return int(val[:-2]) * kb
            if val[-2].lower() == "m":
                return int(val[:-2]) * mb
            if val[-2].lower() == "g":
                return int(val[:-2]) * gb
            if val[-2].lower() == "t":
                return int(val[:-2]) * tb
            return int(val)
        except (TypeError, ValueError, IndexError):
            return 0
    
    if __name__ == '__main__':
        main()
    

    This works fine except for the following FutureWarning, which I haven't worked out how to solve yet:

    ./resources.py:43: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
    

    df = pd.concat([df, df.pop(expansion).apply(pd.Series).add_prefix(f"{expansion}.")], axis=1)
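
    One way to silence it seems to be giving pd.Series an explicit dtype, so that empty resources/limits dicts no longer build an empty Series with the default dtype. A minimal sketch of an adjusted concat_data (untested against every pandas version):

    def concat_data(df: pd.DataFrame, expands: list) -> pd.DataFrame:
        for expansion in expands:
            # dtype="object" avoids the empty-Series default-dtype FutureWarning
            expanded = df.pop(expansion).apply(lambda d: pd.Series(d, dtype="object"))
            df = pd.concat([df, expanded.add_prefix(f"{expansion}.")], axis=1)
        return df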


  2. Here is an example of how you can extract the data of interest into a DataFrame. The output is only an example (as you didn't specify the required output in the question):

    import json
    import pandas as pd
    
    # load the JSON data from a file (or use os.popen as in the question):
    with open("data.json", "r") as f_in:
        data = json.load(f_in)
    
    df = pd.DataFrame(data["items"])
    
    # metadata:
    df = pd.concat(
        [df, df.pop("metadata").apply(pd.Series).add_prefix("meta_")], axis=1
    )
    
    # spec:
    df = pd.concat(
        [df, df.pop("spec").apply(pd.Series).add_prefix("spec_")], axis=1
    )
    
    # status:
    df = pd.concat(
        [df, df.pop("status").apply(pd.Series).add_prefix("status_")], axis=1
    )
    
    
    # keep only the columns of interest:
    df = df[["meta_name", "meta_namespace", "status_phase", "spec_containers"]]
    
    # explode spec_containers column
    df = df.explode("spec_containers")
    df = pd.concat(
        [
            df,
            df.pop("spec_containers")
            .apply(pd.Series)
            .add_prefix("spec_")[["spec_image", "spec_name"]],
        ],
        axis=1,
    )
    
    
    print(df)
    

    Prints:

                                            meta_name meta_namespace status_phase                                                                spec_image                  spec_name
    0                      apache-lb-648c5cb8cb-mw5zh        default      Running                                                                     httpd                     apache
    0                      apache-lb-648c5cb8cb-mw5zh        default      Running                                      index.docker.io/istio/proxyv2:1.13.4                istio-proxy
    1                          csi-cephfsplugin-fc79l        default      Running  rocks.canonical.com:443/cdk/sig-storage/csi-node-driver-registrar:v2.0.1           driver-registrar
    1                          csi-cephfsplugin-fc79l        default      Running                        rocks.canonical.com:443/cdk/cephcsi/cephcsi:v3.3.1           csi-cephfsplugin
    1                          csi-cephfsplugin-fc79l        default      Running                        rocks.canonical.com:443/cdk/cephcsi/cephcsi:v3.3.1        liveness-prometheus
    
    
    ...and so on.
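
    The question also asks for the per-container resource requests and limits. Those can be flattened with the same concat/explode pattern, or more directly with pandas.json_normalize. A minimal, self-contained sketch (data.json and the column list are only an example; reindex is used because the requests/limits columns only appear when at least one container actually sets them):

    import json
    import pandas as pd

    with open("data.json", "r") as f_in:
        data = json.load(f_in)

    # one row per (pod, container); nested dicts inside each container
    # are flattened into dotted column names such as "resources.limits.cpu"
    df = pd.json_normalize(
        data["items"],
        record_path=["spec", "containers"],
        meta=[["metadata", "name"], ["metadata", "namespace"], ["status", "phase"]],
    )

    cols = [
        "metadata.name", "metadata.namespace", "status.phase",
        "name", "image",
        "resources.requests.cpu", "resources.requests.memory",
        "resources.limits.cpu", "resources.limits.memory",
    ]
    print(df.reindex(columns=cols))

    Compared to repeated apply(pd.Series) calls, json_normalize needs the paths up front, but it does the flattening in a single step.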
    