skip to Main Content

I have this Dockerfile:

FROM python:3.12.4-slim-bookworm

COPY ./python_bench.py ./pystone.py performance.py /home/

CMD [ "/bin/bash" ]

this is pystone.py:

#! /usr/bin/env python3

"""
"PYSTONE" Benchmark Program

Version:        Python/1.1 (corresponds to C/1.1 plus 2 Pystone fixes)

Author:         Reinhold P. Weicker,  CACM Vol 27, No 10, 10/84 pg. 1013.

                Translated from ADA to C by Rick Richardson.
                Every method to preserve ADA-likeness has been used,
                at the expense of C-ness.

                Translated from C to Python by Guido van Rossum.

Version History:

                Version 1.1 corrects two bugs in version 1.0:

                First, it leaked memory: in Proc1(), NextRecord ends
                up having a pointer to itself.  I have corrected this
                by zapping NextRecord.PtrComp at the end of Proc1().

                Second, Proc3() used the operator != to compare a
                record to None.  This is rather inefficient and not
                true to the intention of the original benchmark (where
                a pointer comparison to None is intended; the !=
                operator attempts to find a method __cmp__ to do value
                comparison of the record).  Version 1.1 runs 5-10
                percent faster than version 1.0, so benchmark figures
                of different versions can't be compared directly.

"""

LOOPS = 50000

from time import perf_counter

__version__ = "1.1"

[Ident1, Ident2, Ident3, Ident4, Ident5] = range(1, 6)

class Record:

    def __init__(self, PtrComp = None, Discr = 0, EnumComp = 0,
                       IntComp = 0, StringComp = 0):
        self.PtrComp = PtrComp
        self.Discr = Discr
        self.EnumComp = EnumComp
        self.IntComp = IntComp
        self.StringComp = StringComp

    def copy(self):
        return Record(self.PtrComp, self.Discr, self.EnumComp,
                      self.IntComp, self.StringComp)

TRUE = 1
FALSE = 0

def main(loops=LOOPS):
    benchtime, stones = pystones(loops)
    print("Pystone(%s) time for %d passes = %g" % 
          (__version__, loops, benchtime))
    print("This machine benchmarks at %g pystones/second" % stones)


def pystones(loops=LOOPS):
    return Proc0(loops)

IntGlob = 0
BoolGlob = FALSE
Char1Glob = ''
Char2Glob = ''
Array1Glob = [0]*51
Array2Glob = [x[:] for x in [Array1Glob]*51]
PtrGlb = None
PtrGlbNext = None

def Proc0(loops=LOOPS):
    global IntGlob
    global BoolGlob
    global Char1Glob
    global Char2Glob
    global Array1Glob
    global Array2Glob
    global PtrGlb
    global PtrGlbNext

    starttime = perf_counter()
    for i in range(loops):
        pass
    nulltime = perf_counter() - starttime

    PtrGlbNext = Record()
    PtrGlb = Record()
    PtrGlb.PtrComp = PtrGlbNext
    PtrGlb.Discr = Ident1
    PtrGlb.EnumComp = Ident3
    PtrGlb.IntComp = 40
    PtrGlb.StringComp = "DHRYSTONE PROGRAM, SOME STRING"
    String1Loc = "DHRYSTONE PROGRAM, 1'ST STRING"
    Array2Glob[8][7] = 10

    starttime = perf_counter()

    for i in range(loops):
        Proc5()
        Proc4()
        IntLoc1 = 2
        IntLoc2 = 3
        String2Loc = "DHRYSTONE PROGRAM, 2'ND STRING"
        EnumLoc = Ident2
        BoolGlob = not Func2(String1Loc, String2Loc)
        while IntLoc1 < IntLoc2:
            IntLoc3 = 5 * IntLoc1 - IntLoc2
            IntLoc3 = Proc7(IntLoc1, IntLoc2)
            IntLoc1 = IntLoc1 + 1
        Proc8(Array1Glob, Array2Glob, IntLoc1, IntLoc3)
        PtrGlb = Proc1(PtrGlb)
        CharIndex = 'A'
        while CharIndex <= Char2Glob:
            if EnumLoc == Func1(CharIndex, 'C'):
                EnumLoc = Proc6(Ident1)
            CharIndex = chr(ord(CharIndex)+1)
        IntLoc3 = IntLoc2 * IntLoc1
        IntLoc2 = IntLoc3 / IntLoc1
        IntLoc2 = 7 * (IntLoc3 - IntLoc2) - IntLoc1
        IntLoc1 = Proc2(IntLoc1)

    benchtime = perf_counter() - starttime - nulltime
    if benchtime == 0.0:
        loopsPerBenchtime = 0.0
    else:
        loopsPerBenchtime = (loops / benchtime)
    return benchtime, loopsPerBenchtime

def Proc1(PtrParIn):
    PtrParIn.PtrComp = NextRecord = PtrGlb.copy()
    PtrParIn.IntComp = 5
    NextRecord.IntComp = PtrParIn.IntComp
    NextRecord.PtrComp = PtrParIn.PtrComp
    NextRecord.PtrComp = Proc3(NextRecord.PtrComp)
    if NextRecord.Discr == Ident1:
        NextRecord.IntComp = 6
        NextRecord.EnumComp = Proc6(PtrParIn.EnumComp)
        NextRecord.PtrComp = PtrGlb.PtrComp
        NextRecord.IntComp = Proc7(NextRecord.IntComp, 10)
    else:
        PtrParIn = NextRecord.copy()
    NextRecord.PtrComp = None
    return PtrParIn

def Proc2(IntParIO):
    IntLoc = IntParIO + 10
    while 1:
        if Char1Glob == 'A':
            IntLoc = IntLoc - 1
            IntParIO = IntLoc - IntGlob
            EnumLoc = Ident1
        if EnumLoc == Ident1:
            break
    return IntParIO

def Proc3(PtrParOut):
    global IntGlob

    if PtrGlb is not None:
        PtrParOut = PtrGlb.PtrComp
    else:
        IntGlob = 100
    PtrGlb.IntComp = Proc7(10, IntGlob)
    return PtrParOut

def Proc4():
    global Char2Glob

    BoolLoc = Char1Glob == 'A'
    BoolLoc = BoolLoc or BoolGlob
    Char2Glob = 'B'

def Proc5():
    global Char1Glob
    global BoolGlob

    Char1Glob = 'A'
    BoolGlob = FALSE

def Proc6(EnumParIn):
    EnumParOut = EnumParIn
    if not Func3(EnumParIn):
        EnumParOut = Ident4
    if EnumParIn == Ident1:
        EnumParOut = Ident1
    elif EnumParIn == Ident2:
        if IntGlob > 100:
            EnumParOut = Ident1
        else:
            EnumParOut = Ident4
    elif EnumParIn == Ident3:
        EnumParOut = Ident2
    elif EnumParIn == Ident4:
        pass
    elif EnumParIn == Ident5:
        EnumParOut = Ident3
    return EnumParOut

def Proc7(IntParI1, IntParI2):
    IntLoc = IntParI1 + 2
    IntParOut = IntParI2 + IntLoc
    return IntParOut

def Proc8(Array1Par, Array2Par, IntParI1, IntParI2):
    global IntGlob

    IntLoc = IntParI1 + 5
    Array1Par[IntLoc] = IntParI2
    Array1Par[IntLoc+1] = Array1Par[IntLoc]
    Array1Par[IntLoc+30] = IntLoc
    for IntIndex in range(IntLoc, IntLoc+2):
        Array2Par[IntLoc][IntIndex] = IntLoc
    Array2Par[IntLoc][IntLoc-1] = Array2Par[IntLoc][IntLoc-1] + 1
    Array2Par[IntLoc+20][IntLoc] = Array1Par[IntLoc]
    IntGlob = 5

def Func1(CharPar1, CharPar2):
    CharLoc1 = CharPar1
    CharLoc2 = CharLoc1
    if CharLoc2 != CharPar2:
        return Ident1
    else:
        return Ident2

def Func2(StrParI1, StrParI2):
    IntLoc = 1
    while IntLoc <= 1:
        if Func1(StrParI1[IntLoc], StrParI2[IntLoc+1]) == Ident1:
            CharLoc = 'A'
            IntLoc = IntLoc + 1
    if CharLoc >= 'W' and CharLoc <= 'Z':
        IntLoc = 7
    if CharLoc == 'X':
        return TRUE
    else:
        if StrParI1 > StrParI2:
            IntLoc = IntLoc + 7
            return TRUE
        else:
            return FALSE

def Func3(EnumParIn):
    EnumLoc = EnumParIn
    if EnumLoc == Ident3: return TRUE
    return FALSE

if __name__ == '__main__':
    import sys
    def error(msg):
        print(msg, end=' ', file=sys.stderr)
        print("usage: %s [number_of_loops]" % sys.argv[0], file=sys.stderr)
        sys.exit(100)
    nargs = len(sys.argv) - 1
    if nargs > 1:
        error("%d arguments are too many;" % nargs)
    elif nargs == 1:
        try: loops = int(sys.argv[1])
        except ValueError:
            error("Invalid argument %r;" % sys.argv[1])
    else:
        loops = LOOPS
    main(loops)

this is python_bench.py:

#!/usr/bin/env python3

import math
from time import time

N = range(10)
N_i = range(1_000)
N_j = range(1_000)
x = 1

start = time()
for _ in N:
    for i in N_i:
        for j in N_j:
            x += -1**j * math.sqrt(i)/max(j,2)
stop = time()
print(stop-start)

this is performance.py: 
from multiprocessing import Pool, cpu_count
from datetime import datetime

def stress_test(args):
    cpu, value = args
    start_time = datetime.now()
    for i in range(value):
        value = value * i
    print(f"cpu: {cpu} time: {datetime.now() - start_time}")

if __name__ == '__main__':
    start_time = datetime.now()
    cpu_count = cpu_count()
    with Pool(cpu_count) as mp_pool:
        mp_pool.map(stress_test, [(cpu, 100000000) for cpu in range(cpu_count)])
    print(f"total: {datetime.now() - start_time}")

the Host is linux laptop with Ubuntu 22.04.4 LTS.

python on host:

$ file /usr/bin/python3.12
 ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=6c2ae97f8a91e9bdc027a8d877e9abef191aa422, for GNU/Linux 3.2.0, stripped

python on container:

$ file  /usr/local/bin/python3.12 

> ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=e1466a54058de9be791ef96f61c8e185388684eb, for GNU/Linux 3.2.0, stripped

Results

performance.py:

host:

cpu: 9 time: 0:00:02.379246
cpu: 3 time: 0:00:02.387882
cpu: 5 time: 0:00:02.388341
cpu: 6 time: 0:00:02.393504
cpu: 10 time: 0:00:02.400015
cpu: 8 time: 0:00:02.411345
cpu: 7 time: 0:00:02.412351
cpu: 4 time: 0:00:02.449361
cpu: 0 time: 0:00:02.666412
cpu: 11 time: 0:00:02.671237
cpu: 2 time: 0:00:02.763631
cpu: 1 time: 0:00:02.775081
total: 0:00:02.790534

Container:

cpu: 3 time: 0:00:03.275465
cpu: 4 time: 0:00:03.280247
cpu: 2 time: 0:00:03.280876
cpu: 11 time: 0:00:03.362792
cpu: 7 time: 0:00:03.365540
cpu: 8 time: 0:00:03.374378
cpu: 6 time: 0:00:03.377563
cpu: 9 time: 0:00:03.380101
cpu: 1 time: 0:00:03.574691
cpu: 0 time: 0:00:03.673995
cpu: 10 time: 0:00:03.773611
cpu: 5 time: 0:00:03.783492
total: 0:00:03.802251

pystone.py:

host:

Pystone(1.1) time for 50000 passes = 0.0531104
This machine benchmarks at 941435 pystones/second

container:

Pystone(1.1) time for 50000 passes = 0.062226
This machine benchmarks at 803522 pystones/second

python_bench.py:

host:

1.9780004024505615

container:

3.0173771381378174

Docker info

Client: Docker Engine - Community
 Version:    27.1.1
 Context:    default
 Debug Mode: false
 Plugins:
  buildx: Docker Buildx (Docker Inc.)
    Version:  v0.16.1
    Path:     /usr/libexec/docker/cli-plugins/docker-buildx
  compose: Docker Compose (Docker Inc.)
    Version:  v2.29.1
    Path:     /usr/libexec/docker/cli-plugins/docker-compose

Server:
 Containers: 6
  Running: 4
  Paused: 0
  Stopped: 2
 Images: 24
 Server Version: 27.1.1
 Storage Driver: overlay2
  Backing Filesystem: extfs
  Supports d_type: true
  Using metacopy: false
  Native Overlay Diff: true
  userxattr: false
 Logging Driver: json-file
 Cgroup Driver: systemd
 Cgroup Version: 2
 Plugins:
  Volume: local
  Network: bridge host ipvlan macvlan null overlay
  Log: awslogs fluentd gcplogs gelf journald json-file local splunk syslog
 Swarm: inactive
 Runtimes: runc io.containerd.runc.v2
 Default Runtime: runc
 Init Binary: docker-init
 containerd version: 2bf793ef6dc9a18e00cb12efb64355c2c9d5eb41
 runc version: v1.1.13-0-g58aa920
 init version: de40ad0
 Security Options:
  apparmor
  seccomp
   Profile: builtin
  cgroupns
 Kernel Version: 6.5.0-45-generic
 Operating System: Ubuntu 22.04.4 LTS
 OSType: linux
 Architecture: x86_64
 CPUs: 12
 Total Memory: 15.23GiB
 Name: farhad-LIFEBOOK-E5512
 ID: 28068456-89b2-497e-8c89-da5bf898b96a
 Docker Root Dir: /var/lib/docker
 Debug Mode: false
 Experimental: false
 Insecure Registries:
  127.0.0.0/8
 Live Restore Enabled: false
 $ lsb_release  -a 

No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 22.04.4 LTS
Release: 22.04
Codename: jammy

$ uname -r

6.5.0-45-generic

first I ran all 3 on consecutively on host and then on container. why the results are much lower than on the host? specially by pystone.py and the last script?

2

Answers


  1. Could it be container overhead? Meaning python+local machine runs faster than docker+python+local machine. If not, then maybe check for CPU limits? By default, there should be none.

    docker inspect <container_id>
    

    Look for CpuShares, CpuQuota, and/or CpuPeriod.
    You can also run docker stats and check if memory is limited.

    Login or Signup to reply.
  2. Ensure that your Docker container is allowed to use enough CPU and memory. You can check this by running:

    docker run --cpus=4 --memory=8g <your_image>

    And then see if there is any performance differences and tweak around this.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search