I have this Dockerfile
:
FROM python:3.12.4-slim-bookworm
COPY ./python_bench.py ./pystone.py performance.py /home/
CMD [ "/bin/bash" ]
this is pystone.py
:
#! /usr/bin/env python3
"""
"PYSTONE" Benchmark Program
Version: Python/1.1 (corresponds to C/1.1 plus 2 Pystone fixes)
Author: Reinhold P. Weicker, CACM Vol 27, No 10, 10/84 pg. 1013.
Translated from ADA to C by Rick Richardson.
Every method to preserve ADA-likeness has been used,
at the expense of C-ness.
Translated from C to Python by Guido van Rossum.
Version History:
Version 1.1 corrects two bugs in version 1.0:
First, it leaked memory: in Proc1(), NextRecord ends
up having a pointer to itself. I have corrected this
by zapping NextRecord.PtrComp at the end of Proc1().
Second, Proc3() used the operator != to compare a
record to None. This is rather inefficient and not
true to the intention of the original benchmark (where
a pointer comparison to None is intended; the !=
operator attempts to find a method __cmp__ to do value
comparison of the record). Version 1.1 runs 5-10
percent faster than version 1.0, so benchmark figures
of different versions can't be compared directly.
"""
LOOPS = 50000
from time import perf_counter
__version__ = "1.1"
[Ident1, Ident2, Ident3, Ident4, Ident5] = range(1, 6)
class Record:
def __init__(self, PtrComp = None, Discr = 0, EnumComp = 0,
IntComp = 0, StringComp = 0):
self.PtrComp = PtrComp
self.Discr = Discr
self.EnumComp = EnumComp
self.IntComp = IntComp
self.StringComp = StringComp
def copy(self):
return Record(self.PtrComp, self.Discr, self.EnumComp,
self.IntComp, self.StringComp)
TRUE = 1
FALSE = 0
def main(loops=LOOPS):
benchtime, stones = pystones(loops)
print("Pystone(%s) time for %d passes = %g" %
(__version__, loops, benchtime))
print("This machine benchmarks at %g pystones/second" % stones)
def pystones(loops=LOOPS):
return Proc0(loops)
IntGlob = 0
BoolGlob = FALSE
Char1Glob = ''
Char2Glob = ''
Array1Glob = [0]*51
Array2Glob = [x[:] for x in [Array1Glob]*51]
PtrGlb = None
PtrGlbNext = None
def Proc0(loops=LOOPS):
global IntGlob
global BoolGlob
global Char1Glob
global Char2Glob
global Array1Glob
global Array2Glob
global PtrGlb
global PtrGlbNext
starttime = perf_counter()
for i in range(loops):
pass
nulltime = perf_counter() - starttime
PtrGlbNext = Record()
PtrGlb = Record()
PtrGlb.PtrComp = PtrGlbNext
PtrGlb.Discr = Ident1
PtrGlb.EnumComp = Ident3
PtrGlb.IntComp = 40
PtrGlb.StringComp = "DHRYSTONE PROGRAM, SOME STRING"
String1Loc = "DHRYSTONE PROGRAM, 1'ST STRING"
Array2Glob[8][7] = 10
starttime = perf_counter()
for i in range(loops):
Proc5()
Proc4()
IntLoc1 = 2
IntLoc2 = 3
String2Loc = "DHRYSTONE PROGRAM, 2'ND STRING"
EnumLoc = Ident2
BoolGlob = not Func2(String1Loc, String2Loc)
while IntLoc1 < IntLoc2:
IntLoc3 = 5 * IntLoc1 - IntLoc2
IntLoc3 = Proc7(IntLoc1, IntLoc2)
IntLoc1 = IntLoc1 + 1
Proc8(Array1Glob, Array2Glob, IntLoc1, IntLoc3)
PtrGlb = Proc1(PtrGlb)
CharIndex = 'A'
while CharIndex <= Char2Glob:
if EnumLoc == Func1(CharIndex, 'C'):
EnumLoc = Proc6(Ident1)
CharIndex = chr(ord(CharIndex)+1)
IntLoc3 = IntLoc2 * IntLoc1
IntLoc2 = IntLoc3 / IntLoc1
IntLoc2 = 7 * (IntLoc3 - IntLoc2) - IntLoc1
IntLoc1 = Proc2(IntLoc1)
benchtime = perf_counter() - starttime - nulltime
if benchtime == 0.0:
loopsPerBenchtime = 0.0
else:
loopsPerBenchtime = (loops / benchtime)
return benchtime, loopsPerBenchtime
def Proc1(PtrParIn):
PtrParIn.PtrComp = NextRecord = PtrGlb.copy()
PtrParIn.IntComp = 5
NextRecord.IntComp = PtrParIn.IntComp
NextRecord.PtrComp = PtrParIn.PtrComp
NextRecord.PtrComp = Proc3(NextRecord.PtrComp)
if NextRecord.Discr == Ident1:
NextRecord.IntComp = 6
NextRecord.EnumComp = Proc6(PtrParIn.EnumComp)
NextRecord.PtrComp = PtrGlb.PtrComp
NextRecord.IntComp = Proc7(NextRecord.IntComp, 10)
else:
PtrParIn = NextRecord.copy()
NextRecord.PtrComp = None
return PtrParIn
def Proc2(IntParIO):
IntLoc = IntParIO + 10
while 1:
if Char1Glob == 'A':
IntLoc = IntLoc - 1
IntParIO = IntLoc - IntGlob
EnumLoc = Ident1
if EnumLoc == Ident1:
break
return IntParIO
def Proc3(PtrParOut):
global IntGlob
if PtrGlb is not None:
PtrParOut = PtrGlb.PtrComp
else:
IntGlob = 100
PtrGlb.IntComp = Proc7(10, IntGlob)
return PtrParOut
def Proc4():
global Char2Glob
BoolLoc = Char1Glob == 'A'
BoolLoc = BoolLoc or BoolGlob
Char2Glob = 'B'
def Proc5():
global Char1Glob
global BoolGlob
Char1Glob = 'A'
BoolGlob = FALSE
def Proc6(EnumParIn):
EnumParOut = EnumParIn
if not Func3(EnumParIn):
EnumParOut = Ident4
if EnumParIn == Ident1:
EnumParOut = Ident1
elif EnumParIn == Ident2:
if IntGlob > 100:
EnumParOut = Ident1
else:
EnumParOut = Ident4
elif EnumParIn == Ident3:
EnumParOut = Ident2
elif EnumParIn == Ident4:
pass
elif EnumParIn == Ident5:
EnumParOut = Ident3
return EnumParOut
def Proc7(IntParI1, IntParI2):
IntLoc = IntParI1 + 2
IntParOut = IntParI2 + IntLoc
return IntParOut
def Proc8(Array1Par, Array2Par, IntParI1, IntParI2):
global IntGlob
IntLoc = IntParI1 + 5
Array1Par[IntLoc] = IntParI2
Array1Par[IntLoc+1] = Array1Par[IntLoc]
Array1Par[IntLoc+30] = IntLoc
for IntIndex in range(IntLoc, IntLoc+2):
Array2Par[IntLoc][IntIndex] = IntLoc
Array2Par[IntLoc][IntLoc-1] = Array2Par[IntLoc][IntLoc-1] + 1
Array2Par[IntLoc+20][IntLoc] = Array1Par[IntLoc]
IntGlob = 5
def Func1(CharPar1, CharPar2):
CharLoc1 = CharPar1
CharLoc2 = CharLoc1
if CharLoc2 != CharPar2:
return Ident1
else:
return Ident2
def Func2(StrParI1, StrParI2):
IntLoc = 1
while IntLoc <= 1:
if Func1(StrParI1[IntLoc], StrParI2[IntLoc+1]) == Ident1:
CharLoc = 'A'
IntLoc = IntLoc + 1
if CharLoc >= 'W' and CharLoc <= 'Z':
IntLoc = 7
if CharLoc == 'X':
return TRUE
else:
if StrParI1 > StrParI2:
IntLoc = IntLoc + 7
return TRUE
else:
return FALSE
def Func3(EnumParIn):
EnumLoc = EnumParIn
if EnumLoc == Ident3: return TRUE
return FALSE
if __name__ == '__main__':
import sys
def error(msg):
print(msg, end=' ', file=sys.stderr)
print("usage: %s [number_of_loops]" % sys.argv[0], file=sys.stderr)
sys.exit(100)
nargs = len(sys.argv) - 1
if nargs > 1:
error("%d arguments are too many;" % nargs)
elif nargs == 1:
try: loops = int(sys.argv[1])
except ValueError:
error("Invalid argument %r;" % sys.argv[1])
else:
loops = LOOPS
main(loops)
this is python_bench.py
:
#!/usr/bin/env python3
import math
from time import time
N = range(10)
N_i = range(1_000)
N_j = range(1_000)
x = 1
start = time()
for _ in N:
for i in N_i:
for j in N_j:
x += -1**j * math.sqrt(i)/max(j,2)
stop = time()
print(stop-start)
this is performance.py:
from multiprocessing import Pool, cpu_count
from datetime import datetime
def stress_test(args):
cpu, value = args
start_time = datetime.now()
for i in range(value):
value = value * i
print(f"cpu: {cpu} time: {datetime.now() - start_time}")
if __name__ == '__main__':
start_time = datetime.now()
cpu_count = cpu_count()
with Pool(cpu_count) as mp_pool:
mp_pool.map(stress_test, [(cpu, 100000000) for cpu in range(cpu_count)])
print(f"total: {datetime.now() - start_time}")
the Host is linux laptop with Ubuntu 22.04.4 LTS.
python on host:
$ file /usr/bin/python3.12
ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=6c2ae97f8a91e9bdc027a8d877e9abef191aa422, for GNU/Linux 3.2.0, stripped
python on container:
$ file /usr/local/bin/python3.12
> ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=e1466a54058de9be791ef96f61c8e185388684eb, for GNU/Linux 3.2.0, stripped
Results
performance.py:
host:
cpu: 9 time: 0:00:02.379246
cpu: 3 time: 0:00:02.387882
cpu: 5 time: 0:00:02.388341
cpu: 6 time: 0:00:02.393504
cpu: 10 time: 0:00:02.400015
cpu: 8 time: 0:00:02.411345
cpu: 7 time: 0:00:02.412351
cpu: 4 time: 0:00:02.449361
cpu: 0 time: 0:00:02.666412
cpu: 11 time: 0:00:02.671237
cpu: 2 time: 0:00:02.763631
cpu: 1 time: 0:00:02.775081
total: 0:00:02.790534
Container:
cpu: 3 time: 0:00:03.275465
cpu: 4 time: 0:00:03.280247
cpu: 2 time: 0:00:03.280876
cpu: 11 time: 0:00:03.362792
cpu: 7 time: 0:00:03.365540
cpu: 8 time: 0:00:03.374378
cpu: 6 time: 0:00:03.377563
cpu: 9 time: 0:00:03.380101
cpu: 1 time: 0:00:03.574691
cpu: 0 time: 0:00:03.673995
cpu: 10 time: 0:00:03.773611
cpu: 5 time: 0:00:03.783492
total: 0:00:03.802251
pystone.py:
host:
Pystone(1.1) time for 50000 passes = 0.0531104
This machine benchmarks at 941435 pystones/second
container:
Pystone(1.1) time for 50000 passes = 0.062226
This machine benchmarks at 803522 pystones/second
python_bench.py:
host:
1.9780004024505615
container:
3.0173771381378174
Docker info
Client: Docker Engine - Community
Version: 27.1.1
Context: default
Debug Mode: false
Plugins:
buildx: Docker Buildx (Docker Inc.)
Version: v0.16.1
Path: /usr/libexec/docker/cli-plugins/docker-buildx
compose: Docker Compose (Docker Inc.)
Version: v2.29.1
Path: /usr/libexec/docker/cli-plugins/docker-compose
Server:
Containers: 6
Running: 4
Paused: 0
Stopped: 2
Images: 24
Server Version: 27.1.1
Storage Driver: overlay2
Backing Filesystem: extfs
Supports d_type: true
Using metacopy: false
Native Overlay Diff: true
userxattr: false
Logging Driver: json-file
Cgroup Driver: systemd
Cgroup Version: 2
Plugins:
Volume: local
Network: bridge host ipvlan macvlan null overlay
Log: awslogs fluentd gcplogs gelf journald json-file local splunk syslog
Swarm: inactive
Runtimes: runc io.containerd.runc.v2
Default Runtime: runc
Init Binary: docker-init
containerd version: 2bf793ef6dc9a18e00cb12efb64355c2c9d5eb41
runc version: v1.1.13-0-g58aa920
init version: de40ad0
Security Options:
apparmor
seccomp
Profile: builtin
cgroupns
Kernel Version: 6.5.0-45-generic
Operating System: Ubuntu 22.04.4 LTS
OSType: linux
Architecture: x86_64
CPUs: 12
Total Memory: 15.23GiB
Name: farhad-LIFEBOOK-E5512
ID: 28068456-89b2-497e-8c89-da5bf898b96a
Docker Root Dir: /var/lib/docker
Debug Mode: false
Experimental: false
Insecure Registries:
127.0.0.0/8
Live Restore Enabled: false
$ lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 22.04.4 LTS
Release: 22.04
Codename: jammy
$ uname -r
6.5.0-45-generic
first I ran all 3 on consecutively on host and then on container. why the results are much lower than on the host? specially by pystone.py and the last script?
2
Answers
Could it be container overhead? Meaning python+local machine runs faster than docker+python+local machine. If not, then maybe check for CPU limits? By default, there should be none.
Look for CpuShares, CpuQuota, and/or CpuPeriod.
You can also run docker stats and check if memory is limited.
Ensure that your Docker container is allowed to use enough CPU and memory. You can check this by running:
And then see if there is any performance differences and tweak around this.