skip to Main Content

I'm running an OpenMP program (gcc and libgomp on CentOS 8.5). I used strace to inspect it and found that the clone syscall was called over and over again (I posted part of the log below). I believe this implies that the OpenMP threads were constantly being recreated, since the number of non-OpenMP threads is fixed and they are all created at the very beginning of the main function.

However, when I wrote a simple OpenMP test program, it seemed that OpenMP creates a thread pool during initialization and reuses it afterwards.

So my question is: in what situations will libgomp threads terminate, so that it has to recreate them?

clone(child_stack=0x7f16bff89ef0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[3265184], tls=0x7f16bff8f700, child_tidptr=0x7f16bff8f9d0) = 3265184
sched_setaffinity(3265184, 16, [8])     = 0
futex(0x7f16bff8fd18, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x40083f14, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x40083f14, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x40083f14, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x4012f184, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x4012f184, FUTEX_WAKE_PRIVATE, 2147483647) = 0
clone(child_stack=0x7f16c1f8def0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[3265185], tls=0x7f16c1f93700, child_tidptr=0x7f16c1f939d0) = 3265185
sched_setaffinity(3265185, 16, [2])     = 0
futex(0x7f16c1f93d18, FUTEX_WAKE_PRIVATE, 1) = 1
clone(child_stack=0x7f16c078aef0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[3265186], tls=0x7f16c0790700, child_tidptr=0x7f16c07909d0) = 3265186
sched_setaffinity(3265186, 16, [4])     = 0
futex(0x7f16c0790d18, FUTEX_WAKE_PRIVATE, 1) = 1
clone(child_stack=0x7f16bff89ef0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[3265187], tls=0x7f16bff8f700, child_tidptr=0x7f16bff8f9d0) = 3265187
sched_setaffinity(3265187, 16, [6])     = 0
futex(0x7f16bff8fd18, FUTEX_WAKE_PRIVATE, 1) = 1
clone(child_stack=0x7f16c178cef0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[3265188], tls=0x7f16c1792700, child_tidptr=0x7f16c17929d0) = 3265188
sched_setaffinity(3265188, 16, [8])     = 0
futex(0x7f16c1792d18, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x40083f14, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x40083f14, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x40083f14, FUTEX_WAKE_PRIVATE, 2147483647) = 0
clone(child_stack=0x7f16c1f8def0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[3265189], tls=0x7f16c1f93700, child_tidptr=0x7f16c1f939d0) = 3265189
sched_setaffinity(3265189, 16, [2])     = 0
futex(0x7f16c1f93d18, FUTEX_WAKE_PRIVATE, 1) = 1
clone(child_stack=0x7f16c178cef0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[3265190], tls=0x7f16c1792700, child_tidptr=0x7f16c17929d0) = 3265190
sched_setaffinity(3265190, 16, [4])     = 0

environmental variables:

export PARALLEL_ENSEMBLE_THREADS=5
export GOMP_CPU_AFFINITY=7,2,4,6,8 

2

Answers


  1. This is more of a "sysadmin" answer, but you can use strace to give you stacktraces showing where a given syscall is invoked. Use the -k command line option for that. So if you, for example, try this:

    $ strace -etrace=clone -k -y -f -f  bash -c "ls /dev/null;command echo"
    clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7e1b55ccaa10) = 141358
     > /usr/lib/x86_64-linux-gnu/libc.so.6(_Fork+0x27) [0xee1a7]
     > /usr/lib/x86_64-linux-gnu/libc.so.6(__libc_fork+0x52) [0xf3fb2]
     > /usr/bin/bash(make_child+0x19e) [0x6909e]
     > /usr/bin/bash(adjust_shell_level+0x315) [0x56935]
     > /usr/bin/bash(adjust_shell_level+0x1bda) [0x581fa]
     > /usr/bin/bash(execute_command_internal+0xb88) [0x4ae08]
     > /usr/bin/bash(execute_command+0xce) [0x4dc6e]
     > /usr/bin/bash(execute_command_internal+0x2ff2) [0x4d272]
     > /usr/bin/bash(parse_and_execute+0x7ab) [0xb5ffb]
     > /usr/bin/bash(_rl_enable_paren_matching+0xb0ce) [0x11e2de]
     > /usr/bin/bash(main+0xf78) [0x33568]
     > /usr/lib/x86_64-linux-gnu/libc.so.6(__libc_init_first+0x8a) [0x2a1ca]
     > /usr/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x8b) [0x2a28b]
     > /usr/bin/bash(_start+0x25) [0x34385]
    strace: Process 141358 attached
    /dev/null
    ...
    

    (you’ll see the SIGCHLD after since strace will show signals by default, but that part isn’t relevant for the answer)

    With your application (tracing only clone() and following children with -f), this should tell you where in libgomp's code the thread is created.

    Login or Signup to reply.
  2. I could reproduce the behavior:

    #include <omp.h>
    #include <stdio.h>
    
    /*
     * Runs five consecutive parallel regions, alternating between an explicit
     * team of 3 threads (num_threads(3)) and a region with no num_threads
     * clause, so that thread creation between regions can be observed with
     * `strace -etrace=clone`.
     *
     * Each thread of a team prints its thread number and the team size; the
     * initial thread prints a separator line after every region but the last.
     */
    int main(void) {

      /* Region 1: explicit team of 3. */
    #pragma omp parallel num_threads(3)
      printf("Hello from Thread %i of %i\n", omp_get_thread_num(),
             omp_get_num_threads());

      printf("\nDone with first team\n\n");

      /* Region 2: no num_threads clause, so the runtime picks the team size. */
    #pragma omp parallel
      printf("Hello from Thread %i of %i\n", omp_get_thread_num(),
             omp_get_num_threads());

      printf("\nDone with second team\n\n");

      /* Region 3: back to an explicit team of 3. */
    #pragma omp parallel num_threads(3)
      printf("Hello from Thread %i of %i\n", omp_get_thread_num(),
             omp_get_num_threads());

      printf("\nDone with third team\n\n");

      /* Region 4: runtime-chosen team size again. */
    #pragma omp parallel
      printf("Hello from Thread %i of %i\n", omp_get_thread_num(),
             omp_get_num_threads());

      printf("\nDone with forth team\n\n");

      /* Region 5: explicit team of 3 one last time. */
    #pragma omp parallel num_threads(3)
      printf("Hello from Thread %i of %i\n", omp_get_thread_num(),
             omp_get_num_threads());

      return 0;
    }
    
    $ OMP_NUM_THREADS=2 strace -etrace=clone ./a.out
    clone(child_stack=0x154b3d1ebf30, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[235919], tls=0x154b3d1ec700, child_tidptr=0x154b3d1ec9d0) = 235919
    clone(child_stack=0x154b3cfeaf30, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[235920], tls=0x154b3cfeb700, child_tidptr=0x154b3cfeb9d0) = 235920
    Hello from Thread 0 of 3
    Hello from Thread 2 of 3
    Hello from Thread 1 of 3
    
    Done with first team
    
    Hello from Thread 1 of 2
    Hello from Thread 0 of 2
    
    Done with second team
    
    clone(child_stack=0x154b3cfeaf30, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[235921], tls=0x154b3cfeb700, child_tidptr=0x154b3cfeb9d0) = 235921
    Hello from Thread 1 of 3
    Hello from Thread 2 of 3
    Hello from Thread 0 of 3
    
    Done with third team
    
    Hello from Thread 1 of 2
    Hello from Thread 0 of 2
    
    Done with forth team
    
    clone(child_stack=0x154b3cfeaf30, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[235922], tls=0x154b3cfeb700, child_tidptr=0x154b3cfeb9d0) = 235922
    Hello from Thread 1 of 3
    Hello from Thread 2 of 3
    Hello from Thread 0 of 3
    +++ exited with 0 +++
    

    Whenever the team size shrinks, libgomp seems to drop the extra thread, so it needs to create a new thread as soon as a later team is larger again.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search