skip to Main Content

Do anybody manage to start existed relicaset after MongoDB minor version upgrade from official MongoDB stable repo? On previous versions 5.0.14/4.4.18 everything works perfect.

{"t":{"$date":"2023-02-24T12:22:59.994+01:00"},"s":"I",  "c":"-",        "id":4333218, "ctx":"LogicalSessionCacheReap","msg":"Rescheduling the next replica set monitoring request","attr":{"replicaSet":"MainRepSet","host":"10.1.0.4:27017","delayMillis":0}}
{"t":{"$date":"2023-02-24T12:22:59.996+01:00"},"s":"I",  "c":"REPL",     "id":21783,   "ctx":"BackgroundSync","msg":"Waiting for pings from other members before syncing","attr":{"pingsNeeded":8}}
{"t":{"$date":"2023-02-24T12:22:59.996+01:00"},"s":"I",  "c":"CONNPOOL", "id":22576,   "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Connecting","attr":{"hostAndPort":"10.1.2.8:27017"}}
{"t":{"$date":"2023-02-24T12:22:59.996+01:00"},"s":"I",  "c":"REPL",     "id":40445,   "ctx":"TopologyVersionObserver","msg":"Started TopologyVersionObserver"}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"-",        "id":4333222, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"RSM received error response","attr":{"host":"10.1.2.8:27017","error":"HostUnreachable: Error connecting to 10.1.2.8:27017 :: caused by :: Connection refused","replicaSet":"MainRepSet","response":"{}"}}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"NETWORK",  "id":23015,   "ctx":"listener","msg":"Listening on","attr":{"address":"/tmp/mongodb-27017.sock"}}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"NETWORK",  "id":4712102, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Host failed in replica set","attr":{"replicaSet":"MainRepSet","host":"10.1.2.8:27017","error":{"code":6,"codeName":"HostUnreachable","errmsg":"Error connecting to 10.1.2.8:27017 :: caused by :: Connection refused"},"action":{"dropConnections":true,"requestImmediateCheck":true}}}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"NETWORK",  "id":23015,   "ctx":"listener","msg":"Listening on","attr":{"address":"0.0.0.0"}}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"NETWORK",  "id":23016,   "ctx":"listener","msg":"Waiting for connections","attr":{"port":27017,"ssl":"off"}}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"CONTROL",  "id":23377,   "ctx":"SignalHandler","msg":"Received signal","attr":{"signal":15,"error":"Terminated"}}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"CONTROL",  "id":23378,   "ctx":"SignalHandler","msg":"Signal was sent by kill(2)","attr":{"pid":1,"uid":0}}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"CONTROL",  "id":23381,   "ctx":"SignalHandler","msg":"will terminate after current cmd ends"}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"REPL",     "id":4784900, "ctx":"SignalHandler","msg":"Stepping down the ReplicationCoordinator for shutdown","attr":{"waitTimeMillis":15000}}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"COMMAND",  "id":4784901, "ctx":"SignalHandler","msg":"Shutting down the MirrorMaestro"}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"REPL",     "id":40441,   "ctx":"SignalHandler","msg":"Stopping TopologyVersionObserver"}
{"t":{"$date":"2023-02-24T12:23:00.002+01:00"},"s":"I",  "c":"REPL",     "id":40447,   "ctx":"TopologyVersionObserver","msg":"Stopped TopologyVersionObserver"}
{"t":{"$date":"2023-02-24T12:23:00.003+01:00"},"s":"I",  "c":"SHARDING", "id":4784902, "ctx":"SignalHandler","msg":"Shutting down the WaitForMajorityService"}
{"t":{"$date":"2023-02-24T12:23:00.008+01:00"},"s":"I",  "c":"CONTROL",  "id":4784903, "ctx":"SignalHandler","msg":"Shutting down the LogicalSessionCache"}
{"t":{"$date":"2023-02-24T12:23:00.008+01:00"},"s":"I",  "c":"NETWORK",  "id":4333208, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"RSM host selection timeout","attr":{"replicaSet":"MainRepSet","error":"FailedToSatisfyReadPreference: Could not find host matching read preference { mode: "primary" } for set MainRepSet"}}
{"t":{"$date":"2023-02-24T12:23:00.008+01:00"},"s":"I",  "c":"CONTROL",  "id":20714,   "ctx":"LogicalSessionCacheRefresh","msg":"Failed to refresh session cache, will try again at the next refresh interval","attr":{"error":"ClientMarkedKilled: client has been killed"}}
{"t":{"$date":"2023-02-24T12:23:00.008+01:00"},"s":"I",  "c":"NETWORK",  "id":4333208, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"RSM host selection timeout","attr":{"replicaSet":"MainRepSet","error":"FailedToSatisfyReadPreference: Could not find host matching read preference { mode: "primary" } for set MainRepSet"}}
{"t":{"$date":"2023-02-24T12:23:00.009+01:00"},"s":"I",  "c":"CONTROL",  "id":20711,   "ctx":"LogicalSessionCacheReap","msg":"Failed to reap transaction table","attr":{"error":"ClientMarkedKilled: client has been killed"}}
{"t":{"$date":"2023-02-24T12:23:00.009+01:00"},"s":"I",  "c":"NETWORK",  "id":20562,   "ctx":"SignalHandler","msg":"Shutdown: going to close listening sockets"}
{"t":{"$date":"2023-02-24T12:23:00.027+01:00"},"s":"I",  "c":"NETWORK",  "id":23729,   "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"ServerPingMonitor is now monitoring host","attr":{"host":"10.1.0.4:27017","replicaSet":"MainRepSet"}}

at first it can’t create /var/run/mongodb, but after manual creation and add permissions,

sudo mkdir -p /var/run/mongodb/
sudo chown -R mongodb:mongodb /var/run/mongodb

replicates still failed to start with previous error (no selinux/firewalld problems exists)

2

Answers


  1. Chosen as BEST ANSWER

    Ok the root cause is in corrupted mongod service file (corrupted by update) problem solved by adding next removed lines:

    ExecStartPre=/usr/bin/mkdir -p /var/run/mongodb
    ExecStartPre=/usr/bin/chown mongod:mongod /var/run/mongodb
    ExecStartPre=/usr/bin/chmod 0755 /var/run/mongodb
    PermissionsStartOnly=true
    PIDFile=/var/run/mongodb/mongod.pid
    Type=forking 
    

  2. This log means the mongod stopped because it was send a SIGTERM:

    {"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"CONTROL",  "id":23377,   "ctx":"SignalHandler","msg":"Received signal","attr":{"signal":15,"error":"Terminated"}}
    

    This line indicates the kill signal came from pid 1, which is usually init:

    {"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"CONTROL",  "id":23378,   "ctx":"SignalHandler","msg":"Signal was sent by kill(2)","attr":{"pid":1,"uid":0}}
    

    This can happen if the service file is not set up properly, or if the startup timeout is not long enough. See https://unix.stackexchange.com/a/276785

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search