Search code examples
mongodbcentosreplicaset

MongoDB 5.0.15/4.4.19 start issue after upgrade


Do anybody manage to start existed relicaset after MongoDB minor version upgrade from official MongoDB stable repo? On previous versions 5.0.14/4.4.18 everything works perfect.

{"t":{"$date":"2023-02-24T12:22:59.994+01:00"},"s":"I",  "c":"-",        "id":4333218, "ctx":"LogicalSessionCacheReap","msg":"Rescheduling the next replica set monitoring request","attr":{"replicaSet":"MainRepSet","host":"10.1.0.4:27017","delayMillis":0}}
{"t":{"$date":"2023-02-24T12:22:59.996+01:00"},"s":"I",  "c":"REPL",     "id":21783,   "ctx":"BackgroundSync","msg":"Waiting for pings from other members before syncing","attr":{"pingsNeeded":8}}
{"t":{"$date":"2023-02-24T12:22:59.996+01:00"},"s":"I",  "c":"CONNPOOL", "id":22576,   "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Connecting","attr":{"hostAndPort":"10.1.2.8:27017"}}
{"t":{"$date":"2023-02-24T12:22:59.996+01:00"},"s":"I",  "c":"REPL",     "id":40445,   "ctx":"TopologyVersionObserver","msg":"Started TopologyVersionObserver"}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"-",        "id":4333222, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"RSM received error response","attr":{"host":"10.1.2.8:27017","error":"HostUnreachable: Error connecting to 10.1.2.8:27017 :: caused by :: Connection refused","replicaSet":"MainRepSet","response":"{}"}}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"NETWORK",  "id":23015,   "ctx":"listener","msg":"Listening on","attr":{"address":"/tmp/mongodb-27017.sock"}}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"NETWORK",  "id":4712102, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"Host failed in replica set","attr":{"replicaSet":"MainRepSet","host":"10.1.2.8:27017","error":{"code":6,"codeName":"HostUnreachable","errmsg":"Error connecting to 10.1.2.8:27017 :: caused by :: Connection refused"},"action":{"dropConnections":true,"requestImmediateCheck":true}}}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"NETWORK",  "id":23015,   "ctx":"listener","msg":"Listening on","attr":{"address":"0.0.0.0"}}
{"t":{"$date":"2023-02-24T12:22:59.998+01:00"},"s":"I",  "c":"NETWORK",  "id":23016,   "ctx":"listener","msg":"Waiting for connections","attr":{"port":27017,"ssl":"off"}}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"CONTROL",  "id":23377,   "ctx":"SignalHandler","msg":"Received signal","attr":{"signal":15,"error":"Terminated"}}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"CONTROL",  "id":23378,   "ctx":"SignalHandler","msg":"Signal was sent by kill(2)","attr":{"pid":1,"uid":0}}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"CONTROL",  "id":23381,   "ctx":"SignalHandler","msg":"will terminate after current cmd ends"}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"REPL",     "id":4784900, "ctx":"SignalHandler","msg":"Stepping down the ReplicationCoordinator for shutdown","attr":{"waitTimeMillis":15000}}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"COMMAND",  "id":4784901, "ctx":"SignalHandler","msg":"Shutting down the MirrorMaestro"}
{"t":{"$date":"2023-02-24T12:23:00.000+01:00"},"s":"I",  "c":"REPL",     "id":40441,   "ctx":"SignalHandler","msg":"Stopping TopologyVersionObserver"}
{"t":{"$date":"2023-02-24T12:23:00.002+01:00"},"s":"I",  "c":"REPL",     "id":40447,   "ctx":"TopologyVersionObserver","msg":"Stopped TopologyVersionObserver"}
{"t":{"$date":"2023-02-24T12:23:00.003+01:00"},"s":"I",  "c":"SHARDING", "id":4784902, "ctx":"SignalHandler","msg":"Shutting down the WaitForMajorityService"}
{"t":{"$date":"2023-02-24T12:23:00.008+01:00"},"s":"I",  "c":"CONTROL",  "id":4784903, "ctx":"SignalHandler","msg":"Shutting down the LogicalSessionCache"}
{"t":{"$date":"2023-02-24T12:23:00.008+01:00"},"s":"I",  "c":"NETWORK",  "id":4333208, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"RSM host selection timeout","attr":{"replicaSet":"MainRepSet","error":"FailedToSatisfyReadPreference: Could not find host matching read preference { mode: \"primary\" } for set MainRepSet"}}
{"t":{"$date":"2023-02-24T12:23:00.008+01:00"},"s":"I",  "c":"CONTROL",  "id":20714,   "ctx":"LogicalSessionCacheRefresh","msg":"Failed to refresh session cache, will try again at the next refresh interval","attr":{"error":"ClientMarkedKilled: client has been killed"}}
{"t":{"$date":"2023-02-24T12:23:00.008+01:00"},"s":"I",  "c":"NETWORK",  "id":4333208, "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"RSM host selection timeout","attr":{"replicaSet":"MainRepSet","error":"FailedToSatisfyReadPreference: Could not find host matching read preference { mode: \"primary\" } for set MainRepSet"}}
{"t":{"$date":"2023-02-24T12:23:00.009+01:00"},"s":"I",  "c":"CONTROL",  "id":20711,   "ctx":"LogicalSessionCacheReap","msg":"Failed to reap transaction table","attr":{"error":"ClientMarkedKilled: client has been killed"}}
{"t":{"$date":"2023-02-24T12:23:00.009+01:00"},"s":"I",  "c":"NETWORK",  "id":20562,   "ctx":"SignalHandler","msg":"Shutdown: going to close listening sockets"}
{"t":{"$date":"2023-02-24T12:23:00.027+01:00"},"s":"I",  "c":"NETWORK",  "id":23729,   "ctx":"ReplicaSetMonitor-TaskExecutor","msg":"ServerPingMonitor is now monitoring host","attr":{"host":"10.1.0.4:27017","replicaSet":"MainRepSet"}}

at first it can't create /var/run/mongodb, but after manual creation and add permissions,

sudo mkdir -p /var/run/mongodb/
sudo chown -R mongodb:mongodb /var/run/mongodb

replicates still failed to start with previous error (no selinux/firewalld problems exists)


Solution

  • Ok the root cause is in corrupted mongod service file (corrupted by update) problem solved by adding next removed lines:

    ExecStartPre=/usr/bin/mkdir -p /var/run/mongodb
    ExecStartPre=/usr/bin/chown mongod:mongod /var/run/mongodb
    ExecStartPre=/usr/bin/chmod 0755 /var/run/mongodb
    PermissionsStartOnly=true
    PIDFile=/var/run/mongodb/mongod.pid
    Type=forking