Skip to content

Commit

Permalink
CA-395554 Improve fairlocking reliability
Browse files Browse the repository at this point in the history
Under some circumstances (when the listen queue is short enough) a
process's blocking connect() to a UNIX-domain stream socket can return
success even when the server side has not yet called accept().

To work around this, we have the server portion write a small fixed blob
of data to each new connection and require the client to read it before
it considers itself to have the lock; merely being connected
successfully is no longer sufficient.

Signed-off-by: Tim Smith <[email protected]>
  • Loading branch information
Tim Smith committed Jul 23, 2024
1 parent 7d2b243 commit 25cd938
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 3 deletions.
14 changes: 13 additions & 1 deletion misc/fairlock/fairlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <sys/un.h>
#include <errno.h>
#include <syslog.h>
#include <signal.h>

int main(int argc, char *argv[]) {
struct sockaddr_un addr;
Expand Down Expand Up @@ -32,6 +33,11 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "listen(64) failed on socket %s: %s", argv[1], strerror(errno));
exit(1);
}
/* We write 5 bytes to the connection when we get it from the client, but we do not
* care if the client ever reads this. If they don't, we will get a SIGPIPE when we
* close the socket, which we will ignore. */
signal(SIGPIPE, SIG_IGN);

openlog("fairlock", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL2);

/* Now we have a socket, enter an endless loop of:
Expand All @@ -51,9 +57,15 @@ int main(int argc, char *argv[]) {
while (1) {
while ((fd = accept(sock, NULL, NULL)) > -1) {
char buffer[128];
ssize_t br;

syslog(LOG_INFO, "%s acquired\n", argv[1]);
while (read(fd, buffer, sizeof(buffer)) > 0) {
/* We do not care about the return code of this write() and will ignore any
* SIGPIPE it might generate. The buffer is big enough that this will complete
* even though the socket is blocking */
write(fd, "LOCK", 5);
while ((br = read(fd, buffer, sizeof(buffer)-1)) > 0) {
buffer[br]='\0';
buffer[127]='\0';
syslog(LOG_INFO, "%s sent '%s'\n", argv[1], buffer);
}
Expand Down
11 changes: 9 additions & 2 deletions misc/fairlock/fairlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def __init__(self, name):
self.name = name
self.sockname = os.path.join(SOCKDIR, name)
self.connected = False
self.sock = None

def _ensure_service(self):
service=f"fairlock@{self.name}.service"
Expand All @@ -52,18 +53,24 @@ def __enter__(self):
raise FairlockDeadlock(f"Deadlock on Fairlock resource '{self.name}'")

self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
self.sock.setblocking(True)
try:
self.sock.connect(self.sockname)
ret = self.sock.connect(self.sockname)
# Merely being connected is not enough. Read a small blob of data.
read = self.sock.recv(10)
except (FileNotFoundError, ConnectionRefusedError):
self._ensure_service()
self.sock.connect(self.sockname)
ret = self.sock.connect(self.sockname)
# Merely being connected is not enough. Read a small blob of data.
read = self.sock.recv(10)

self.sock.send(f'{os.getpid()} - {time.monotonic()}'.encode())
self.connected = True
return self

def __exit__(self, type, value, traceback):
self.sock.close()
self.sock = None
self.connected = False
return False

9 changes: 9 additions & 0 deletions mk/sm.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -237,5 +237,14 @@ Manager and some other packages
%{_unitdir}/[email protected]
%{_libexecdir}/fairlock

%posttrans fairlock
## On upgrade, shut down existing lock services so new ones will
## be started. There should be no locks held during upgrade operations
## so this is safe.
if [ $1 -gt 1 ];
then
systemctl stop $(systemctl list-units fairlock@* --all --no-legend | cut -d' ' -f1)
fi

%changelog

0 comments on commit 25cd938

Please sign in to comment.