Changeset 140639


Ignore:
Timestamp:
Sep 28, 2015, 9:04:13 PM (5 years ago)
Author:
cal@…
Message:

base: tracelib: Add process tracking & killing

Some build systems (*cough* xcodebuild *cough*) start long-running daemon
processes that do not terminate after the build is finished. Because these
processes use file descriptors, they are instrumented by darwintrace and end up
in an inconsistent state when the tracelib server terminates at the end of the
build. These processes then commonly hang around as quasi-zombies and cannot be
killed except with SIGKILL.

Avoid this problem by tracking which process belongs to an incoming socket in
tracelib and sending these processes both SIGTERM and SIGKILL after the main
loop terminates.

NOTE: This change implements the data structures and operations required to do
the tracking, but does not actually fill and drain the list of open sockets.
This is because I did some refactoring while writing these changes and the
control flow path that adds and removes entries from the list can not be easily
separated (short of a complete re-implementation against an older version that
will be replaced anyway) from other refactoring.

Location:
trunk/base
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/base/configure.ac

    r138947 r140639  
    247247AC_HEADER_DIRENT
    248248AC_HEADER_SYS_WAIT
    249 AC_CHECK_HEADERS([crt_externs.h err.h fcntl.h libkern/OSAtomic.h limits.h paths.h pwd.h \
     249AC_CHECK_HEADERS([crt_externs.h err.h fcntl.h libkern/OSAtomic.h libproc.h limits.h paths.h pwd.h \
    250250        readline/history.h readline/readline.h spawn.h sys/cdefs.h sys/event.h sys/fcntl.h sys/file.h \
    251251        sys/paths.h sys/socket.h sys/sysctl.h utime.h])
  • trunk/base/src/pextlib1.0/tracelib.c

    r140638 r140639  
    4343#include <limits.h>
    4444#include <pthread.h>
     45#include <signal.h>
    4546#include <stdarg.h>
     47#include <stdbool.h>
    4648#include <stdint.h>
    4749#include <stdio.h>
     
    6365#include <darwintracelib1.0/sandbox_actions.h>
    6466
     67#if defined(LOCAL_PEERPID) && defined(HAVE_LIBPROC_H)
     68#include <libproc.h>
     69#define HAVE_PEERPID_LIST
     70#endif /* defined(LOCAL_PEERPID) && defined(HAVE_LIBPROC_H) */
     71
    6572#include "tracelib.h"
    6673
     
    8693}
    8794#endif
     95
     96#ifdef HAVE_PEERPID_LIST
     97static bool peerpid_list_enqueue(int sock, pid_t pid);
     98static pid_t peerpid_list_dequeue(int sock);
     99static pid_t peerpid_list_get(int sock, const char **progname);
     100static void peerpid_list_walk(bool (*callback)(int sock, pid_t pid, const char *progname));
     101#endif /* defined(HAVE_PEERPID_LIST) */
     102
    88103
    89104static char *name;
     
    109124} sandbox_violation_t;
    110125static void sandbox_violation(int sock, const char *path, sandbox_violation_t type);
     126
     127#ifdef HAVE_PEERPID_LIST
     128typedef struct _peerpid {
     129    struct _peerpid *ppid_next;
     130    char            *ppid_prog;
     131    int              ppid_sock;
     132    pid_t            ppid_pid;
     133} peerpid_entry_t;
     134
     135static peerpid_entry_t *peer_list = NULL;
     136
     137/**
     138 * Add a new entry to the list of PIDs of peers. Call this once for each
     139 * accepted socket with the socket and the peer's PID.
     140 *
     141 * @param sock The new socket that was opened by the process with the given PID
     142 *             and should be added to the list of peers.
     143 * @param pid The PID of the new peer.
     144 * @return boolean indicating success.
     145 */
     146static bool peerpid_list_enqueue(int sock, pid_t pid) {
     147    char pathbuf[PROC_PIDPATHINFO_MAXSIZE];
     148    const char *progname = "<unknown>";
     149
     150    peerpid_entry_t *ppid = malloc(sizeof(peerpid_entry_t));
     151    if (!ppid) {
     152        return false;
     153    }
     154
     155    if (proc_pidpath(pid, pathbuf, sizeof(pathbuf))) {
     156        progname = pathbuf;
     157    }
     158
     159    ppid->ppid_prog = strdup(progname);
     160    if (!ppid->ppid_prog) {
     161        free(ppid);
     162        return false;
     163    }
     164    ppid->ppid_sock = sock;
     165    ppid->ppid_pid = pid;
     166    ppid->ppid_next = peer_list;
     167    peer_list = ppid;
     168    return true;
     169}
     170
     171/**
     172 * Given a socket, dequeue a peer from the current list of peers. Use this when
     173 * a socket is closed.
     174 *
     175 * @param sock The socket that is being closed and should be dequeued.
     176 * @return The PID of the socket that has been dequeued, or (pid_t) -1
     177 */
     178static pid_t peerpid_list_dequeue(int sock) {
     179    peerpid_entry_t **ref = &peer_list;
     180    while (*ref) {
     181        peerpid_entry_t *curr = *ref;
     182        if (curr->ppid_sock == sock) {
     183            // dequeue the element
     184            *ref = curr->ppid_next;
     185            pid_t pid = curr->ppid_pid;
     186            free(curr->ppid_prog);
     187            free(curr);
     188            return pid;
     189        }
     190
     191        ref = &curr->ppid_next;
     192    }
     193
     194    return (pid_t) -1;
     195}
     196
     197/**
     198 * Return the peer PID given a socket.
     199 *
     200 * @param sock The socket for which the peer PID is needed.
     201 * @param progname A pointer that will point to the string that holds the
     202 *                 command line corresponding to the PID at the time of
     203 *                 enqueuing. Set to NULL if not needed.
     204 * @return The peer's PID or (pid_t) -1, if the socket could not be found in the list.
     205 */
     206static pid_t peerpid_list_get(int sock, const char **progname) {
     207    peerpid_entry_t *curr = peer_list;
     208    while (curr) {
     209        if (curr->ppid_sock == sock) {
     210            if (progname) {
     211                *progname = curr->ppid_prog;
     212            }
     213            return curr->ppid_pid;
     214        }
     215
     216        curr = curr->ppid_next;
     217    }
     218
     219    return (pid_t) -1;
     220}
     221
     222/**
     223 * Walk the current list of (socket, peer PID) pairs and call a callback
     224 * function for each pair.
     225 *
     226 * @param func Callback function to call for each tuple of socket, peer PID and
     227 *             peer command line. The function should take an integer (the
     228 *             socket), a pid_t (the peer's PID) and a const char * (the peer's
     229 *             command line) and return a boolean (true, if the element should
     230 *             be removed from the list, false otherwise). The callback must
     231 *             not modify the list using peerpid_list_enqueue() or
     232 *             peerpid_list_dequeue().
     233 */
     234static void peerpid_list_walk(bool (*callback)(int sock, pid_t pid, const char *progname)) {
     235    peerpid_entry_t **ref = &peer_list;
     236    while (*ref) {
     237        peerpid_entry_t *curr = *ref;
     238        if (callback(curr->ppid_sock, curr->ppid_pid, curr->ppid_prog)) {
     239            // dequeue the element
     240            *ref = curr->ppid_next;
     241            free(curr->ppid_prog);
     242            free(curr);
     243            continue;
     244        }
     245
     246        ref = &curr->ppid_next;
     247    }
     248}
     249#endif /* defined(HAVE_PEERPID_LIST) */
    111250
    112251#define MAX_SOCKETS (1024)
     
    311450
    312451    if (len > BUFSIZE - 1) {
    313         fprintf(stderr, "tracelib: transfer too large: %" PRIu32 " bytes sent, but buffer holds %d on socket %d\n", len, BUFSIZE - 1, sock);
     452        pid_t pid = (pid_t) -1;
     453#ifdef HAVE_PEERPID_LIST
     454        pid = peerpid_list_get(sock, NULL);
     455#endif
     456        fprintf(stderr, "tracelib: transfer too large: %" PRIu32 " bytes sent, but buffer holds %d on socket %d from pid %ld\n", len, BUFSIZE - 1, sock, (unsigned long) pid);
    314457        return 0;
    315458    }
     
    510653    return TCL_OK;
    511654}
     655
     656#ifdef HAVE_PEERPID_LIST
     657/**
     658 * Callback to be passed to peerpid_list_walk(). Closes the open sockets and
     659 * sends SIGTERM to the associated processes. Leaves the list unmodified.
     660 */
     661static bool close_and_send_sigterm(int sock UNUSED, pid_t pid, const char *progname) {
     662    ui_warn(interp, "Sending SIGTERM to process %ld: %s", (unsigned long) pid, progname);
     663    kill(pid, SIGTERM);
     664
     665    // keep the elements in the list
     666    return false;
     667}
     668
     669/**
     670 * Callback to be passed to peerpid_list_walk(). Sends SIGKILL to the processes
     671 * and deletes the elements from the list.
     672 */
     673static bool send_sigkill_and_free(int sock, pid_t pid, const char *progname UNUSED) {
     674    close(sock);
     675    kill(pid, SIGKILL);
     676
     677    // remove the elements from the list
     678    return true;
     679}
     680#endif
    512681
    513682/* create this on heap rather than stack, due to its rather large size */
     
    719888
    720889    /* NOTE: We aren't necessarily closing all client sockets here! */
     890
     891    // Close remaining sockets to avoid dangling processes
    721892    if (opensockcount > 0) {
    722         fprintf(stderr, "tracelib: %d open sockets will leak at end of runcmd\n", opensockcount);
     893#ifdef HAVE_PEERPID_LIST
     894        ui_warn(interp, "tracelib: %d open sockets leaking at end of runcmd, closing, sending SIGTERM and SIGKILL\n", opensockcount);
     895        peerpid_list_walk(close_and_send_sigterm);
     896        peerpid_list_walk(send_sigkill_and_free);
     897#else
     898        ui_warn(interp, "tracelib: %d open sockets leaking at end of runcmd\n", opensockcount);
     899#endif
    723900    }
    724901    pthread_mutex_lock(&sock_mutex);
Note: See TracChangeset for help on using the changeset viewer.