/*******************************************************************************
 *
 * extract_UserDir_logs.c
 *
 * AUTHOR:
 *     Dan Harkless
 *
 * COPYRIGHT:
 *     This file is Copyright (C) 2016 by Dan Harkless, and is released under the
 *     GNU General Public License.
 *
 * USAGE:
 *     % extract_UserDir_logs [-d] <Apache_log_dir> <user_log_dir> <HOME_parent>
 *
 * EXAMPLE:
 *     % extract_UserDir_logs /var/log/apache /var/user_logs/apache /home
 *
 * DESCRIPTION:
 *     Surprisingly, Apache has no way to do per-user access and error logs.
 *     'VirtualHost's can have their own logs, but 'UserDir's can't.  I therefore
 *     wrote this program to continuously monitor the main access_log and
 *     error_log and look for entries corresponding to specific users, and write
 *     per-user log files.
 *
 *     On invocation, you must specify the directory Apache writes its logs to
 *     (which users shouldn't have read-access to), the directory to write the
 *     per-user logs to (which users should have read-access to but NOT write-
 *     access to, due to the dangers of malicious symlinks), and finally the
 *     parent directory of the user home directories.  All three should be
 *     absolute paths, since the program chdir()s between them.  If you have
 *     user home directories under multiple parent directories on your system,
 *     you'll need to modify this program to optionally take more than one (I
 *     made the home parent directory the last parameter with this in mind).
 *
 *     As this program runs, it will write files in the directory specified by
 *     the second parameter called <username>_access_log and
 *     <username>_error_log.  These files will be set to mode 600 and will be
 *     owned by the respective user.
 *
 *     The <username>_access_log will contain all entries for URLs starting with
 *     /~<username>/, /%7E<username>/, or /%7e<username>/.  It will NOT include
 *     URLs like /cgi-bin/<username>/, should you have something like that on
 *     your system (though it would be easy to add support for this).
 *
 *     Because Apache's error_log format doesn't include the URL that caused a
 *     particular error, all we can do when going through it is look for
 *     appearances of user home directory paths and assume errors should be
 *     reported to the owners of those directories (which will sometimes be a
 *     bad assumption if users link to each others' directories).  Errors that
 *     don't mention a user directory will not be extracted to anywhere.
 *
 *     If you cycle your log files, you'll need to send each of the two fork()d
 *     instances of extract_UserDir_logs HUP signals so they can re-open the new
 *     versions.
 *
 *     Note that after extract_UserDir_logs daemonizes, it writes any diagnostic
 *     output to <Apache_log_dir>/extract_UserDir_logs.log.  It does not log
 *     using syslog().  If for some reason you don't want extract_UserDir_logs
 *     to daemonize (as when debugging), pass it -d before the rest of the
 *     commandline parameters.
 *
 * DATE       MODIFICATION
 * ========== ==================================================================
 * 2016-03-14 Was previously comparing main_log_filename_g to the address of the
 *            ACCESS_LOG string literal as a minor optimization, but gcc -Wall /
 *            -Waddress now complains this is undefined behavior, so strcmp().
 * 2003-02-23 Wasn't calling va_end() in die() -- no biggie, since we always
 *            exited, but the code got copied to another file where we didn't.
 * 2002-05-17 Shouldn't initialize a struct sigaction with "= {0}" -- a call to
 *            sigemptyset() is the fully portable way to initialize .sa_mask.
 * 2001-04-06 Added SIG{INFO,PWR,URG,WINCH} to list of non-fatal sigs to ignore.
 * 2001-04-06 Removed SIGEMT & SIGSYS from signal_name() -- not there on Linux.
 * 2000-12-07 Previously logged to <Apache_log_dir>/extract_UserDir_logs and
 *            warned against putting the program there, as it'd be overwritten.
 *            Now log to extract_UserDir_logs.log so there isn't that danger.
 * 2000-10-19 Original.
 *
 *******************************************************************************/

#include <sys/types.h>  /* for pid_t, etc.; kept first, ahead of <sys/stat.h> */
#include <sys/stat.h>   /* for S_IRUSR, umask(), etc. */
#include <dirent.h>     /* for opendir(), etc. */
#include <errno.h>      /* for errno */
#include <fcntl.h>      /* for O_RDONLY, etc. */
#include <pwd.h>        /* for getpwnam(), etc. */
#include <signal.h>     /* for sigaction() etc. */
#include <stdio.h>      /* for BUFSIZ, etc. */
#include <stdarg.h>     /* for va_list, etc. */
#include <stdlib.h>     /* for EXIT_FAILURE, etc. */
#include <string.h>     /* for strrchr(), etc. */
#include <unistd.h>     /* for read(), etc. */

#define ACCESS_LOG                                   "access_log"
#define ERROR_LOG                                    "error_log"
#define LONGEST_USER_LOG_SUFFIX                      "_access_log"
#define MAX_CONTIGUOUS_SIGNAL_INSTALLATION_FAILURES  32
#define MAX_SLEEP_SECONDS                            60
/* Longest user name that is copied into a per-user log file name; anything
   longer is truncated.  Used both for the buffer size and the sprintf()
   precision, so there is only one number to change. */
#define MAX_USERNAME_CHARS                           256

/* Global variables (all have _g suffix) */
char*   apache_log_dir_g;     /* directory Apache writes its logs to */
char*   HOME_parent_g;        /* parent directory of the user home directories */
char*   line_g;               /* growable buffer holding the current log line */
char*   main_log_filename_g;  /* ACCESS_LOG or ERROR_LOG, depending on process */
char*   our_program_name_g;   /* basename of argv[0], for diagnostics */
char*   user_log_dir_g;       /* directory the per-user logs are written to */
int     main_log_fd_g;        /* descriptor of the main log being followed */
size_t  HOME_parent_len_g;    /* strlen(HOME_parent_g) */
size_t  line_max_size_g = BUFSIZ;  /* current allocated size of line_g */

/* Set (only) by HUP_signal_handler(); polled and cleared by input_log_line(). */
static volatile sig_atomic_t  HUP_received_g = 0;

void    die(const char* printf_format, ...);
void    extract_log_line(const char* username_ptr, size_t line_g_size);
void    generic_signal_handler(int caught_signal_num);
void    HUP_signal_handler(int caught_signal_num);
size_t  input_log_line(void);
void    install_signal_handlers(void);
void    open_main_log_file_and_delete_user_log_files(void);
void    process_access_log(void);
void    process_error_log(void);
char*   signal_name(int signal_num);
void    usage_error(void);

static int   is_user_log_filename(const char* filename);
static void  restart_on_log_files(void);
static void  write_whole_line(int fd, const char* filename,
                              const char* data, size_t size);


/* Print "<program>(<pid>): FATAL: <formatted message>: <strerror(errno)>." to
   stderr and exit with EXIT_FAILURE.  errno is sampled on entry, so call this
   directly after the failing call.  Never returns. */
void die(const char* printf_format, ...)
{
    int      errno_on_entry = errno;
    va_list  printf_args;

    va_start(printf_args, printf_format);
    fprintf(stderr, "%s(%d): FATAL: ", our_program_name_g, (int)getpid());
    vfprintf(stderr, printf_format, printf_args);
    fprintf(stderr, ": %s.\n", strerror(errno_on_entry));
    va_end(printf_args);

    exit(EXIT_FAILURE);
}


/* Write all `size` bytes of `data` to `fd`, continuing after short writes and
   after interruption by a signal.  Dies on any other error.  `filename` is
   only used in the error message. */
static void write_whole_line(int fd, const char* filename,
                             const char* data, size_t size)
{
    while (size > 0) {
        ssize_t  written = write(fd, data, size);

        if (written < 0) {
            if (errno == EINTR)
                continue;  /* e.g. SIGHUP arrived; just try again */
            die("write(\"%s/%s\", ..., %lu)",
                user_log_dir_g, filename, (unsigned long)size);
        }
        data += written;
        size -= (size_t)written;
    }
}


/* Append the current contents of line_g (line_g_size bytes) to the per-user
   log file of the user whose name starts at username_ptr.

   username_ptr must point into line_g; the name is taken to end at the next
   '/'.  The line is silently ignored if there is no such '/' or if getpwnam()
   doesn't know the name.  The file is called <username>_<main log name>, is
   opened relative to the current directory (the user log directory, which
   open_main_log_file_and_delete_user_log_files() chdir()s to), is created
   with mode 600 if needed, and is chown()ed to the user. */
void extract_log_line(const char* username_ptr, size_t line_g_size)
{
    char            user_log_filename[MAX_USERNAME_CHARS
                                      + sizeof(LONGEST_USER_LOG_SUFFIX)];
    char*           username_terminator_ptr = (char*)strchr(username_ptr, '/');
    struct passwd*  passwd_entry;
    int             user_log_fd;

    if (username_terminator_ptr == NULL)
        return;

    /* Temporarily terminate the user name in place, inside line_g.

       Would be nice to cache results of these calls, and keep the user log
       files open up through some timeout. */
    *username_terminator_ptr = '\0';
    passwd_entry = getpwnam(username_ptr);
    if (passwd_entry == NULL) {
        *username_terminator_ptr = '/';
        return;
    }

    /* We'll just truncate absurdly long user names (which we'll most likely
       never encounter), to avoid unnecessary malloc()s and realloc()s.  We
       use sprintf(), not snprintf(), for maximum portability; the "%.*s"
       precision is what prevents a buffer overflow. */
    sprintf(user_log_filename, "%.*s_%s",
            MAX_USERNAME_CHARS, username_ptr, main_log_filename_g);

    /* Don't need to use the username by itself any more, so now mend the
       line_g buffer before writing it out. */
    *username_terminator_ptr = '/';

    user_log_fd = open(user_log_filename, O_APPEND | O_CREAT | O_WRONLY,
                       S_IRUSR | S_IWUSR);
    if (user_log_fd < 0)
        die("open(\"%s/%s\","
            " O_APPEND | O_CREAT | O_WRONLY,"
            " S_IRUSR | S_IWUSR)", user_log_dir_g, user_log_filename);

    if (fchown(user_log_fd, passwd_entry->pw_uid, passwd_entry->pw_gid) < 0)
        die("fchown(\"%s/%s\", %ld, %ld)", user_log_dir_g, user_log_filename,
            (long)passwd_entry->pw_uid, (long)passwd_entry->pw_gid);

    write_whole_line(user_log_fd, user_log_filename, line_g, line_g_size);

    if (close(user_log_fd) < 0)
        die("close(%d)", user_log_fd);
}


/* Handler for every catchable signal whose default action would terminate
   us: report which signal it was, then exit with EXIT_FAILURE.

   NOTE(review): fprintf() and exit() are not async-signal-safe.  This is
   tolerated here because the handler never returns to the interrupted code,
   but a write()/_exit() based version would be more robust. */
void generic_signal_handler(int caught_signal_num)
{
    fprintf(stderr, "%s(%d): FATAL: Caught signal %d%s.\n",
            our_program_name_g, (int)getpid(), caught_signal_num,
            signal_name(caught_signal_num));
    exit(EXIT_FAILURE);
}


/* SIGHUP handler.  Only records that the signal arrived; the real work
   (re-opening the main log and deleting the per-user logs) is done from
   input_log_line(), outside of signal context, because it needs functions
   that must not be called from a signal handler. */
void HUP_signal_handler(int caught_signal_num)
{
    (void)caught_signal_num;
    HUP_received_g = 1;
}


/* Carry out a SIGHUP request: announce it, close the main log file, then
   re-open it and delete the per-user log files so that they can be rebuilt
   from the start of the new main log. */
static void restart_on_log_files(void)
{
    fprintf(stderr, "%s(%d): Received SIGHUP. Starting over on log files.\n",
            our_program_name_g, (int)getpid());
    fflush(stderr);

    if (close(main_log_fd_g) < 0)
        die("close(\"%s\")", main_log_filename_g);

    open_main_log_file_and_delete_user_log_files();
}


/* Read the next complete line (through its '\n') from the main log file into
   line_g, growing line_g as necessary, and NUL-terminate it.  Returns the
   number of characters stored, including the '\n'.

   Blocks until a whole line is available: whenever read() yields nothing (end
   of data so far, or an error) we sleep, backing off by one second per
   consecutive idle read up to MAX_SLEEP_SECONDS.  A pending SIGHUP is acted
   on here; any partially read line is then discarded, since it came from the
   old log file. */
size_t input_log_line(void)
{
    ssize_t       read_return = 1;
    ssize_t       prev_read_return;
    unsigned int  seconds = 1;
    size_t        i = 0;
    char          c;

    for (;;) {
        if (HUP_received_g) {
            HUP_received_g = 0;
            restart_on_log_files();
            i = 0;
            read_return = 1;
            seconds = 1;
        }

        /* To be more efficient, we should either read using buffered I/O or
           do our own sub-buffering, but for now we do 1-byte unbuffered
           reads. */
        prev_read_return = read_return;
        read_return = read(main_log_fd_g, &c, 1);

        if (read_return <= 0) {
            /* We've consumed everything written to the log file so far, or
               there's an error condition (hopefully a temporary one).  Try
               to sleep it off -- unless a SIGHUP is waiting to be handled.
               (A SIGHUP arriving during the sleep() cuts it short.) */
            if (!HUP_received_g)
                sleep(seconds);

            /* Linearly back off from hitting the disk when nothing's coming
               in. */
            if (prev_read_return <= 0 && seconds < MAX_SLEEP_SECONDS)
                seconds++;
            continue;
        }

        /* Always keep room for this character plus the terminating NUL. */
        if (i >= line_max_size_g - 2) {
            /* Oops, out of space -- double the line buffer size. */
            size_t  new_size = line_max_size_g * 2;
            char*   new_line;

            if (new_size / 2 != line_max_size_g) {
                errno = ENOMEM;
                die("log line longer than %lu bytes",
                    (unsigned long)line_max_size_g);
            }
            /* Use a temporary so line_g is still meaningful on failure. */
            new_line = realloc(line_g, new_size);
            if (new_line == NULL)
                die("realloc(%p, %lu)", (void*)line_g, (unsigned long)new_size);
            line_g = new_line;
            line_max_size_g = new_size;
        }

        line_g[i++] = c;
        seconds = 1;  /* we're seeing action; reset to minimal sleep */

        if (c == '\n')
            break;
    }

    line_g[i] = '\0';
    return i;
}


/* Install HUP_signal_handler() for SIGHUP and generic_signal_handler() for
   every other catchable signal whose default action is to terminate the
   process.  Dies if the SIGHUP handler can't be installed. */
void install_signal_handlers(void)
{
    int               contiguous_failures = 0;
    int               signal_num = 1;
    struct sigaction  generic_sigaction;
    struct sigaction  HUP_sigaction;

    generic_sigaction.sa_flags = 0;
    generic_sigaction.sa_handler = generic_signal_handler;
    sigemptyset(&generic_sigaction.sa_mask);

    /* No SA_RESTART: we want SIGHUP to interrupt the sleep() in
       input_log_line() so the request is noticed promptly. */
    HUP_sigaction.sa_flags = 0;
    HUP_sigaction.sa_handler = HUP_signal_handler;
    sigemptyset(&HUP_sigaction.sa_mask);

    /* Unfortunately there's no MAXSIG/SIGMAX #define that can be depended on
       to be there.  To be portable, we'll just start installing the generic
       signal handler at signal 1 (skipping stop and non-fatal and uncatchable
       signals) and keep going until we've had
       MAX_CONTIGUOUS_SIGNAL_INSTALLATION_FAILURES. */
    while (contiguous_failures < MAX_CONTIGUOUS_SIGNAL_INSTALLATION_FAILURES) {
        switch (signal_num) {
        case SIGCHLD:   /* default action does not terminate */
        case SIGCONT:   /* default action does not terminate */
#ifdef SIGINFO
        case SIGINFO:   /* default action does not terminate */
#endif /* SIGINFO */
        case SIGKILL:   /* uncatchable */
#ifdef SIGPWR
        case SIGPWR:    /* default action does not terminate */
#endif /* SIGPWR */
        case SIGSTOP:   /* uncatchable */
        case SIGURG:    /* default action does not terminate */
#ifdef SIGWINCH
        case SIGWINCH:  /* default action does not terminate */
#endif /* SIGWINCH */
        case SIGTSTP:   /* default action does not terminate */
        case SIGTTIN:   /* default action does not terminate */
        case SIGTTOU:   /* default action does not terminate */
            /* Don't install a handler for this signal. */
            break;

        case SIGHUP:
            if (sigaction(SIGHUP, &HUP_sigaction, NULL) < 0)
                die("sigaction(SIGHUP, &HUP_sigaction, NULL)");
            break;

        default:
            /* Try to install the generic signal handler. */
            if (sigaction(signal_num, &generic_sigaction, NULL) == 0)
                contiguous_failures = 0;
            else
                contiguous_failures++;
            break;
        }
        signal_num++;
    }
}


/* Return nonzero if `filename` looks like one of our per-user log files for
   the main log this process handles, i.e. if it ends in
   "_<main_log_filename_g>".  Matching on the END of the name (rather than on
   the first '_') is what makes this work for user names that themselves
   contain underscores. */
static int is_user_log_filename(const char* filename)
{
    size_t  filename_len = strlen(filename);
    size_t  main_log_len = strlen(main_log_filename_g);

    if (filename_len <= main_log_len)
        return 0;  /* no room for the '_' separator */
    if (filename[filename_len - main_log_len - 1] != '_')
        return 0;
    return strcmp(filename + (filename_len - main_log_len),
                  main_log_filename_g) == 0;
}


/* Open the main log file this process follows (leaving its descriptor in
   main_log_fd_g), then chdir() to the user log directory and delete every
   existing per-user log file of the corresponding type.  Dies on any error.
   On return the current directory is the user log directory, which
   extract_log_line() relies on. */
void open_main_log_file_and_delete_user_log_files(void)
{
    DIR*            user_log_DIR;
    struct dirent*  dirent_ptr;

    /* This chdir() is redundant when we're called from main().  I should do
       some malloc()ing and sprintf()ing and get rid of this chdir(). */
    if (chdir(apache_log_dir_g) < 0)
        die("chdir(\"%s\")", apache_log_dir_g);

    /* Open the appropriate main log file. */
    main_log_fd_g = open(main_log_filename_g, O_RDONLY);
    if (main_log_fd_g < 0)
        die("open(\"%s/%s\", O_RDONLY)", apache_log_dir_g, main_log_filename_g);

    if (chdir(user_log_dir_g) < 0)
        die("chdir(\"%s\")", user_log_dir_g);

    /* Contents of the user log files mimic the main log files, so we have to
       delete all the user log files (of the appropriate type) at this point
       since we're about to start extracting lines from the main log files
       starting at the beginning. */
    user_log_DIR = opendir(user_log_dir_g);
    if (user_log_DIR == NULL)
        die("opendir(\"%s\")", user_log_dir_g);

    for (;;) {
        /* readdir() returns NULL both at end-of-directory and on error; only
           errno tells them apart, so it has to be cleared before EVERY
           call. */
        errno = 0;
        dirent_ptr = readdir(user_log_DIR);
        if (dirent_ptr == NULL)
            break;

        if (is_user_log_filename(dirent_ptr->d_name)
            && remove(dirent_ptr->d_name) < 0)
            die("remove(\"%s\")", dirent_ptr->d_name);
    }
    if (errno != 0)
        die("readdir(\"%s\")", user_log_dir_g);

    if (closedir(user_log_DIR) < 0)
        die("closedir(\"%s\")", user_log_dir_g);
}


/* Follow the access log forever.  For each line, look at the first '/' after
   the first ']' (the end of the timestamp); if it is followed by "~", "%7E"
   or "%7e", what follows that is taken to be a user name and the line is
   handed to extract_log_line().  Never returns. */
void process_access_log(void)
{
    for (;;) {
        size_t  line_g_size = input_log_line();
        char*   ptr = strchr(line_g, ']');  /* end of "...[<timestamp>]" */

        if (ptr == NULL)
            continue;

        ptr = strchr(ptr, '/');  /* end of ' "(GET|HEAD|POST) /' */
        if (ptr == NULL)
            continue;
        ptr++;

        /* Each comparison is only reached if the previous character was not
           the terminating NUL, so we never read past the end of the line. */
        if (ptr[0] == '~')
            ptr += 1;
        else if (ptr[0] == '%' && ptr[1] == '7'
                 && (ptr[2] == 'E' || ptr[2] == 'e'))
            ptr += 3;
        else
            continue;

        extract_log_line(ptr, line_g_size);
    }
}


/* Follow the error log forever.  For each line, find the first occurrence of
   the home parent directory; if it is directly followed by '/', what follows
   that is taken to be a user name and the line is handed to
   extract_log_line().  Never returns. */
void process_error_log(void)
{
    for (;;) {
        size_t  line_g_size = input_log_line();
        char*   ptr = strstr(line_g, HOME_parent_g);  /* start of parent */

        if (ptr == NULL)
            continue;

        /* strstr() matched all of HOME_parent_g, so this is still within the
           line (at worst it is the terminating NUL). */
        ptr += HOME_parent_len_g;

        /* Insist on the path separator, so that e.g. "/home-dan/" is not
           mistaken for the home directory of "dan" under "/home". */
        if (*ptr == '/')
            extract_log_line(ptr + 1, line_g_size);
    }
}


/* Return " (SIGNAME)" for the signals we know by name, or "" for any other
   signal number.

   The return value of this function is intended to be printed out just to
   the right of a printed signal number.  For nonportable signals, we return
   the empty string.  If all systems defined sys_siglist[], this wouldn't be
   necessary.  The returned string is a literal; don't modify or free it. */
char* signal_name(int signal_num)
{
    switch (signal_num) {
    case SIGABRT:    return " (SIGABRT)";
    case SIGALRM:    return " (SIGALRM)";
    case SIGBUS:     return " (SIGBUS)";
    case SIGCHLD:    return " (SIGCHLD)";
    case SIGCONT:    return " (SIGCONT)";
    case SIGFPE:     return " (SIGFPE)";
    case SIGHUP:     return " (SIGHUP)";
    case SIGILL:     return " (SIGILL)";
    case SIGINT:     return " (SIGINT)";
#ifdef SIGIO
    case SIGIO:      return " (SIGIO)";
#endif /* SIGIO */
    case SIGKILL:    return " (SIGKILL)";
    case SIGPIPE:    return " (SIGPIPE)";
    case SIGPROF:    return " (SIGPROF)";
#ifdef SIGPWR
    case SIGPWR:     return " (SIGPWR)";
#endif /* SIGPWR */
    case SIGQUIT:    return " (SIGQUIT)";
    case SIGSEGV:    return " (SIGSEGV)";
    case SIGSTOP:    return " (SIGSTOP)";
    case SIGTERM:    return " (SIGTERM)";
    case SIGTRAP:    return " (SIGTRAP)";
    case SIGTSTP:    return " (SIGTSTP)";
    case SIGTTIN:    return " (SIGTTIN)";
    case SIGTTOU:    return " (SIGTTOU)";
    case SIGURG:     return " (SIGURG)";
    case SIGUSR1:    return " (SIGUSR1)";
    case SIGUSR2:    return " (SIGUSR2)";
    case SIGVTALRM:  return " (SIGVTALRM)";
#ifdef SIGWINCH
    case SIGWINCH:   return " (SIGWINCH)";
#endif /* SIGWINCH */
    default:         return "";
    }
}


/* Print the usage message to stderr and exit with EXIT_FAILURE. */
void usage_error(void)
{
    fputs("Usage: extract_UserDir_logs [-d] <Apache_log_dir> <user_log_dir>"
          " <HOME_parent>\n", stderr);
    exit(EXIT_FAILURE);
}


/* Parse the commandline, daemonize (unless -d was given), then fork: the
   parent follows the access_log and the child follows the error_log, each
   forever. */
int main(int argc, char** argv)
{
    pid_t  fork_return;

    if (argc > 0 && argv[0] != NULL) {
        char*  last_slash_in_argv_0 = strrchr(argv[0], '/');

        if (last_slash_in_argv_0 == NULL)
            our_program_name_g = argv[0];
        else
            our_program_name_g = last_slash_in_argv_0 + 1;
    } else
        our_program_name_g = "extract_UserDir_logs";

    /* Process the commandline. */
    if (argc < 4)
        usage_error();

    if (strcmp(argv[1], "-d") == 0) {
        /* Don't daemonize. */
        if (argc < 5)
            usage_error();

        apache_log_dir_g = argv[2];
        user_log_dir_g   = argv[3];
        HOME_parent_g    = argv[4];

        if (chdir(apache_log_dir_g) < 0)
            die("chdir(\"%s\")", apache_log_dir_g);
    } else {
        char*  our_log_filename;
        int    our_log_fd;

        /* Daemonize. */
        fork_return = fork();
        if (fork_return < 0)
            die("fork()");
        else if (fork_return != 0)
            /* We're the original process.  Terminate as part of
               daemonizing. */
            exit(EXIT_SUCCESS);

        /* Become the leader of a new session and a new process group and
           disconnect from the controlling terminal. */
        if (setsid() < 0)
            die("setsid()");

        apache_log_dir_g = argv[1];
        user_log_dir_g   = argv[2];
        HOME_parent_g    = argv[3];

        if (chdir(apache_log_dir_g) < 0)
            die("chdir(\"%s\")", apache_log_dir_g);

        /* Close stdin and stdout. */
        if (fclose(stdin) != 0)
            die("fclose(stdin)");
        if (fclose(stdout) != 0)
            die("fclose(stdout)");

        /* Redirect stderr to our log file, which (because of the chdir()
           above) lives in the Apache log directory.  We open() the file and
           dup2() it onto the stderr descriptor rather than freopen()ing
           stderr, because a failed freopen() would leave stderr closed and
           we couldn't even report the failure. */
        our_log_filename = malloc(strlen(our_program_name_g) + sizeof(".log"));
        if (our_log_filename == NULL)
            die("malloc(%lu)",
                (unsigned long)(strlen(our_program_name_g) + sizeof(".log")));
        sprintf(our_log_filename, "%s.log", our_program_name_g);

        our_log_fd = open(our_log_filename, O_WRONLY | O_CREAT | O_TRUNC,
                          S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
                          | S_IROTH | S_IWOTH);
        if (our_log_fd < 0)
            die("open(\"%s/%s\", O_WRONLY | O_CREAT | O_TRUNC)",
                apache_log_dir_g, our_log_filename);

        fflush(stderr);
        if (dup2(our_log_fd, STDERR_FILENO) < 0)
            die("dup2(%d, STDERR_FILENO)", our_log_fd);
        if (our_log_fd != STDERR_FILENO && close(our_log_fd) < 0)
            die("close(%d)", our_log_fd);

        free(our_log_filename);
    }

    /* Tolerate trailing slashes on the home parent ("/home/" == "/home"), as
       process_error_log() expects the separator to FOLLOW the parent. */
    HOME_parent_len_g = strlen(HOME_parent_g);
    while (HOME_parent_len_g > 1
           && HOME_parent_g[HOME_parent_len_g - 1] == '/')
        HOME_parent_g[--HOME_parent_len_g] = '\0';

    /* Per-user log files must not be readable by other users. */
    umask(077);

    install_signal_handlers();

    /* Have a child.  We will process the access_log and child will process
       the error_log. */
    fork_return = fork();
    if (fork_return < 0)
        die("fork()");
    else if (fork_return != 0) {
        main_log_filename_g = ACCESS_LOG;
        fprintf(stderr, "%s(%d): I am the parent and access_log handler.\n",
                our_program_name_g, (int)getpid());
        fprintf(stderr, "%s(%d): I am the child and error_log handler.\n",
                our_program_name_g, (int)fork_return);
        fflush(stderr);
    } else
        main_log_filename_g = ERROR_LOG;

    /* Wait to malloc() this until now to avoid fork()-caused copy. */
    line_g = malloc(line_max_size_g);
    if (line_g == NULL)
        die("malloc(%lu)", (unsigned long)line_max_size_g);

    open_main_log_file_and_delete_user_log_files();

    /* Now process the appropriate log file. */
    if (strcmp(main_log_filename_g, ACCESS_LOG) == 0)
        process_access_log();
    else
        process_error_log();

    return EXIT_SUCCESS;  /* never actually reached */
}