diff --git a/examples/syscall-steal.c b/examples/syscall-steal.c index 726bcfc..e885c84 100644 --- a/examples/syscall-steal.c +++ b/examples/syscall-steal.c @@ -43,6 +43,14 @@ #if defined(CONFIG_KPROBES) #define HAVE_KPROBES 1 +#if defined(CONFIG_X86_64) +/* If you have tried to use the syscall table to intercept syscalls and it + * doesn't work, you can try to use Kprobes to intercept syscalls. + * Set USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL to 1 to register a pre-handler + * before the syscall. + */ +#define USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL 0 +#endif #include <linux/kprobes.h> #else #define HAVE_PARAM 1 @@ -58,12 +66,37 @@ module_param(sym, ulong, 0644); #endif /* Version < v5.7 */ -static unsigned long **sys_call_table_stolen; - /* UID we want to spy on - will be filled from the command line. */ static uid_t uid = -1; module_param(uid, int, 0644); +#if USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL + +/* syscall_sym is the symbol name of the syscall to spy on. It defaults to + * "__x64_sys_openat" and can only be set at load time (0444): the kprobe is + * registered once in init. Look up symbol names in /proc/kallsyms. + */ +static char *syscall_sym = "__x64_sys_openat"; +module_param(syscall_sym, charp, 0444); + +static int sys_call_kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + if (__kuid_val(current_uid()) != uid) { + return 0; + } + + pr_info("%s called by %d\n", syscall_sym, uid); + return 0; +} + +static struct kprobe syscall_kprobe = { + .symbol_name = "__x64_sys_openat", + .pre_handler = sys_call_kprobe_pre_handler, +}; +#else + +static unsigned long **sys_call_table_stolen; + /* A pointer to the original system call. The reason we keep this, rather * than call the original function (sys_openat), is because somebody else * might have replaced the system call before us. 
Note that this is not @@ -202,9 +235,23 @@ static void disable_write_protection(void) clear_bit(16, &cr0); __write_cr0(cr0); } +#endif static int __init syscall_steal_start(void) { +#if USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL + + int err; + /* use symbol name from the module parameter */ + syscall_kprobe.symbol_name = syscall_sym; + err = register_kprobe(&syscall_kprobe); + if (err) { + pr_err("register_kprobe() on %s failed: %d\n", syscall_sym, err); + pr_err("Please check the symbol name from 'syscall_sym' parameter.\n"); + return err; + } + +#else if (!(sys_call_table_stolen = acquire_sys_call_table())) return -1; @@ -218,13 +265,17 @@ static int __init syscall_steal_start(void) enable_write_protection(); - pr_info("Spying on UID:%d\n", uid); +#endif + pr_info("Spying on UID:%d\n", uid); return 0; } static void __exit syscall_steal_end(void) { +#if USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL + unregister_kprobe(&syscall_kprobe); +#else if (!sys_call_table_stolen) return; @@ -239,6 +290,7 @@ static void __exit syscall_steal_end(void) disable_write_protection(); sys_call_table_stolen[__NR_openat] = (unsigned long *)original_call; enable_write_protection(); +#endif msleep(2000); } diff --git a/lkmpg.tex b/lkmpg.tex index a14c7f3..1f83fc8 100644 --- a/lkmpg.tex +++ b/lkmpg.tex @@ -1566,6 +1566,11 @@ At first glance, it appears we could solve this particular problem by checking i When A is removed, it sees that the system call was changed to \cpp|B_openat| so that it is no longer pointing to \cpp|A_openat|, so it will not restore it to \cpp|sys_openat| before it is removed from memory. Unfortunately, \cpp|B_openat| will still try to call \cpp|A_openat| which is no longer there, so that even without removing B the system would crash. +For x86 architecture, the system call table cannot be used to invoke a system call after commit +\href{https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1e3ad78334a69b36e107232e337f9d693dcc9df2}{1e3ad78} since v6.9. 
+This commit has been backported to long-term stable kernels, such as v5.15.154+, v6.1.85+, v6.6.26+ and v6.8.5+; see this \href{https://stackoverflow.com/a/78607015}{answer} for more details. +In this case, thanks to Kprobes, a hook can instead be placed on the system call entry to intercept the system call. + Note that all the related problems make syscall stealing unfeasible for production use. In order to keep people from doing potential harmful things \cpp|sys_call_table| is no longer exported. This means, if you want to do something more than a mere dry run of this example, you will have to patch your current kernel in order to have \cpp|sys_call_table| exported.