Add a hook on syscall using Kprobes under x86 (#260)

On x86, the system call table can no longer be used to invoke a system
call as of commit 1e3ad78, first released in v6.9. The commit has also
been backported to stable and long-term kernels, such as v5.15.154+,
v6.1.85+, v6.6.26+ and v6.8.5+ [1]. On these kernels, Kprobes can be used
instead to hook the system call entry and intercept the system call.

[1] https://stackoverflow.com/a/78607015

Co-authored-by: Hao Dong <hao.dong.work@outlook.com>
This commit is contained in:
Hao.Dong 2024-07-05 07:40:53 +08:00 committed by GitHub
parent 032d1b8c0c
commit 950408472e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 60 additions and 3 deletions

View File

@ -43,6 +43,14 @@
#if defined(CONFIG_KPROBES) #if defined(CONFIG_KPROBES)
#define HAVE_KPROBES 1 #define HAVE_KPROBES 1
#if defined(CONFIG_X86_64)
/* Compile-time switch between the two interception methods of this module.
 * If intercepting syscalls through the syscall table does not work on your
 * kernel, set USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL to 1 to register a
 * Kprobes pre-handler on the syscall entry instead. With the default of 0,
 * the syscall table method is built.
 * The macro is only defined when both CONFIG_KPROBES and CONFIG_X86_64 are
 * set.
 */
#define USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL 0
#endif
#include <linux/kprobes.h> #include <linux/kprobes.h>
#else #else
#define HAVE_PARAM 1 #define HAVE_PARAM 1
@ -58,12 +66,37 @@ module_param(sym, ulong, 0644);
#endif /* Version < v5.7 */ #endif /* Version < v5.7 */
static unsigned long **sys_call_table_stolen;
/* UID we want to spy on - will be filled from the command line. */ /* UID we want to spy on - will be filled from the command line. */
static uid_t uid = -1; static uid_t uid = -1;
module_param(uid, int, 0644); module_param(uid, int, 0644);
#if USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL
/* syscall_sym is the symbol name of the syscall to spy on. The default is
 * "__x64_sys_openat", which can be changed with the 'syscall_sym' module
 * parameter. You can look up the symbol name of a syscall in /proc/kallsyms.
 * NOTE(review): the parameter mode is 0644, but the probe is registered with
 * the value read in syscall_steal_start(); presumably a later write only
 * changes the name printed in the log, not the probed symbol - confirm.
 */
static char *syscall_sym = "__x64_sys_openat";
module_param(syscall_sym, charp, 0644);
/* Pre-handler installed as .pre_handler of syscall_kprobe.
 *
 * Logs the name held in syscall_sym when the UID of the current task equals
 * the 'uid' module parameter; calls made under any other UID are ignored
 * silently.
 *
 * @p: the kprobe that fired (not used here).
 * @regs: register state passed in by Kprobes (not used here).
 *
 * Returns 0 in every case.
 */
static int sys_call_kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
    /* Only report calls made by the UID we were asked to spy on. */
    if (__kuid_val(current_uid()) == uid)
        pr_info("%s called by %d\n", syscall_sym, uid);

    return 0;
}
/* Probe descriptor for the syscall being spied on. The symbol name given
 * here is only the built-in default: syscall_steal_start() overwrites it
 * with the 'syscall_sym' module parameter before registering the probe.
 */
static struct kprobe syscall_kprobe = {
    .pre_handler = sys_call_kprobe_pre_handler,
    .symbol_name = "__x64_sys_openat",
};
#else
static unsigned long **sys_call_table_stolen;
/* A pointer to the original system call. The reason we keep this, rather /* A pointer to the original system call. The reason we keep this, rather
* than call the original function (sys_openat), is because somebody else * than call the original function (sys_openat), is because somebody else
* might have replaced the system call before us. Note that this is not * might have replaced the system call before us. Note that this is not
@ -202,9 +235,23 @@ static void disable_write_protection(void)
clear_bit(16, &cr0); clear_bit(16, &cr0);
__write_cr0(cr0); __write_cr0(cr0);
} }
#endif
static int __init syscall_steal_start(void) static int __init syscall_steal_start(void)
{ {
#if USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL
int err;
/* use symbol name from the module parameter */
syscall_kprobe.symbol_name = syscall_sym;
err = register_kprobe(&syscall_kprobe);
if (err) {
pr_err("register_kprobe() on %s failed: %d\n", syscall_sym, err);
pr_err("Please check the symbol name from 'syscall_sym' parameter.\n");
return err;
}
#else
if (!(sys_call_table_stolen = acquire_sys_call_table())) if (!(sys_call_table_stolen = acquire_sys_call_table()))
return -1; return -1;
@ -218,13 +265,17 @@ static int __init syscall_steal_start(void)
enable_write_protection(); enable_write_protection();
pr_info("Spying on UID:%d\n", uid); #endif
pr_info("Spying on UID:%d\n", uid);
return 0; return 0;
} }
static void __exit syscall_steal_end(void) static void __exit syscall_steal_end(void)
{ {
#if USE_KPROBES_PRE_HANDLER_BEFORE_SYSCALL
unregister_kprobe(&syscall_kprobe);
#else
if (!sys_call_table_stolen) if (!sys_call_table_stolen)
return; return;
@ -239,6 +290,7 @@ static void __exit syscall_steal_end(void)
disable_write_protection(); disable_write_protection();
sys_call_table_stolen[__NR_openat] = (unsigned long *)original_call; sys_call_table_stolen[__NR_openat] = (unsigned long *)original_call;
enable_write_protection(); enable_write_protection();
#endif
msleep(2000); msleep(2000);
} }

View File

@ -1566,6 +1566,11 @@ At first glance, it appears we could solve this particular problem by checking i
When A is removed, it sees that the system call was changed to \cpp|B_openat| so that it is no longer pointing to \cpp|A_openat|, so it will not restore it to \cpp|sys_openat| before it is removed from memory.
Unfortunately, \cpp|B_openat| will still try to call \cpp|A_openat| which is no longer there, so that even without removing B the system would crash.
On x86, the system call table can no longer be used to invoke a system call as of commit
\href{https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1e3ad78334a69b36e107232e337f9d693dcc9df2}{1e3ad78}, first released in v6.9.
The commit has also been backported to stable and long-term kernels, such as v5.15.154+, v6.1.85+, v6.6.26+ and v6.8.5+; see this \href{https://stackoverflow.com/a/78607015}{answer} for more details.
On these kernels, Kprobes can be used instead to hook the system call entry and intercept the system call.
Note that all the related problems make syscall stealing infeasible for production use.
In order to keep people from doing potentially harmful things, \cpp|sys_call_table| is no longer exported.
This means, if you want to do something more than a mere dry run of this example, you will have to patch your current kernel in order to have \cpp|sys_call_table| exported.