proc_ops: Be aware of kernel API changes (#13)
The proc_ops structure is defined in <include/linux/proc_fs.h> since Linux v5.6. In older kernels, the file_operations structure is used for custom hooks in the /proc file system. However, it contains some members that are unnecessary in VFS, and every time VFS expands the file_operations set, the /proc code becomes bloated. In addition, this structure saves not only space but also some operations, which improves performance.
This commit is contained in:
parent
a411938b73
commit
92b646ed64
73
lkmpg.tex
73
lkmpg.tex
|
@ -739,33 +739,35 @@ So just be aware that the word ``hardware'' in our discussion can mean something
|
||||||
|
|
||||||
\section{Character Device drivers}
|
\section{Character Device drivers}
|
||||||
\label{sec:chardev}
|
\label{sec:chardev}
|
||||||
\subsection{The proc\_ops Structure}
|
\subsection{The file\_operations Structure}
|
||||||
\label{sec:proc_ops}
|
\label{sec:file_operations}
|
||||||
The \verb|proc_ops| structure is defined in \textbf{/usr/include/linux/fs.h}, and holds pointers to functions defined by the driver that perform various operations on the device.
|
The \verb|file_operations| structure is defined in \textbf{/usr/include/linux/fs.h}, and holds pointers to functions defined by the driver that perform various operations on the device.
|
||||||
Each field of the structure corresponds to the address of some function defined by the driver to handle a requested operation.
|
Each field of the structure corresponds to the address of some function defined by the driver to handle a requested operation.
|
||||||
|
|
||||||
For example, every character driver needs to define a function that reads from the device.
|
For example, every character driver needs to define a function that reads from the device.
|
||||||
The \verb|proc_ops| structure holds the address of the module's function that performs that operation.
|
The \verb|file_operations| structure holds the address of the module's function that performs that operation.
|
||||||
Here is what the definition looks like for kernel 3.0:
|
Here is what the definition looks like for kernel 5.4:
|
||||||
|
|
||||||
\begin{code}
|
\begin{code}
|
||||||
struct proc_ops {
|
struct file_operations {
|
||||||
struct module *owner;
|
struct module *owner;
|
||||||
loff_t (*llseek) (struct file *, loff_t, int);
|
loff_t (*llseek) (struct file *, loff_t, int);
|
||||||
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
|
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
|
||||||
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
|
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
|
||||||
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
|
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
|
||||||
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
|
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
|
||||||
|
int (*iopoll)(struct kiocb *kiocb, bool spin);
|
||||||
int (*iterate) (struct file *, struct dir_context *);
|
int (*iterate) (struct file *, struct dir_context *);
|
||||||
unsigned int (*poll) (struct file *, struct poll_table_struct *);
|
int (*iterate_shared) (struct file *, struct dir_context *);
|
||||||
|
__poll_t (*poll) (struct file *, struct poll_table_struct *);
|
||||||
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
|
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
|
||||||
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
|
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
|
||||||
int (*mmap) (struct file *, struct vm_area_struct *);
|
int (*mmap) (struct file *, struct vm_area_struct *);
|
||||||
|
unsigned long mmap_supported_flags;
|
||||||
int (*open) (struct inode *, struct file *);
|
int (*open) (struct inode *, struct file *);
|
||||||
int (*flush) (struct file *, fl_owner_t id);
|
int (*flush) (struct file *, fl_owner_t id);
|
||||||
int (*release) (struct inode *, struct file *);
|
int (*release) (struct inode *, struct file *);
|
||||||
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
|
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
|
||||||
int (*aio_fsync) (struct kiocb *, int datasync);
|
|
||||||
int (*fasync) (int, struct file *, int);
|
int (*fasync) (int, struct file *, int);
|
||||||
int (*lock) (struct file *, int, struct file_lock *);
|
int (*lock) (struct file *, int, struct file_lock *);
|
||||||
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
|
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
|
||||||
|
@ -774,27 +776,33 @@ struct proc_ops {
|
||||||
int (*flock) (struct file *, int, struct file_lock *);
|
int (*flock) (struct file *, int, struct file_lock *);
|
||||||
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
|
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
|
||||||
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
|
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
|
||||||
int (*setlease)(struct file *, long, struct file_lock **);
|
int (*setlease)(struct file *, long, struct file_lock **, void **);
|
||||||
long (*fallocate)(struct file *file, int mode, loff_t offset,
|
long (*fallocate)(struct file *file, int mode, loff_t offset,
|
||||||
loff_t len);
|
loff_t len);
|
||||||
int (*show_fdinfo)(struct seq_file *m, struct file *f);
|
void (*show_fdinfo)(struct seq_file *m, struct file *f);
|
||||||
};
|
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
|
||||||
|
loff_t, size_t, unsigned int);
|
||||||
|
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
|
||||||
|
struct file *file_out, loff_t pos_out,
|
||||||
|
loff_t len, unsigned int remap_flags);
|
||||||
|
int (*fadvise)(struct file *, loff_t, loff_t, int);
|
||||||
|
} __randomize_layout;
|
||||||
\end{code}
|
\end{code}
|
||||||
|
|
||||||
Some operations are not implemented by a driver.
|
Some operations are not implemented by a driver.
|
||||||
For example, a driver that handles a video card will not need to read from a directory structure.
|
For example, a driver that handles a video card will not need to read from a directory structure.
|
||||||
The corresponding entries in the \verb|proc_ops| structure should be set to NULL.
|
The corresponding entries in the \verb|file_operations| structure should be set to NULL.
|
||||||
|
|
||||||
There is a gcc extension that makes assigning to this structure more convenient.
|
There is a gcc extension that makes assigning to this structure more convenient.
|
||||||
You will see it in modern drivers, and it may catch you by surprise.
|
You will see it in modern drivers, and it may catch you by surprise.
|
||||||
This is what the new way of assigning to the structure looks like:
|
This is what the new way of assigning to the structure looks like:
|
||||||
|
|
||||||
\begin{code}
|
\begin{code}
|
||||||
struct proc_ops fops = {
|
struct file_operations fops = {
|
||||||
proc_read: device_read,
|
read: device_read,
|
||||||
proc_write: device_write,
|
write: device_write,
|
||||||
proc_open: device_open,
|
open: device_open,
|
||||||
proc_release: device_release
|
release: device_release
|
||||||
};
|
};
|
||||||
\end{code}
|
\end{code}
|
||||||
|
|
||||||
|
@ -803,17 +811,19 @@ You should use this syntax in case someone wants to port your driver.
|
||||||
It will help with compatibility:
|
It will help with compatibility:
|
||||||
|
|
||||||
\begin{code}
|
\begin{code}
|
||||||
struct proc_ops fops = {
|
struct file_operations fops = {
|
||||||
.proc_read = device_read,
|
.read = device_read,
|
||||||
.proc_write = device_write,
|
.write = device_write,
|
||||||
.proc_open = device_open,
|
.open = device_open,
|
||||||
.proc_release = device_release
|
.release = device_release
|
||||||
};
|
};
|
||||||
\end{code}
|
\end{code}
|
||||||
|
|
||||||
The meaning is clear, and you should be aware that any member of the structure which you do not explicitly assign will be initialized to NULL by gcc.
|
The meaning is clear, and you should be aware that any member of the structure which you do not explicitly assign will be initialized to NULL by gcc.
|
||||||
|
|
||||||
An instance of struct proc\_ops containing pointers to functions that are used to implement read, write, open, \ldots{} syscalls is commonly named fops.
|
An instance of struct \verb|file_operations| containing pointers to functions that are used to implement read, write, open, \ldots{} syscalls is commonly named fops.
|
||||||
|
|
||||||
|
Since Linux v5.6, the \verb|proc_ops| structure was introduced to replace the use of the \verb|file_operations| structure when registering proc handlers.
|
||||||
|
|
||||||
\subsection{The file structure}
|
\subsection{The file structure}
|
||||||
\label{sec:file_struct}
|
\label{sec:file_struct}
|
||||||
|
@ -843,10 +853,10 @@ This is synonymous with assigning it a major number during the module's initiali
|
||||||
You do this by using the \verb|register_chrdev| function, defined by linux/fs.h.
|
You do this by using the \verb|register_chrdev| function, defined by linux/fs.h.
|
||||||
|
|
||||||
\begin{code}
|
\begin{code}
|
||||||
int register_chrdev(unsigned int major, const char *name, struct proc_ops *fops);
|
int register_chrdev(unsigned int major, const char *name, struct file_operations *fops);
|
||||||
\end{code}
|
\end{code}
|
||||||
|
|
||||||
where unsigned int major is the major number you want to request, \emph{const char *name} is the name of the device as it will appear in \textbf{/proc/devices} and \emph{struct proc\_ops *fops} is a pointer to the proc\_ops table for your driver.
|
where unsigned int major is the major number you want to request, \emph{const char *name} is the name of the device as it will appear in \textbf{/proc/devices} and \emph{struct file\_operations *fops} is a pointer to the \verb|file_operations| table for your driver.
|
||||||
A negative return value means the registration failed. Note that we didn't pass the minor number to register\_chrdev.
|
A negative return value means the registration failed. Note that we didn't pass the minor number to register\_chrdev.
|
||||||
That is because the kernel doesn't care about the minor number; only our driver uses it.
|
That is because the kernel doesn't care about the minor number; only our driver uses it.
|
||||||
|
|
||||||
|
@ -951,6 +961,13 @@ HelloWorld!
|
||||||
|
|
||||||
\samplec{examples/procfs1.c}
|
\samplec{examples/procfs1.c}
|
||||||
|
|
||||||
|
\subsection{The proc\_ops Structure}
|
||||||
|
\label{sec:proc_ops}
|
||||||
|
The \verb|proc_ops| structure is defined in \textbf{/usr/include/linux/proc\_fs.h} in Linux v5.6+.
|
||||||
|
In older kernels, \verb|file_operations| was used for custom hooks in the \textbf{/proc} file system, but it contains some members that are unnecessary in VFS, and every time VFS expands the \verb|file_operations| set, the \textbf{/proc} code becomes bloated.
|
||||||
|
In addition, this structure saves not only space but also some operations, which improves performance.
|
||||||
|
For example, a file which never disappears in \textbf{/proc} can set \textbf{proc\_flags} to \textbf{PROC\_ENTRY\_PERMANENT} to save 2 atomic ops, 1 allocation, and 1 free per open/read/close sequence.
|
||||||
|
|
||||||
\subsection{Read and Write a /proc File}
|
\subsection{Read and Write a /proc File}
|
||||||
\label{sec:read_write_procfs}
|
\label{sec:read_write_procfs}
|
||||||
We have seen a very simple example for a /proc file where we only read the file /proc/helloworld.
|
We have seen a very simple example for a /proc file where we only read the file /proc/helloworld.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user