2018-11-13 09:31:52 +00:00
|
|
|
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
|
|
|
#
|
|
|
|
# system call numbers and entry vectors for alpha
|
|
|
|
#
|
|
|
|
# The format is:
|
|
|
|
# <number> <abi> <name> <entry point>
|
|
|
|
#
|
|
|
|
# The <abi> is always "common" for this file
|
|
|
|
#
|
|
|
|
0 common osf_syscall alpha_syscall_zero
|
|
|
|
1 common exit sys_exit
|
|
|
|
2 common fork alpha_fork
|
|
|
|
3 common read sys_read
|
|
|
|
4 common write sys_write
|
|
|
|
5 common osf_old_open sys_ni_syscall
|
|
|
|
6 common close sys_close
|
|
|
|
7 common osf_wait4 sys_osf_wait4
|
|
|
|
8 common osf_old_creat sys_ni_syscall
|
|
|
|
9 common link sys_link
|
|
|
|
10 common unlink sys_unlink
|
|
|
|
11 common osf_execve sys_ni_syscall
|
|
|
|
12 common chdir sys_chdir
|
|
|
|
13 common fchdir sys_fchdir
|
|
|
|
14 common mknod sys_mknod
|
|
|
|
15 common chmod sys_chmod
|
|
|
|
16 common chown sys_chown
|
|
|
|
17 common brk sys_osf_brk
|
|
|
|
18 common osf_getfsstat sys_ni_syscall
|
|
|
|
19 common lseek sys_lseek
|
|
|
|
20 common getxpid sys_getxpid
|
|
|
|
21 common osf_mount sys_osf_mount
|
alpha: update syscall macro definitions
Other architectures commonly use __NR_umount2 for sys_umount,
only ia64 and alpha use __NR_umount here. In order to synchronize
the generated tables, use umount2 like everyone else, and add back
the old name from asm/unistd.h for compatibility.
For shmat, alpha uses the osf_shmat name; we can do the same thing
here, which means we don't have to add an entry in the __IGNORE
list now that shmat is mandatory everywhere.
alarm, creat, pause, time, and utime are optional everywhere
these days, no need to list them here any more.
I considered also adding the regular versions of the get*id system
calls that have different names and calling conventions on alpha,
which would further help unify the syscall ABI, but for now
I decided against that.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-12-30 14:16:48 +00:00
|
|
|
22 common umount2 sys_umount
|
2018-11-13 09:31:52 +00:00
|
|
|
23 common setuid sys_setuid
|
|
|
|
24 common getxuid sys_getxuid
|
|
|
|
25 common exec_with_loader sys_ni_syscall
|
|
|
|
26 common ptrace sys_ptrace
|
|
|
|
27 common osf_nrecvmsg sys_ni_syscall
|
|
|
|
28 common osf_nsendmsg sys_ni_syscall
|
|
|
|
29 common osf_nrecvfrom sys_ni_syscall
|
|
|
|
30 common osf_naccept sys_ni_syscall
|
|
|
|
31 common osf_ngetpeername sys_ni_syscall
|
|
|
|
32 common osf_ngetsockname sys_ni_syscall
|
|
|
|
33 common access sys_access
|
|
|
|
34 common osf_chflags sys_ni_syscall
|
|
|
|
35 common osf_fchflags sys_ni_syscall
|
|
|
|
36 common sync sys_sync
|
|
|
|
37 common kill sys_kill
|
|
|
|
38 common osf_old_stat sys_ni_syscall
|
|
|
|
39 common setpgid sys_setpgid
|
|
|
|
40 common osf_old_lstat sys_ni_syscall
|
|
|
|
41 common dup sys_dup
|
|
|
|
42 common pipe sys_alpha_pipe
|
|
|
|
43 common osf_set_program_attributes sys_osf_set_program_attributes
|
|
|
|
44 common osf_profil sys_ni_syscall
|
|
|
|
45 common open sys_open
|
|
|
|
46 common osf_old_sigaction sys_ni_syscall
|
|
|
|
47 common getxgid sys_getxgid
|
|
|
|
48 common osf_sigprocmask sys_osf_sigprocmask
|
|
|
|
49 common osf_getlogin sys_ni_syscall
|
|
|
|
50 common osf_setlogin sys_ni_syscall
|
|
|
|
51 common acct sys_acct
|
|
|
|
52 common sigpending sys_sigpending
|
|
|
|
54 common ioctl sys_ioctl
|
|
|
|
55 common osf_reboot sys_ni_syscall
|
|
|
|
56 common osf_revoke sys_ni_syscall
|
|
|
|
57 common symlink sys_symlink
|
|
|
|
58 common readlink sys_readlink
|
|
|
|
59 common execve sys_execve
|
|
|
|
60 common umask sys_umask
|
|
|
|
61 common chroot sys_chroot
|
|
|
|
62 common osf_old_fstat sys_ni_syscall
|
|
|
|
63 common getpgrp sys_getpgrp
|
|
|
|
64 common getpagesize sys_getpagesize
|
|
|
|
65 common osf_mremap sys_ni_syscall
|
|
|
|
66 common vfork alpha_vfork
|
|
|
|
67 common stat sys_newstat
|
|
|
|
68 common lstat sys_newlstat
|
|
|
|
69 common osf_sbrk sys_ni_syscall
|
|
|
|
70 common osf_sstk sys_ni_syscall
|
|
|
|
71 common mmap sys_osf_mmap
|
|
|
|
72 common osf_old_vadvise sys_ni_syscall
|
|
|
|
73 common munmap sys_munmap
|
|
|
|
74 common mprotect sys_mprotect
|
|
|
|
75 common madvise sys_madvise
|
|
|
|
76 common vhangup sys_vhangup
|
|
|
|
77 common osf_kmodcall sys_ni_syscall
|
|
|
|
78 common osf_mincore sys_ni_syscall
|
|
|
|
79 common getgroups sys_getgroups
|
|
|
|
80 common setgroups sys_setgroups
|
|
|
|
81 common osf_old_getpgrp sys_ni_syscall
|
|
|
|
82 common setpgrp sys_setpgid
|
2019-10-25 14:59:39 +00:00
|
|
|
83 common osf_setitimer compat_sys_setitimer
|
2018-11-13 09:31:52 +00:00
|
|
|
84 common osf_old_wait sys_ni_syscall
|
|
|
|
85 common osf_table sys_ni_syscall
|
2019-10-25 14:59:39 +00:00
|
|
|
86 common osf_getitimer compat_sys_getitimer
|
2018-11-13 09:31:52 +00:00
|
|
|
87 common gethostname sys_gethostname
|
|
|
|
88 common sethostname sys_sethostname
|
|
|
|
89 common getdtablesize sys_getdtablesize
|
|
|
|
90 common dup2 sys_dup2
|
|
|
|
91 common fstat sys_newfstat
|
|
|
|
92 common fcntl sys_fcntl
|
|
|
|
93 common osf_select sys_osf_select
|
|
|
|
94 common poll sys_poll
|
|
|
|
95 common fsync sys_fsync
|
|
|
|
96 common setpriority sys_setpriority
|
|
|
|
97 common socket sys_socket
|
|
|
|
98 common connect sys_connect
|
|
|
|
99 common accept sys_accept
|
|
|
|
100 common getpriority sys_osf_getpriority
|
|
|
|
101 common send sys_send
|
|
|
|
102 common recv sys_recv
|
|
|
|
103 common sigreturn sys_sigreturn
|
|
|
|
104 common bind sys_bind
|
|
|
|
105 common setsockopt sys_setsockopt
|
|
|
|
106 common listen sys_listen
|
|
|
|
107 common osf_plock sys_ni_syscall
|
|
|
|
108 common osf_old_sigvec sys_ni_syscall
|
|
|
|
109 common osf_old_sigblock sys_ni_syscall
|
|
|
|
110 common osf_old_sigsetmask sys_ni_syscall
|
|
|
|
111 common sigsuspend sys_sigsuspend
|
|
|
|
112 common osf_sigstack sys_osf_sigstack
|
|
|
|
113 common recvmsg sys_recvmsg
|
|
|
|
114 common sendmsg sys_sendmsg
|
|
|
|
115 common osf_old_vtrace sys_ni_syscall
|
|
|
|
116 common osf_gettimeofday sys_osf_gettimeofday
|
|
|
|
117 common osf_getrusage sys_osf_getrusage
|
|
|
|
118 common getsockopt sys_getsockopt
|
|
|
|
120 common readv sys_osf_readv
|
|
|
|
121 common writev sys_osf_writev
|
|
|
|
122 common osf_settimeofday sys_osf_settimeofday
|
|
|
|
123 common fchown sys_fchown
|
|
|
|
124 common fchmod sys_fchmod
|
|
|
|
125 common recvfrom sys_recvfrom
|
|
|
|
126 common setreuid sys_setreuid
|
|
|
|
127 common setregid sys_setregid
|
|
|
|
128 common rename sys_rename
|
|
|
|
129 common truncate sys_truncate
|
|
|
|
130 common ftruncate sys_ftruncate
|
|
|
|
131 common flock sys_flock
|
|
|
|
132 common setgid sys_setgid
|
|
|
|
133 common sendto sys_sendto
|
|
|
|
134 common shutdown sys_shutdown
|
|
|
|
135 common socketpair sys_socketpair
|
|
|
|
136 common mkdir sys_mkdir
|
|
|
|
137 common rmdir sys_rmdir
|
|
|
|
138 common osf_utimes sys_osf_utimes
|
|
|
|
139 common osf_old_sigreturn sys_ni_syscall
|
|
|
|
140 common osf_adjtime sys_ni_syscall
|
|
|
|
141 common getpeername sys_getpeername
|
|
|
|
142 common osf_gethostid sys_ni_syscall
|
|
|
|
143 common osf_sethostid sys_ni_syscall
|
|
|
|
144 common getrlimit sys_getrlimit
|
|
|
|
145 common setrlimit sys_setrlimit
|
|
|
|
146 common osf_old_killpg sys_ni_syscall
|
|
|
|
147 common setsid sys_setsid
|
|
|
|
148 common quotactl sys_quotactl
|
|
|
|
149 common osf_oldquota sys_ni_syscall
|
|
|
|
150 common getsockname sys_getsockname
|
|
|
|
153 common osf_pid_block sys_ni_syscall
|
|
|
|
154 common osf_pid_unblock sys_ni_syscall
|
|
|
|
156 common sigaction sys_osf_sigaction
|
|
|
|
157 common osf_sigwaitprim sys_ni_syscall
|
|
|
|
158 common osf_nfssvc sys_ni_syscall
|
|
|
|
159 common osf_getdirentries sys_osf_getdirentries
|
|
|
|
160 common osf_statfs sys_osf_statfs
|
|
|
|
161 common osf_fstatfs sys_osf_fstatfs
|
|
|
|
163 common osf_asynch_daemon sys_ni_syscall
|
|
|
|
164 common osf_getfh sys_ni_syscall
|
|
|
|
165 common osf_getdomainname sys_osf_getdomainname
|
|
|
|
166 common setdomainname sys_setdomainname
|
|
|
|
169 common osf_exportfs sys_ni_syscall
|
|
|
|
181 common osf_alt_plock sys_ni_syscall
|
|
|
|
184 common osf_getmnt sys_ni_syscall
|
|
|
|
187 common osf_alt_sigpending sys_ni_syscall
|
|
|
|
188 common osf_alt_setsid sys_ni_syscall
|
|
|
|
199 common osf_swapon sys_swapon
|
ipc: rename old-style shmctl/semctl/msgctl syscalls
The behavior of these system calls is slightly different between
architectures, as determined by the CONFIG_ARCH_WANT_IPC_PARSE_VERSION
symbol. Most architectures that implement the split IPC syscalls don't set
that symbol and only get the modern version, but alpha, arm, microblaze,
mips-n32, mips-n64 and xtensa expect the caller to pass the IPC_64 flag.
For the architectures that so far only implement sys_ipc(), i.e. m68k,
mips-o32, powerpc, s390, sh, sparc, and x86-32, we want the new behavior
when adding the split syscalls, so we need to distinguish between the
two groups of architectures.
The method I picked for this distinction is to have a separate system call
entry point: sys_old_*ctl() now uses ipc_parse_version, while sys_*ctl()
does not. The system call tables of the five architectures are changed
accordingly.
As an additional benefit, we no longer need the configuration specific
definition for ipc_parse_version(), it always does the same thing now,
but simply won't get called on architectures with the modern interface.
A small downside is that on architectures that do set
ARCH_WANT_IPC_PARSE_VERSION, we now have an extra set of entry points
that are never called. They only add a few bytes of bloat, so it seems
better to keep them compared to adding yet another Kconfig symbol.
I considered adding new syscall numbers for the IPC_64 variants for
consistency, but decided against that for now.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-12-31 21:22:40 +00:00
|
|
|
200 common msgctl sys_old_msgctl
|
2018-11-13 09:31:52 +00:00
|
|
|
201 common msgget sys_msgget
|
|
|
|
202 common msgrcv sys_msgrcv
|
|
|
|
203 common msgsnd sys_msgsnd
|
ipc: rename old-style shmctl/semctl/msgctl syscalls
The behavior of these system calls is slightly different between
architectures, as determined by the CONFIG_ARCH_WANT_IPC_PARSE_VERSION
symbol. Most architectures that implement the split IPC syscalls don't set
that symbol and only get the modern version, but alpha, arm, microblaze,
mips-n32, mips-n64 and xtensa expect the caller to pass the IPC_64 flag.
For the architectures that so far only implement sys_ipc(), i.e. m68k,
mips-o32, powerpc, s390, sh, sparc, and x86-32, we want the new behavior
when adding the split syscalls, so we need to distinguish between the
two groups of architectures.
The method I picked for this distinction is to have a separate system call
entry point: sys_old_*ctl() now uses ipc_parse_version, while sys_*ctl()
does not. The system call tables of the five architectures are changed
accordingly.
As an additional benefit, we no longer need the configuration specific
definition for ipc_parse_version(), it always does the same thing now,
but simply won't get called on architectures with the modern interface.
A small downside is that on architectures that do set
ARCH_WANT_IPC_PARSE_VERSION, we now have an extra set of entry points
that are never called. They only add a few bytes of bloat, so it seems
better to keep them compared to adding yet another Kconfig symbol.
I considered adding new syscall numbers for the IPC_64 variants for
consistency, but decided against that for now.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-12-31 21:22:40 +00:00
|
|
|
204 common semctl sys_old_semctl
|
2018-11-13 09:31:52 +00:00
|
|
|
205 common semget sys_semget
|
|
|
|
206 common semop sys_semop
|
|
|
|
207 common osf_utsname sys_osf_utsname
|
|
|
|
208 common lchown sys_lchown
|
alpha: update syscall macro definitions
Other architectures commonly use __NR_umount2 for sys_umount,
only ia64 and alpha use __NR_umount here. In order to synchronize
the generated tables, use umount2 like everyone else, and add back
the old name from asm/unistd.h for compatibility.
For shmat, alpha uses the osf_shmat name; we can do the same thing
here, which means we don't have to add an entry in the __IGNORE
list now that shmat is mandatory everywhere.
alarm, creat, pause, time, and utime are optional everywhere
these days, no need to list them here any more.
I considered also adding the regular versions of the get*id system
calls that have different names and calling conventions on alpha,
which would further help unify the syscall ABI, but for now
I decided against that.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-12-30 14:16:48 +00:00
|
|
|
209 common shmat sys_shmat
|
ipc: rename old-style shmctl/semctl/msgctl syscalls
The behavior of these system calls is slightly different between
architectures, as determined by the CONFIG_ARCH_WANT_IPC_PARSE_VERSION
symbol. Most architectures that implement the split IPC syscalls don't set
that symbol and only get the modern version, but alpha, arm, microblaze,
mips-n32, mips-n64 and xtensa expect the caller to pass the IPC_64 flag.
For the architectures that so far only implement sys_ipc(), i.e. m68k,
mips-o32, powerpc, s390, sh, sparc, and x86-32, we want the new behavior
when adding the split syscalls, so we need to distinguish between the
two groups of architectures.
The method I picked for this distinction is to have a separate system call
entry point: sys_old_*ctl() now uses ipc_parse_version, while sys_*ctl()
does not. The system call tables of the five architectures are changed
accordingly.
As an additional benefit, we no longer need the configuration specific
definition for ipc_parse_version(), it always does the same thing now,
but simply won't get called on architectures with the modern interface.
A small downside is that on architectures that do set
ARCH_WANT_IPC_PARSE_VERSION, we now have an extra set of entry points
that are never called. They only add a few bytes of bloat, so it seems
better to keep them compared to adding yet another Kconfig symbol.
I considered adding new syscall numbers for the IPC_64 variants for
consistency, but decided against that for now.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2018-12-31 21:22:40 +00:00
|
|
|
210 common shmctl sys_old_shmctl
|
2018-11-13 09:31:52 +00:00
|
|
|
211 common shmdt sys_shmdt
|
|
|
|
212 common shmget sys_shmget
|
|
|
|
213 common osf_mvalid sys_ni_syscall
|
|
|
|
214 common osf_getaddressconf sys_ni_syscall
|
|
|
|
215 common osf_msleep sys_ni_syscall
|
|
|
|
216 common osf_mwakeup sys_ni_syscall
|
|
|
|
217 common msync sys_msync
|
|
|
|
218 common osf_signal sys_ni_syscall
|
|
|
|
219 common osf_utc_gettime sys_ni_syscall
|
|
|
|
220 common osf_utc_adjtime sys_ni_syscall
|
|
|
|
222 common osf_security sys_ni_syscall
|
|
|
|
223 common osf_kloadcall sys_ni_syscall
|
|
|
|
224 common osf_stat sys_osf_stat
|
|
|
|
225 common osf_lstat sys_osf_lstat
|
|
|
|
226 common osf_fstat sys_osf_fstat
|
|
|
|
227 common osf_statfs64 sys_osf_statfs64
|
|
|
|
228 common osf_fstatfs64 sys_osf_fstatfs64
|
|
|
|
233 common getpgid sys_getpgid
|
|
|
|
234 common getsid sys_getsid
|
|
|
|
235 common sigaltstack sys_sigaltstack
|
|
|
|
236 common osf_waitid sys_ni_syscall
|
|
|
|
237 common osf_priocntlset sys_ni_syscall
|
|
|
|
238 common osf_sigsendset sys_ni_syscall
|
|
|
|
239 common osf_set_speculative sys_ni_syscall
|
|
|
|
240 common osf_msfs_syscall sys_ni_syscall
|
|
|
|
241 common osf_sysinfo sys_osf_sysinfo
|
|
|
|
242 common osf_uadmin sys_ni_syscall
|
|
|
|
243 common osf_fuser sys_ni_syscall
|
|
|
|
244 common osf_proplist_syscall sys_osf_proplist_syscall
|
|
|
|
245 common osf_ntp_adjtime sys_ni_syscall
|
|
|
|
246 common osf_ntp_gettime sys_ni_syscall
|
|
|
|
247 common osf_pathconf sys_ni_syscall
|
|
|
|
248 common osf_fpathconf sys_ni_syscall
|
|
|
|
250 common osf_uswitch sys_ni_syscall
|
|
|
|
251 common osf_usleep_thread sys_osf_usleep_thread
|
|
|
|
252 common osf_audcntl sys_ni_syscall
|
|
|
|
253 common osf_audgen sys_ni_syscall
|
|
|
|
254 common sysfs sys_sysfs
|
|
|
|
255 common osf_subsys_info sys_ni_syscall
|
|
|
|
256 common osf_getsysinfo sys_osf_getsysinfo
|
|
|
|
257 common osf_setsysinfo sys_osf_setsysinfo
|
|
|
|
258 common osf_afs_syscall sys_ni_syscall
|
|
|
|
259 common osf_swapctl sys_ni_syscall
|
|
|
|
260 common osf_memcntl sys_ni_syscall
|
|
|
|
261 common osf_fdatasync sys_ni_syscall
|
2021-06-29 20:11:44 +00:00
|
|
|
300 common bdflush sys_ni_syscall
|
2018-11-13 09:31:52 +00:00
|
|
|
301 common sethae sys_sethae
|
|
|
|
302 common mount sys_mount
|
|
|
|
303 common old_adjtimex sys_old_adjtimex
|
|
|
|
304 common swapoff sys_swapoff
|
|
|
|
305 common getdents sys_getdents
|
|
|
|
306 common create_module sys_ni_syscall
|
|
|
|
307 common init_module sys_init_module
|
|
|
|
308 common delete_module sys_delete_module
|
|
|
|
309 common get_kernel_syms sys_ni_syscall
|
|
|
|
310 common syslog sys_syslog
|
|
|
|
311 common reboot sys_reboot
|
|
|
|
312 common clone alpha_clone
|
|
|
|
313 common uselib sys_uselib
|
|
|
|
314 common mlock sys_mlock
|
|
|
|
315 common munlock sys_munlock
|
|
|
|
316 common mlockall sys_mlockall
|
|
|
|
317 common munlockall sys_munlockall
|
|
|
|
318 common sysinfo sys_sysinfo
|
2020-08-15 00:31:07 +00:00
|
|
|
319 common _sysctl sys_ni_syscall
|
2018-11-13 09:31:52 +00:00
|
|
|
# 320 was sys_idle
|
|
|
|
321 common oldumount sys_oldumount
|
|
|
|
322 common swapon sys_swapon
|
|
|
|
323 common times sys_times
|
|
|
|
324 common personality sys_personality
|
|
|
|
325 common setfsuid sys_setfsuid
|
|
|
|
326 common setfsgid sys_setfsgid
|
|
|
|
327 common ustat sys_ustat
|
|
|
|
328 common statfs sys_statfs
|
|
|
|
329 common fstatfs sys_fstatfs
|
|
|
|
330 common sched_setparam sys_sched_setparam
|
|
|
|
331 common sched_getparam sys_sched_getparam
|
|
|
|
332 common sched_setscheduler sys_sched_setscheduler
|
|
|
|
333 common sched_getscheduler sys_sched_getscheduler
|
|
|
|
334 common sched_yield sys_sched_yield
|
|
|
|
335 common sched_get_priority_max sys_sched_get_priority_max
|
|
|
|
336 common sched_get_priority_min sys_sched_get_priority_min
|
|
|
|
337 common sched_rr_get_interval sys_sched_rr_get_interval
|
|
|
|
338 common afs_syscall sys_ni_syscall
|
|
|
|
339 common uname sys_newuname
|
|
|
|
340 common nanosleep sys_nanosleep
|
|
|
|
341 common mremap sys_mremap
|
|
|
|
342 common nfsservctl sys_ni_syscall
|
|
|
|
343 common setresuid sys_setresuid
|
|
|
|
344 common getresuid sys_getresuid
|
|
|
|
345 common pciconfig_read sys_pciconfig_read
|
|
|
|
346 common pciconfig_write sys_pciconfig_write
|
|
|
|
347 common query_module sys_ni_syscall
|
|
|
|
348 common prctl sys_prctl
|
|
|
|
349 common pread64 sys_pread64
|
|
|
|
350 common pwrite64 sys_pwrite64
|
|
|
|
351 common rt_sigreturn sys_rt_sigreturn
|
|
|
|
352 common rt_sigaction sys_rt_sigaction
|
|
|
|
353 common rt_sigprocmask sys_rt_sigprocmask
|
|
|
|
354 common rt_sigpending sys_rt_sigpending
|
|
|
|
355 common rt_sigtimedwait sys_rt_sigtimedwait
|
|
|
|
356 common rt_sigqueueinfo sys_rt_sigqueueinfo
|
|
|
|
357 common rt_sigsuspend sys_rt_sigsuspend
|
|
|
|
358 common select sys_select
|
|
|
|
359 common gettimeofday sys_gettimeofday
|
|
|
|
360 common settimeofday sys_settimeofday
|
|
|
|
361 common getitimer sys_getitimer
|
|
|
|
362 common setitimer sys_setitimer
|
|
|
|
363 common utimes sys_utimes
|
|
|
|
364 common getrusage sys_getrusage
|
|
|
|
365 common wait4 sys_wait4
|
|
|
|
366 common adjtimex sys_adjtimex
|
|
|
|
367 common getcwd sys_getcwd
|
|
|
|
368 common capget sys_capget
|
|
|
|
369 common capset sys_capset
|
|
|
|
370 common sendfile sys_sendfile64
|
|
|
|
371 common setresgid sys_setresgid
|
|
|
|
372 common getresgid sys_getresgid
|
|
|
|
373 common dipc sys_ni_syscall
|
|
|
|
374 common pivot_root sys_pivot_root
|
|
|
|
375 common mincore sys_mincore
|
|
|
|
376 common pciconfig_iobase sys_pciconfig_iobase
|
|
|
|
377 common getdents64 sys_getdents64
|
|
|
|
378 common gettid sys_gettid
|
|
|
|
379 common readahead sys_readahead
|
|
|
|
# 380 is unused
|
|
|
|
381 common tkill sys_tkill
|
|
|
|
382 common setxattr sys_setxattr
|
|
|
|
383 common lsetxattr sys_lsetxattr
|
|
|
|
384 common fsetxattr sys_fsetxattr
|
|
|
|
385 common getxattr sys_getxattr
|
|
|
|
386 common lgetxattr sys_lgetxattr
|
|
|
|
387 common fgetxattr sys_fgetxattr
|
|
|
|
388 common listxattr sys_listxattr
|
|
|
|
389 common llistxattr sys_llistxattr
|
|
|
|
390 common flistxattr sys_flistxattr
|
|
|
|
391 common removexattr sys_removexattr
|
|
|
|
392 common lremovexattr sys_lremovexattr
|
|
|
|
393 common fremovexattr sys_fremovexattr
|
|
|
|
394 common futex sys_futex
|
|
|
|
395 common sched_setaffinity sys_sched_setaffinity
|
|
|
|
396 common sched_getaffinity sys_sched_getaffinity
|
|
|
|
397 common tuxcall sys_ni_syscall
|
|
|
|
398 common io_setup sys_io_setup
|
|
|
|
399 common io_destroy sys_io_destroy
|
|
|
|
400 common io_getevents sys_io_getevents
|
|
|
|
401 common io_submit sys_io_submit
|
|
|
|
402 common io_cancel sys_io_cancel
|
|
|
|
405 common exit_group sys_exit_group
|
|
|
|
406 common lookup_dcookie sys_lookup_dcookie
|
|
|
|
407 common epoll_create sys_epoll_create
|
|
|
|
408 common epoll_ctl sys_epoll_ctl
|
|
|
|
409 common epoll_wait sys_epoll_wait
|
|
|
|
410 common remap_file_pages sys_remap_file_pages
|
|
|
|
411 common set_tid_address sys_set_tid_address
|
|
|
|
412 common restart_syscall sys_restart_syscall
|
|
|
|
413 common fadvise64 sys_fadvise64
|
|
|
|
414 common timer_create sys_timer_create
|
|
|
|
415 common timer_settime sys_timer_settime
|
|
|
|
416 common timer_gettime sys_timer_gettime
|
|
|
|
417 common timer_getoverrun sys_timer_getoverrun
|
|
|
|
418 common timer_delete sys_timer_delete
|
|
|
|
419 common clock_settime sys_clock_settime
|
|
|
|
420 common clock_gettime sys_clock_gettime
|
|
|
|
421 common clock_getres sys_clock_getres
|
|
|
|
422 common clock_nanosleep sys_clock_nanosleep
|
|
|
|
423 common semtimedop sys_semtimedop
|
|
|
|
424 common tgkill sys_tgkill
|
|
|
|
425 common stat64 sys_stat64
|
|
|
|
426 common lstat64 sys_lstat64
|
|
|
|
427 common fstat64 sys_fstat64
|
|
|
|
428 common vserver sys_ni_syscall
|
|
|
|
429 common mbind sys_ni_syscall
|
|
|
|
430 common get_mempolicy sys_ni_syscall
|
|
|
|
431 common set_mempolicy sys_ni_syscall
|
|
|
|
432 common mq_open sys_mq_open
|
|
|
|
433 common mq_unlink sys_mq_unlink
|
|
|
|
434 common mq_timedsend sys_mq_timedsend
|
|
|
|
435 common mq_timedreceive sys_mq_timedreceive
|
|
|
|
436 common mq_notify sys_mq_notify
|
|
|
|
437 common mq_getsetattr sys_mq_getsetattr
|
|
|
|
438 common waitid sys_waitid
|
|
|
|
439 common add_key sys_add_key
|
|
|
|
440 common request_key sys_request_key
|
|
|
|
441 common keyctl sys_keyctl
|
|
|
|
442 common ioprio_set sys_ioprio_set
|
|
|
|
443 common ioprio_get sys_ioprio_get
|
|
|
|
444 common inotify_init sys_inotify_init
|
|
|
|
445 common inotify_add_watch sys_inotify_add_watch
|
|
|
|
446 common inotify_rm_watch sys_inotify_rm_watch
|
|
|
|
447 common fdatasync sys_fdatasync
|
|
|
|
448 common kexec_load sys_kexec_load
|
|
|
|
449 common migrate_pages sys_migrate_pages
|
|
|
|
450 common openat sys_openat
|
|
|
|
451 common mkdirat sys_mkdirat
|
|
|
|
452 common mknodat sys_mknodat
|
|
|
|
453 common fchownat sys_fchownat
|
|
|
|
454 common futimesat sys_futimesat
|
|
|
|
455 common fstatat64 sys_fstatat64
|
|
|
|
456 common unlinkat sys_unlinkat
|
|
|
|
457 common renameat sys_renameat
|
|
|
|
458 common linkat sys_linkat
|
|
|
|
459 common symlinkat sys_symlinkat
|
|
|
|
460 common readlinkat sys_readlinkat
|
|
|
|
461 common fchmodat sys_fchmodat
|
|
|
|
462 common faccessat sys_faccessat
|
|
|
|
463 common pselect6 sys_pselect6
|
|
|
|
464 common ppoll sys_ppoll
|
|
|
|
465 common unshare sys_unshare
|
|
|
|
466 common set_robust_list sys_set_robust_list
|
|
|
|
467 common get_robust_list sys_get_robust_list
|
|
|
|
468 common splice sys_splice
|
|
|
|
469 common sync_file_range sys_sync_file_range
|
|
|
|
470 common tee sys_tee
|
|
|
|
471 common vmsplice sys_vmsplice
|
|
|
|
472 common move_pages sys_move_pages
|
|
|
|
473 common getcpu sys_getcpu
|
|
|
|
474 common epoll_pwait sys_epoll_pwait
|
|
|
|
475 common utimensat sys_utimensat
|
|
|
|
476 common signalfd sys_signalfd
|
|
|
|
477 common timerfd sys_ni_syscall
|
|
|
|
478 common eventfd sys_eventfd
|
|
|
|
479 common recvmmsg sys_recvmmsg
|
|
|
|
480 common fallocate sys_fallocate
|
|
|
|
481 common timerfd_create sys_timerfd_create
|
|
|
|
482 common timerfd_settime sys_timerfd_settime
|
|
|
|
483 common timerfd_gettime sys_timerfd_gettime
|
|
|
|
484 common signalfd4 sys_signalfd4
|
|
|
|
485 common eventfd2 sys_eventfd2
|
|
|
|
486 common epoll_create1 sys_epoll_create1
|
|
|
|
487 common dup3 sys_dup3
|
|
|
|
488 common pipe2 sys_pipe2
|
|
|
|
489 common inotify_init1 sys_inotify_init1
|
|
|
|
490 common preadv sys_preadv
|
|
|
|
491 common pwritev sys_pwritev
|
|
|
|
492 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
|
|
|
|
493 common perf_event_open sys_perf_event_open
|
|
|
|
494 common fanotify_init sys_fanotify_init
|
|
|
|
495 common fanotify_mark sys_fanotify_mark
|
|
|
|
496 common prlimit64 sys_prlimit64
|
|
|
|
497 common name_to_handle_at sys_name_to_handle_at
|
|
|
|
498 common open_by_handle_at sys_open_by_handle_at
|
|
|
|
499 common clock_adjtime sys_clock_adjtime
|
|
|
|
500 common syncfs sys_syncfs
|
|
|
|
501 common setns sys_setns
|
|
|
|
502 common accept4 sys_accept4
|
|
|
|
503 common sendmmsg sys_sendmmsg
|
|
|
|
504 common process_vm_readv sys_process_vm_readv
|
|
|
|
505 common process_vm_writev sys_process_vm_writev
|
|
|
|
506 common kcmp sys_kcmp
|
|
|
|
507 common finit_module sys_finit_module
|
|
|
|
508 common sched_setattr sys_sched_setattr
|
|
|
|
509 common sched_getattr sys_sched_getattr
|
|
|
|
510 common renameat2 sys_renameat2
|
|
|
|
511 common getrandom sys_getrandom
|
|
|
|
512 common memfd_create sys_memfd_create
|
|
|
|
513 common execveat sys_execveat
|
|
|
|
514 common seccomp sys_seccomp
|
|
|
|
515 common bpf sys_bpf
|
|
|
|
516 common userfaultfd sys_userfaultfd
|
|
|
|
517 common membarrier sys_membarrier
|
|
|
|
518 common mlock2 sys_mlock2
|
|
|
|
519 common copy_file_range sys_copy_file_range
|
|
|
|
520 common preadv2 sys_preadv2
|
|
|
|
521 common pwritev2 sys_pwritev2
|
|
|
|
522 common statx sys_statx
|
2019-01-03 21:10:26 +00:00
|
|
|
523 common io_pgetevents sys_io_pgetevents
|
2018-12-31 22:12:32 +00:00
|
|
|
524 common pkey_mprotect sys_pkey_mprotect
|
|
|
|
525 common pkey_alloc sys_pkey_alloc
|
|
|
|
526 common pkey_free sys_pkey_free
|
|
|
|
527 common rseq sys_rseq
|
2019-01-11 12:57:12 +00:00
|
|
|
528 common statfs64 sys_statfs64
|
|
|
|
529 common fstatfs64 sys_fstatfs64
|
alpha: add generic get{eg,eu,g,p,u,pp}id() syscalls
Alpha has traditionally followed the OSF1 calling conventions
here, with its getxpid, getxuid, getxgid system calls returning
two different values in separate registers.
Following what glibc has done here, we can define getpid,
getuid and getgid to be aliases for getxpid, getxuid and getxgid
respectively, and add new system call numbers for getppid, geteuid
and getegid.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2019-01-11 14:09:11 +00:00
|
|
|
530 common getegid sys_getegid
|
|
|
|
531 common geteuid sys_geteuid
|
|
|
|
532 common getppid sys_getppid
|
2019-01-10 11:45:11 +00:00
|
|
|
# all other architectures have common numbers for new syscalls, alpha
|
|
|
|
# is the exception.
|
2019-02-28 12:59:19 +00:00
|
|
|
534 common pidfd_send_signal sys_pidfd_send_signal
|
|
|
|
535 common io_uring_setup sys_io_uring_setup
|
|
|
|
536 common io_uring_enter sys_io_uring_enter
|
|
|
|
537 common io_uring_register sys_io_uring_register
|
2019-05-16 11:52:34 +00:00
|
|
|
538 common open_tree sys_open_tree
|
|
|
|
539 common move_mount sys_move_mount
|
|
|
|
540 common fsopen sys_fsopen
|
|
|
|
541 common fsconfig sys_fsconfig
|
|
|
|
542 common fsmount sys_fsmount
|
|
|
|
543 common fspick sys_fspick
|
2019-05-24 10:44:59 +00:00
|
|
|
544 common pidfd_open sys_pidfd_open
|
2019-07-14 19:22:04 +00:00
|
|
|
# 545 reserved for clone3
|
2019-05-24 09:31:44 +00:00
|
|
|
546 common close_range sys_close_range
|
open: introduce openat2(2) syscall
/* Background. */
For a very long time, extending openat(2) with new features has been
incredibly frustrating. This stems from the fact that openat(2) is
possibly the most famous counter-example to the mantra "don't silently
accept garbage from userspace" -- it doesn't check whether unknown flags
are present[1].
This means that (generally) the addition of new flags to openat(2) has
been fraught with backwards-compatibility issues (O_TMPFILE has to be
defined as __O_TMPFILE|O_DIRECTORY|[O_RDWR or O_WRONLY] to ensure old
kernels gave errors, since it's insecure to silently ignore the
flag[2]). All new security-related flags therefore have a tough road to
being added to openat(2).
Userspace also has a hard time figuring out whether a particular flag is
supported on a particular kernel. While it is now possible with
contemporary kernels (thanks to [3]), older kernels will expose unknown
flag bits through fcntl(F_GETFL). Giving a clear -EINVAL during
openat(2) time matches modern syscall designs and is far more
fool-proof.
In addition, the newly-added path resolution restriction LOOKUP flags
(which we would like to expose to user-space) don't feel related to the
pre-existing O_* flag set -- they affect all components of path lookup.
We'd therefore like to add a new flag argument.
Adding a new syscall allows us to finally fix the flag-ignoring problem,
and we can make it extensible enough so that we will hopefully never
need an openat3(2).
/* Syscall Prototype. */
/*
* open_how is an extensible structure (similar in interface to
* clone3(2) or sched_setattr(2)). The size parameter must be set to
* sizeof(struct open_how), to allow for future extensions. All future
* extensions will be appended to open_how, with their zero value
* acting as a no-op default.
*/
struct open_how { /* ... */ };
int openat2(int dfd, const char *pathname,
struct open_how *how, size_t size);
/* Description. */
The initial version of 'struct open_how' contains the following fields:
flags
Used to specify openat(2)-style flags. However, any unknown flag
bits or otherwise incorrect flag combinations (like O_PATH|O_RDWR)
will result in -EINVAL. In addition, this field is 64-bits wide to
allow for more O_ flags than currently permitted with openat(2).
mode
The file mode for O_CREAT or O_TMPFILE.
Must be set to zero if flags does not contain O_CREAT or O_TMPFILE.
resolve
Restrict path resolution (in contrast to O_* flags they affect all
path components). The current set of flags is as follows (at the
moment, all of the RESOLVE_ flags are implemented as just passing
the corresponding LOOKUP_ flag).
RESOLVE_NO_XDEV => LOOKUP_NO_XDEV
RESOLVE_NO_SYMLINKS => LOOKUP_NO_SYMLINKS
RESOLVE_NO_MAGICLINKS => LOOKUP_NO_MAGICLINKS
RESOLVE_BENEATH => LOOKUP_BENEATH
RESOLVE_IN_ROOT => LOOKUP_IN_ROOT
open_how does not contain an embedded size field, because it is of
little benefit (userspace can figure out the kernel open_how size at
runtime fairly easily without it). It also only contains u64s (even
though ->mode arguably should be a u16) to avoid having padding fields
which are never used in the future.
Note that as a result of the new how->flags handling, O_PATH|O_TMPFILE
is no longer permitted for openat(2). As far as I can tell, this has
always been a bug and appears to not be used by userspace (and I've not
seen any problems on my machines by disallowing it). If it turns out
this breaks something, we can special-case it and only permit it for
openat(2) but not openat2(2).
After input from Florian Weimer, the new open_how and flag definitions
are inside a separate header from uapi/linux/fcntl.h, to avoid problems
that glibc has with importing that header.
/* Testing. */
In a follow-up patch there are over 200 selftests which ensure that this
syscall has the correct semantics and will correctly handle several
attack scenarios.
In addition, I've written a userspace library[4] which provides
convenient wrappers around openat2(RESOLVE_IN_ROOT) (this is necessary
because no other syscalls support RESOLVE_IN_ROOT, and thus lots of care
must be taken when using RESOLVE_IN_ROOT'd file descriptors with other
syscalls). During the development of this patch, I've run numerous
verification tests using libpathrs (showing that the API is reasonably
usable by userspace).
/* Future Work. */
Additional RESOLVE_ flags have been suggested during the review period.
These can be easily implemented separately (such as blocking auto-mount
during resolution).
Furthermore, there are some other proposed changes to the openat(2)
interface (the most obvious example is magic-link hardening[5]) which
would be a good opportunity to add a way for userspace to restrict how
O_PATH file descriptors can be re-opened.
Another possible avenue of future work would be some kind of
CHECK_FIELDS[6] flag which causes the kernel to indicate to userspace
which openat2(2) flags and fields are supported by the current kernel
(to avoid userspace having to go through several guesses to figure it
out).
[1]: https://lwn.net/Articles/588444/
[2]: https://lore.kernel.org/lkml/CA+55aFyyxJL1LyXZeBsf2ypriraj5ut1XkNDsunRBqgVjZU_6Q@mail.gmail.com
[3]: commit 629e014bb834 ("fs: completely ignore unknown open flags")
[4]: https://sourceware.org/bugzilla/show_bug.cgi?id=17523
[5]: https://lore.kernel.org/lkml/20190930183316.10190-2-cyphar@cyphar.com/
[6]: https://youtu.be/ggD-eb3yPVs
Suggested-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2020-01-18 12:07:59 +00:00
|
|
|
547 common openat2 sys_openat2
|
2020-01-07 17:59:26 +00:00
|
|
|
548 common pidfd_getfd sys_pidfd_getfd
|
2020-05-14 14:44:25 +00:00
|
|
|
549 common faccessat2 sys_faccessat2
|
mm/madvise: introduce process_madvise() syscall: an external memory hinting API
There is a use case where System Management Software (SMS) wants to give a
memory hint like MADV_[COLD|PAGEOUT] to other processes; in the
case of Android, it is the ActivityManagerService.
The information required to make the reclaim decision is not known to the
app. Instead, it is known to the centralized userspace
daemon(ActivityManagerService), and that daemon must be able to initiate
reclaim on its own without any app involvement.
To solve the issue, this patch introduces a new syscall
process_madvise(2). It uses pidfd of an external process to give the
hint. It also supports vector address range because Android app has
thousands of vmas due to zygote so it's totally waste of CPU and power if
we should call the syscall one by one for each vma.(With testing 2000-vma
syscall vs 1-vector syscall, it showed 15% performance improvement. I
think it would be bigger in real practice because the testing ran very
cache friendly environment).
Another potential use case for the vector range is to amortize the cost
of TLB shootdowns for multiple ranges when using MADV_DONTNEED; this could
benefit users like TCP receive zerocopy and malloc implementations. In
future, we could find more usecases for other advises so let's make it
happens as API since we introduce a new syscall at this moment. With
that, existing madvise(2) user could replace it with process_madvise(2)
with their own pid if they want to have batch address ranges support
feature.
Since it could affect another process's address range, only a privileged
process (PTRACE_MODE_ATTACH_FSCREDS) or one that otherwise (e.g., by being the
same UID) has the right to ptrace the target process can use it successfully.
The flag argument is reserved for future use if we need to extend the API.
I think supporting all hints madvise has/will supported/support to
process_madvise is rather risky. Because we are not sure all hints make
sense from external process and implementation for the hint may rely on
the caller being in the current context so it could be error-prone. Thus,
I just limited hints as MADV_[COLD|PAGEOUT] in this patch.
If someone want to add other hints, we could hear the usecase and review
it for each hint. It's safer for maintenance rather than introducing a
buggy syscall but hard to fix it later.
So finally, the API is as follows,
ssize_t process_madvise(int pidfd, const struct iovec *iovec,
unsigned long vlen, int advice, unsigned int flags);
DESCRIPTION
The process_madvise() system call is used to give advice or directions
to the kernel about the address ranges from external process as well as
local process. It provides the advice to address ranges of process
described by iovec and vlen. The goal of such advice is to improve
system or application performance.
The pidfd selects the process referred to by the PID file descriptor
specified in pidfd. (See pidfd_open(2) for further information)
The pointer iovec points to an array of iovec structures, defined in
<sys/uio.h> as:
struct iovec {
void *iov_base; /* starting address */
size_t iov_len; /* number of bytes to be advised */
};
The iovec describes address ranges beginning at address(iov_base)
and with size length of bytes(iov_len).
The vlen represents the number of elements in iovec.
The advice is indicated in the advice argument, which is one of the
following at this moment if the target process specified by pidfd is
external.
MADV_COLD
MADV_PAGEOUT
Permission to provide a hint to external process is governed by a
ptrace access mode PTRACE_MODE_ATTACH_FSCREDS check; see ptrace(2).
The process_madvise supports every advice madvise(2) has if target
process is in same thread group with calling process so user could
use process_madvise(2) to extend existing madvise(2) to support
vector address ranges.
RETURN VALUE
On success, process_madvise() returns the number of bytes advised.
This return value may be less than the total number of requested
bytes, if an error occurred. The caller should check return value
to determine whether a partial advice occurred.
FAQ:
Q.1 - Why does any external entity have better knowledge?
Quote from Sandeep
"For Android, every application (including the special SystemServer)
are forked from Zygote. The reason of course is to share as many
libraries and classes between the two as possible to benefit from the
preloading during boot.
After applications start, (almost) all of the APIs end up calling into
this SystemServer process over IPC (binder) and back to the
application.
In a fully running system, the SystemServer monitors every single
process periodically to calculate their PSS / RSS and also decides
which process is "important" to the user for interactivity.
So, because of how these processes start _and_ the fact that the
SystemServer is looping to monitor each process, it does tend to *know*
which address range of the application is not used / useful.
Besides, we can never rely on applications to clean things up
themselves. We've had the "hey app1, the system is low on memory,
please trim your memory usage down" notifications for a long time[1].
They rely on applications honoring the broadcasts and very few do.
So, if we want to avoid the inevitable killing of the application and
restarting it, some way to be able to tell the OS about unimportant
memory in these applications will be useful.
- ssp
Q.2 - How to guarantee the race(i.e., object validation) between when
giving a hint from an external process and get the hint from the target
process?
process_madvise operates on the target process's address space as it
exists at the instant that process_madvise is called. If the
target process can run between the time the calling process
inspects the target process address space and the time that
process_madvise is actually called, process_madvise may operate on
memory regions that the calling process does not expect. It's the
responsibility of the process calling process_madvise to close this
race condition. For example, the calling process can suspend the
target process with ptrace, SIGSTOP, or the freezer cgroup so that it
doesn't have an opportunity to change its own address space before
process_madvise is called. Another option is to operate on memory
regions that the caller knows a priori will be unchanged in the target
process. Yet another option is to accept the race for certain
process_madvise calls after reasoning that mistargeting will do no
harm. The suggested API itself does not provide synchronization. The
same applies to other APIs like move_pages and process_vm_writev.
The race isn't really a problem though. Why is it so wrong to require
that callers do their own synchronization in some manner? Nobody
objects to write(2) merely because it's possible for two processes to
open the same file and clobber each other's writes --- instead, we tell
people to use flock or something. Think about mmap. It never
guarantees newly allocated address space is still valid when the user
tries to access it because other threads could unmap the memory right
before. That's where we need synchronization by using other API or
design from userside. It shouldn't be part of API itself. If someone
needs more fine-grained synchronization rather than process level,
there were two ideas suggested - cookie[2] and anon-fd[3]. Both are
applicable via using last reserved argument of the API but I don't
think it's necessary right now since we have already ways to prevent
the race so don't want to add additional complexity with more
fine-grained optimization model.
To make the API extend, it reserved an unsigned long as last argument
so we could support it in future if someone really needs it.
Q.3 - Why doesn't ptrace work?
Injecting an madvise in the target process using ptrace would not work
for us because such injected madvise would have to be executed by the
target process, which means that process would have to be runnable and
that creates the risk of the abovementioned race and hinting a wrong
VMA. Furthermore, we want to act the hint in caller's context, not the
callee's, because the callee is usually limited in cpuset/cgroups or
even in a frozen state, so it can't act by itself quickly enough, which
causes more thrashing/kills. It also doesn't work if the target process is
already ptraced (e.g., strace, debugger, minidump) because a process can have at
most one ptracer.
[1] https://developer.android.com/topic/performance/memory"
[2] process_getinfo for getting the cookie which is updated whenever
vma of process address layout are changed - Daniel Colascione -
https://lore.kernel.org/lkml/20190520035254.57579-1-minchan@kernel.org/T/#m7694416fd179b2066a2c62b5b139b14e3894e224
[3] anonymous fd which is used for the object(i.e., address range)
validation - Michal Hocko -
https://lore.kernel.org/lkml/20200120112722.GY18451@dhcp22.suse.cz/
[minchan@kernel.org: fix process_madvise build break for arm64]
Link: http://lkml.kernel.org/r/20200303145756.GA219683@google.com
[minchan@kernel.org: fix build error for mips of process_madvise]
Link: http://lkml.kernel.org/r/20200508052517.GA197378@google.com
[akpm@linux-foundation.org: fix patch ordering issue]
[akpm@linux-foundation.org: fix arm64 whoops]
[minchan@kernel.org: make process_madvise() vlen arg have type size_t, per Florian]
[akpm@linux-foundation.org: fix i386 build]
[sfr@canb.auug.org.au: fix syscall numbering]
Link: https://lkml.kernel.org/r/20200905142639.49fc3f1a@canb.auug.org.au
[sfr@canb.auug.org.au: madvise.c needs compat.h]
Link: https://lkml.kernel.org/r/20200908204547.285646b4@canb.auug.org.au
[minchan@kernel.org: fix mips build]
Link: https://lkml.kernel.org/r/20200909173655.GC2435453@google.com
[yuehaibing@huawei.com: remove duplicate header which is included twice]
Link: https://lkml.kernel.org/r/20200915121550.30584-1-yuehaibing@huawei.com
[minchan@kernel.org: do not use helper functions for process_madvise]
Link: https://lkml.kernel.org/r/20200921175539.GB387368@google.com
[akpm@linux-foundation.org: pidfd_get_pid() gained an argument]
[sfr@canb.auug.org.au: fix up for "iov_iter: transparently handle compat iovecs in import_iovec"]
Link: https://lkml.kernel.org/r/20200928212542.468e1fef@canb.auug.org.au
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Christian Brauner <christian@brauner.io>
Cc: Daniel Colascione <dancol@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Dias <joaodias@google.com>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleksandr Natalenko <oleksandr@redhat.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: SeongJae Park <sj38.park@gmail.com>
Cc: SeongJae Park <sjpark@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Sonny Rao <sonnyrao@google.com>
Cc: Tim Murray <timmurray@google.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Florian Weimer <fw@deneb.enyo.de>
Cc: <linux-man@vger.kernel.org>
Link: http://lkml.kernel.org/r/20200302193630.68771-3-minchan@kernel.org
Link: http://lkml.kernel.org/r/20200508183320.GA125527@google.com
Link: http://lkml.kernel.org/r/20200622192900.22757-4-minchan@kernel.org
Link: https://lkml.kernel.org/r/20200901000633.1920247-4-minchan@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-10-17 23:14:59 +00:00
|
|
|
550 common process_madvise sys_process_madvise
|
2020-12-18 22:05:41 +00:00
|
|
|
551 common epoll_pwait2 sys_epoll_pwait2
|
fs: add mount_setattr()
This implements the missing mount_setattr() syscall. While the new mount
api allows to change the properties of a superblock there is currently
no way to change the properties of a mount or a mount tree using file
descriptors which the new mount api is based on. In addition the old
mount api has the restriction that mount options cannot be applied
recursively. This hasn't changed since changing mount options on a
per-mount basis was implemented in [1] and has been a frequent request
not just for convenience but also for security reasons. The legacy
mount syscall is unable to accommodate this behavior without introducing
a whole new set of flags because MS_REC | MS_REMOUNT | MS_BIND |
MS_RDONLY | MS_NOEXEC | [...] only apply the mount option to the topmost
mount. Changing MS_REC to apply to the whole mount tree would mean
introducing a significant uapi change and would likely cause significant
regressions.
The new mount_setattr() syscall allows to recursively clear and set
mount options in one shot. Multiple calls to change mount options
requesting the same changes are idempotent:
int mount_setattr(int dfd, const char *path, unsigned flags,
struct mount_attr *uattr, size_t usize);
Flags to modify path resolution behavior are specified in the @flags
argument. Currently, AT_EMPTY_PATH, AT_RECURSIVE, AT_SYMLINK_NOFOLLOW,
and AT_NO_AUTOMOUNT are supported. If useful, additional lookup flags to
restrict path resolution as introduced with openat2() might be supported
in the future.
The mount_setattr() syscall can be expected to grow over time and is
designed with extensibility in mind. It follows the extensible syscall
pattern we have used with other syscalls such as openat2(), clone3(),
sched_{set,get}attr(), and others.
The set of mount options is passed in the uapi struct mount_attr which
currently has the following layout:
struct mount_attr {
__u64 attr_set;
__u64 attr_clr;
__u64 propagation;
__u64 userns_fd;
};
The @attr_set and @attr_clr members are used to clear and set mount
options. This way a user can e.g. request that a set of flags is to be
raised such as turning mounts readonly by raising MOUNT_ATTR_RDONLY in
@attr_set while at the same time requesting that another set of flags is
to be lowered such as removing noexec from a mount tree by specifying
MOUNT_ATTR_NOEXEC in @attr_clr.
Note, since the MOUNT_ATTR_<atime> values are an enum starting from 0,
not a bitmap, users wanting to transition to a different atime setting
cannot simply specify the atime setting in @attr_set, but must also
specify MOUNT_ATTR__ATIME in the @attr_clr field. So we ensure that
MOUNT_ATTR__ATIME can't be partially set in @attr_clr and that @attr_set
can't have any atime bits set if MOUNT_ATTR__ATIME isn't set in
@attr_clr.
The @propagation field lets callers specify the propagation type of a
mount tree. Propagation is a single property that has four different
settings and as such is not really a flag argument but an enum.
Specifically, it would be unclear what setting and clearing propagation
settings in combination would amount to. The legacy mount() syscall thus
forbids the combination of multiple propagation settings too. The goal
is to keep the semantics of mount propagation somewhat simple as they
are overly complex as it is.
The @userns_fd field lets user specify a user namespace whose idmapping
becomes the idmapping of the mount. This is implemented and explained in
detail in the next patch.
[1]: commit 2e4b7fcd9260 ("[PATCH] r/o bind mounts: honor mount writer counts at remount")
Link: https://lore.kernel.org/r/20210121131959.646623-35-christian.brauner@ubuntu.com
Cc: David Howells <dhowells@redhat.com>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
2021-01-21 13:19:53 +00:00
|
|
|
552 common mount_setattr sys_mount_setattr
|
2021-05-31 16:42:58 +00:00
|
|
|
553 common quotactl_fd sys_quotactl_fd
|
2021-04-22 15:41:19 +00:00
|
|
|
554 common landlock_create_ruleset sys_landlock_create_ruleset
|
|
|
|
555 common landlock_add_rule sys_landlock_add_rule
|
|
|
|
556 common landlock_restrict_self sys_landlock_restrict_self
|
2021-09-02 22:00:33 +00:00
|
|
|
# 557 reserved for memfd_secret
|
|
|
|
558 common process_mrelease sys_process_mrelease
|
2021-11-24 13:21:12 +00:00
|
|
|
559 common futex_waitv sys_futex_waitv
|
2022-01-14 22:08:21 +00:00
|
|
|
560 common set_mempolicy_home_node sys_ni_syscall
|