Merge branch 'master' into mm-nonmm-stable

This commit is contained in:
akpm 2022-06-27 10:31:44 -07:00
commit ee56c3e8ee
661 changed files with 8469 additions and 5706 deletions

View File

@ -10,6 +10,8 @@
# Please keep this list dictionary sorted. # Please keep this list dictionary sorted.
# #
Aaron Durbin <adurbin@google.com> Aaron Durbin <adurbin@google.com>
Abel Vesa <abelvesa@kernel.org> <abel.vesa@nxp.com>
Abel Vesa <abelvesa@kernel.org> <abelvesa@gmail.com>
Abhinav Kumar <quic_abhinavk@quicinc.com> <abhinavk@codeaurora.org> Abhinav Kumar <quic_abhinavk@quicinc.com> <abhinavk@codeaurora.org>
Adam Oldham <oldhamca@gmail.com> Adam Oldham <oldhamca@gmail.com>
Adam Radford <aradford@gmail.com> Adam Radford <aradford@gmail.com>
@ -85,6 +87,7 @@ Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
Christian Brauner <brauner@kernel.org> <christian@brauner.io> Christian Brauner <brauner@kernel.org> <christian@brauner.io>
Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com> Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com> Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
Christian Marangi <ansuelsmth@gmail.com>
Christophe Ricard <christophe.ricard@gmail.com> Christophe Ricard <christophe.ricard@gmail.com>
Christoph Hellwig <hch@lst.de> Christoph Hellwig <hch@lst.de>
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com> Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
@ -165,6 +168,7 @@ Jan Glauber <jan.glauber@gmail.com> <jang@de.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com> Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com> Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com> Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com> Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com> Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com> Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>

View File

@ -1,4 +1,4 @@
What: /sys/bus/iio/devices/iio:deviceX/conversion_mode What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode
KernelVersion: 4.2 KernelVersion: 4.2
Contact: linux-iio@vger.kernel.org Contact: linux-iio@vger.kernel.org
Description: Description:

View File

@ -526,6 +526,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/srbds
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
/sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/itlb_multihit
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
Date: January 2018 Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities Description: Information about CPU vulnerabilities

View File

@ -17,3 +17,4 @@ are configurable at compile, boot or run time.
special-register-buffer-data-sampling.rst special-register-buffer-data-sampling.rst
core-scheduling.rst core-scheduling.rst
l1d_flush.rst l1d_flush.rst
processor_mmio_stale_data.rst

View File

@ -0,0 +1,246 @@
=========================================
Processor MMIO Stale Data Vulnerabilities
=========================================
Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O
(MMIO) vulnerabilities that can expose data. The sequences of operations for
exposing data range from simple to very complex. Because most of the
vulnerabilities require the attacker to have access to MMIO, many environments
are not affected. System environments using virtualization where MMIO access is
provided to untrusted guests may need mitigation. These vulnerabilities are
not transient execution attacks. However, these vulnerabilities may propagate
stale data into core fill buffers where the data can subsequently be inferred
by an unmitigated transient execution attack. Mitigation for these
vulnerabilities includes a combination of microcode update and software
changes, depending on the platform and usage model. Some of these mitigations
are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or
those used to mitigate Special Register Buffer Data Sampling (SRBDS).
Data Propagators
================
Propagators are operations that result in stale data being copied or moved from
one microarchitectural buffer or register to another. Processor MMIO Stale Data
Vulnerabilities are operations that may result in stale data being directly
read into an architectural, software-visible state or sampled from a buffer or
register.
Fill Buffer Stale Data Propagator (FBSDP)
-----------------------------------------
Stale data may propagate from fill buffers (FB) into the non-coherent portion
of the uncore on some non-coherent writes. Fill buffer propagation by itself
does not make stale data architecturally visible. Stale data must be propagated
to a location where it is subject to reading or sampling.
Sideband Stale Data Propagator (SSDP)
-------------------------------------
The sideband stale data propagator (SSDP) is limited to the client (including
Intel Xeon server E3) uncore implementation. The sideband response buffer is
shared by all client cores. For non-coherent reads that go to sideband
destinations, the uncore logic returns 64 bytes of data to the core, including
both requested data and unrequested stale data, from a transaction buffer and
the sideband response buffer. As a result, stale data from the sideband
response and transaction buffers may now reside in a core fill buffer.
Primary Stale Data Propagator (PSDP)
------------------------------------
The primary stale data propagator (PSDP) is limited to the client (including
Intel Xeon server E3) uncore implementation. Similar to the sideband response
buffer, the primary response buffer is shared by all client cores. For some
processors, MMIO primary reads will return 64 bytes of data to the core fill
buffer including both requested data and unrequested stale data. This is
similar to the sideband stale data propagator.
Vulnerabilities
===============
Device Register Partial Write (DRPW) (CVE-2022-21166)
-----------------------------------------------------
Some endpoint MMIO registers incorrectly handle writes that are smaller than
the register size. Instead of aborting the write or only copying the correct
subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than
specified by the write transaction may be written to the register. On
processors affected by FBSDP, this may expose stale data from the fill buffers
of the core that created the write transaction.
Shared Buffers Data Sampling (SBDS) (CVE-2022-21125)
----------------------------------------------------
After propagators may have moved data around the uncore and copied stale data
into client core fill buffers, processors affected by Microarchitectural Fill
Buffer Data Sampling (MFBDS) can leak data from the fill buffer. It is limited
to the client (including Intel Xeon server E3) uncore implementation.
Shared Buffers Data Read (SBDR) (CVE-2022-21123)
------------------------------------------------
It is similar to Shared Buffers Data Sampling (SBDS) except that the data is
directly read into the architectural software-visible state. It is limited to
the client (including Intel Xeon server E3) uncore implementation.
Affected Processors
===================
Not all the CPUs are affected by all the variants. For instance, most
processors for the server market (excluding Intel Xeon E3 processors) are
impacted by only Device Register Partial Write (DRPW).
Below is the list of affected Intel processors [#f1]_:
=================== ============ =========
Common name Family_Model Steppings
=================== ============ =========
HASWELL_X 06_3FH 2,4
SKYLAKE_L 06_4EH 3
BROADWELL_X 06_4FH All
SKYLAKE_X 06_55H 3,4,6,7,11
BROADWELL_D 06_56H 3,4,5
SKYLAKE 06_5EH 3
ICELAKE_X 06_6AH 4,5,6
ICELAKE_D 06_6CH 1
ICELAKE_L 06_7EH 5
ATOM_TREMONT_D 06_86H All
LAKEFIELD 06_8AH 1
KABYLAKE_L 06_8EH 9 to 12
ATOM_TREMONT 06_96H 1
ATOM_TREMONT_L 06_9CH 0
KABYLAKE 06_9EH 9 to 13
COMETLAKE 06_A5H 2,3,5
COMETLAKE_L 06_A6H 0,1
ROCKETLAKE 06_A7H 1
=================== ============ =========
If a CPU is in the affected processor list, but not affected by a variant, it
is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later
section, mitigation largely remains the same for all the variants, i.e. to
clear the CPU fill buffers via VERW instruction.
New bits in MSRs
================
Newer processors and microcode updates on existing affected processors added new
bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
specific variants of Processor MMIO Stale Data vulnerabilities and mitigation
capability.
MSR IA32_ARCH_CAPABILITIES
--------------------------
Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the
Shared Buffers Data Read (SBDR) vulnerability or the sideband stale
data propagator (SSDP).
Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer
Stale Data Propagator (FBSDP).
Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data
Propagator (PSDP).
Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer
values as part of MD_CLEAR operations. Processors that do not
enumerate MDS_NO (meaning they are affected by MDS) but that do
enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate
FB_CLEAR as part of their MD_CLEAR support.
Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR
IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS
bit can be set to cause the VERW instruction to not perform the
FB_CLEAR action. Not all processors that support FB_CLEAR will support
FB_CLEAR_CTRL.
MSR IA32_MCU_OPT_CTRL
---------------------
Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR
action. This may be useful to reduce the performance impact of FB_CLEAR in
cases where system software deems it warranted (for example, when performance
is more critical, or the untrusted software has no MMIO access). Note that
FB_CLEAR_DIS has no impact on enumeration (for example, it does not change
FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors
that enumerate FB_CLEAR.
Mitigation
==========
Like MDS, all variants of Processor MMIO Stale Data vulnerabilities share the
same mitigation strategy: force the CPU to clear the affected buffers before
an attacker can extract the secrets.
This is achieved by using the otherwise unused and obsolete VERW instruction in
combination with a microcode update. The microcode clears the affected CPU
buffers when the VERW instruction is executed.
The kernel reuses the MDS function to invoke the buffer clearing:
mds_clear_cpu_buffers()
On MDS affected CPUs, the kernel already invokes CPU buffer clear on
kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
additional mitigation is needed on such CPUs.
For CPUs not affected by MDS or TAA, mitigation is needed only against an
attacker with MMIO capability. Therefore, VERW is not required for
kernel/userspace. For the virtualization case, VERW is only needed at VMENTER
for a guest with MMIO capability.
Mitigation points
-----------------
Return to user space
^^^^^^^^^^^^^^^^^^^^
Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation
needed.
C-State transition
^^^^^^^^^^^^^^^^^^
Control register writes by CPU during C-state transition can propagate data
from fill buffer to uncore buffers. Execute VERW before C-state transition to
clear CPU fill buffers.
Guest entry point
^^^^^^^^^^^^^^^^^
Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise
execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by
MDS/TAA, guests without MMIO access cannot extract secrets using Processor MMIO
Stale Data vulnerabilities, so there is no need to execute VERW for such guests.
Mitigation control on the kernel command line
---------------------------------------------
The kernel command line allows control of the Processor MMIO Stale Data
mitigations at boot time with the option "mmio_stale_data=". The valid
arguments for this option are:
========== =================================================================
full If the CPU is vulnerable, enable mitigation; CPU buffer clearing
on exit to userspace and when entering a VM. Idle transitions are
protected as well. It does not automatically disable SMT.
full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the
complete mitigation.
off Disables mitigation completely.
========== =================================================================
If the CPU is affected and mmio_stale_data=off is not supplied on the kernel
command line, then the kernel selects the appropriate mitigation.
Mitigation status information
-----------------------------
The Linux kernel provides a sysfs interface to enumerate the current
vulnerability status of the system: whether the system is vulnerable, and
which mitigations are active. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
The possible values in this file are:
.. list-table::
* - 'Not affected'
- The processor is not vulnerable
* - 'Vulnerable'
- The processor is vulnerable, but no mitigation is enabled
* - 'Vulnerable: Clear CPU buffers attempted, no microcode'
- The processor is vulnerable, but microcode is not updated. The
mitigation is enabled on a best effort basis.
* - 'Mitigation: Clear CPU buffers'
- The processor is vulnerable and the CPU buffer clearing mitigation is
enabled.
If the processor is vulnerable then the following information is appended to
the above information:
======================== ===========================================
'SMT vulnerable' SMT is enabled
'SMT disabled' SMT is disabled
'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
======================== ===========================================
References
----------
.. [#f1] Affected Processors
https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html

View File

@ -2469,7 +2469,6 @@
protected: nVHE-based mode with support for guests whose protected: nVHE-based mode with support for guests whose
state is kept private from the host. state is kept private from the host.
Not valid if the kernel is running in EL2.
Defaults to VHE/nVHE based on hardware support. Setting Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation mode to "protected" will disable kexec and hibernation
@ -3176,6 +3175,7 @@
srbds=off [X86,INTEL] srbds=off [X86,INTEL]
no_entry_flush [PPC] no_entry_flush [PPC]
no_uaccess_flush [PPC] no_uaccess_flush [PPC]
mmio_stale_data=off [X86]
Exceptions: Exceptions:
This does not have any effect on This does not have any effect on
@ -3197,6 +3197,7 @@
Equivalent to: l1tf=flush,nosmt [X86] Equivalent to: l1tf=flush,nosmt [X86]
mds=full,nosmt [X86] mds=full,nosmt [X86]
tsx_async_abort=full,nosmt [X86] tsx_async_abort=full,nosmt [X86]
mmio_stale_data=full,nosmt [X86]
mminit_loglevel= mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@ -3206,6 +3207,40 @@
log everything. Information is printed at KERN_DEBUG log everything. Information is printed at KERN_DEBUG
so loglevel=8 may also need to be specified. so loglevel=8 may also need to be specified.
mmio_stale_data=
[X86,INTEL] Control mitigation for the Processor
MMIO Stale Data vulnerabilities.
Processor MMIO Stale Data is a class of
vulnerabilities that may expose data after an MMIO
operation. Exposed data could originate or end in
the same CPU buffers as those affected by MDS and TAA.
Therefore, similar to MDS and TAA, the mitigation
is to clear the affected CPU buffers.
This parameter controls the mitigation. The
options are:
full - Enable mitigation on vulnerable CPUs
full,nosmt - Enable mitigation and disable SMT on
vulnerable CPUs.
off - Unconditionally disable mitigation
On MDS or TAA affected machines,
mmio_stale_data=off can be prevented by an active
MDS or TAA mitigation, because these vulnerabilities
are mitigated with the same mechanism. To disable
this mitigation, you therefore need to specify
mds=off and tsx_async_abort=off too.
Not specifying this option is equivalent to
mmio_stale_data=full.
For details see:
Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
module.sig_enforce module.sig_enforce
[KNL] When CONFIG_MODULE_SIG is set, this means that [KNL] When CONFIG_MODULE_SIG is set, this means that
modules without (valid) signatures will fail to load. modules without (valid) signatures will fail to load.

View File

@ -40,9 +40,8 @@ properties:
value to be used for converting remote channel measurements to value to be used for converting remote channel measurements to
temperature. temperature.
$ref: /schemas/types.yaml#/definitions/int32 $ref: /schemas/types.yaml#/definitions/int32
items: minimum: -128
minimum: -128 maximum: 127
maximum: 127
ti,beta-compensation: ti,beta-compensation:
description: description:

View File

@ -30,6 +30,7 @@ properties:
- socionext,uniphier-ld11-aidet - socionext,uniphier-ld11-aidet
- socionext,uniphier-ld20-aidet - socionext,uniphier-ld20-aidet
- socionext,uniphier-pxs3-aidet - socionext,uniphier-pxs3-aidet
- socionext,uniphier-nx1-aidet
reg: reg:
maxItems: 1 maxItems: 1

View File

@ -47,6 +47,5 @@ examples:
clocks = <&clkcfg CLK_SPI0>; clocks = <&clkcfg CLK_SPI0>;
interrupt-parent = <&plic>; interrupt-parent = <&plic>;
interrupts = <54>; interrupts = <54>;
spi-max-frequency = <25000000>;
}; };
... ...

View File

@ -110,7 +110,6 @@ examples:
pinctrl-names = "default"; pinctrl-names = "default";
pinctrl-0 = <&qup_spi1_default>; pinctrl-0 = <&qup_spi1_default>;
interrupts = <GIC_SPI 602 IRQ_TYPE_LEVEL_HIGH>; interrupts = <GIC_SPI 602 IRQ_TYPE_LEVEL_HIGH>;
spi-max-frequency = <50000000>;
#address-cells = <1>; #address-cells = <1>;
#size-cells = <0>; #size-cells = <0>;
}; };

View File

@ -136,7 +136,8 @@ properties:
Phandle of a companion. Phandle of a companion.
phys: phys:
maxItems: 1 minItems: 1
maxItems: 3
phy-names: phy-names:
const: usb const: usb

View File

@ -103,7 +103,8 @@ properties:
Overrides the detected port count Overrides the detected port count
phys: phys:
maxItems: 1 minItems: 1
maxItems: 3
phy-names: phy-names:
const: usb const: usb

View File

@ -6,7 +6,7 @@ This document explains how GPIOs can be assigned to given devices and functions.
Note that it only applies to the new descriptor-based interface. For a Note that it only applies to the new descriptor-based interface. For a
description of the deprecated integer-based GPIO interface please refer to description of the deprecated integer-based GPIO interface please refer to
gpio-legacy.txt (actually, there is no real mapping possible with the old legacy.rst (actually, there is no real mapping possible with the old
interface; you just fetch an integer from somewhere and request the interface; you just fetch an integer from somewhere and request the
corresponding GPIO). corresponding GPIO).

View File

@ -4,7 +4,7 @@ GPIO Descriptor Consumer Interface
This document describes the consumer interface of the GPIO framework. Note that This document describes the consumer interface of the GPIO framework. Note that
it describes the new descriptor-based interface. For a description of the it describes the new descriptor-based interface. For a description of the
deprecated integer-based GPIO interface please refer to gpio-legacy.txt. deprecated integer-based GPIO interface please refer to legacy.rst.
Guidelines for GPIOs consumers Guidelines for GPIOs consumers
@ -78,7 +78,7 @@ whether the line is configured active high or active low (see
The two last flags are used for use cases where open drain is mandatory, such The two last flags are used for use cases where open drain is mandatory, such
as I2C: if the line is not already configured as open drain in the mappings as I2C: if the line is not already configured as open drain in the mappings
(see board.txt), then open drain will be enforced anyway and a warning will be (see board.rst), then open drain will be enforced anyway and a warning will be
printed that the board configuration needs to be updated to match the use case. printed that the board configuration needs to be updated to match the use case.
Both functions return either a valid GPIO descriptor, or an error code checkable Both functions return either a valid GPIO descriptor, or an error code checkable
@ -270,7 +270,7 @@ driven.
The same is applicable for open drain or open source output lines: those do not The same is applicable for open drain or open source output lines: those do not
actively drive their output high (open drain) or low (open source), they just actively drive their output high (open drain) or low (open source), they just
switch their output to a high impedance value. The consumer should not need to switch their output to a high impedance value. The consumer should not need to
care. (For details read about open drain in driver.txt.) care. (For details read about open drain in driver.rst.)
With this, all the gpiod_set_(array)_value_xxx() functions interpret the With this, all the gpiod_set_(array)_value_xxx() functions interpret the
parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line

View File

@ -14,12 +14,12 @@ Due to the history of GPIO interfaces in the kernel, there are two different
ways to obtain and use GPIOs: ways to obtain and use GPIOs:
- The descriptor-based interface is the preferred way to manipulate GPIOs, - The descriptor-based interface is the preferred way to manipulate GPIOs,
and is described by all the files in this directory excepted gpio-legacy.txt. and is described by all the files in this directory excepted legacy.rst.
- The legacy integer-based interface which is considered deprecated (but still - The legacy integer-based interface which is considered deprecated (but still
usable for compatibility reasons) is documented in gpio-legacy.txt. usable for compatibility reasons) is documented in legacy.rst.
The remainder of this document applies to the new descriptor-based interface. The remainder of this document applies to the new descriptor-based interface.
gpio-legacy.txt contains the same information applied to the legacy legacy.rst contains the same information applied to the legacy
integer-based interface. integer-based interface.

View File

@ -19,13 +19,23 @@ The main Btrfs features include:
* Subvolumes (separate internal filesystem roots) * Subvolumes (separate internal filesystem roots)
* Object level mirroring and striping * Object level mirroring and striping
* Checksums on data and metadata (multiple algorithms available) * Checksums on data and metadata (multiple algorithms available)
* Compression * Compression (multiple algorithms available)
* Reflink, deduplication
* Scrub (on-line checksum verification)
* Hierarchical quota groups (subvolume and snapshot support)
* Integrated multiple device support, with several raid algorithms * Integrated multiple device support, with several raid algorithms
* Offline filesystem check * Offline filesystem check
* Efficient incremental backup and FS mirroring * Efficient incremental backup and FS mirroring (send/receive)
* Trim/discard
* Online filesystem defragmentation * Online filesystem defragmentation
* Swapfile support
* Zoned mode
* Read/write metadata verification
* Online resize (shrink, grow)
For more information please refer to the wiki For more information please refer to the documentation site or wiki
https://btrfs.readthedocs.io
https://btrfs.wiki.kernel.org https://btrfs.wiki.kernel.org

View File

@ -13,8 +13,8 @@ disappeared as of Linux 3.0.
There are two places where extended attributes can be found. The first There are two places where extended attributes can be found. The first
place is between the end of each inode entry and the beginning of the place is between the end of each inode entry and the beginning of the
next inode entry. For example, if inode.i\_extra\_isize = 28 and next inode entry. For example, if inode.i_extra_isize = 28 and
sb.inode\_size = 256, then there are 256 - (128 + 28) = 100 bytes sb.inode_size = 256, then there are 256 - (128 + 28) = 100 bytes
available for in-inode extended attribute storage. The second place available for in-inode extended attribute storage. The second place
where extended attributes can be found is in the block pointed to by where extended attributes can be found is in the block pointed to by
``inode.i_file_acl``. As of Linux 3.11, it is not possible for this ``inode.i_file_acl``. As of Linux 3.11, it is not possible for this
@ -38,8 +38,8 @@ Extended attributes, when stored after the inode, have a header
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- h\_magic - h_magic
- Magic number for identification, 0xEA020000. This value is set by the - Magic number for identification, 0xEA020000. This value is set by the
Linux driver, though e2fsprogs doesn't seem to check it(?) Linux driver, though e2fsprogs doesn't seem to check it(?)
@ -55,28 +55,28 @@ The beginning of an extended attribute block is in
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- h\_magic - h_magic
- Magic number for identification, 0xEA020000. - Magic number for identification, 0xEA020000.
* - 0x4 * - 0x4
- \_\_le32 - __le32
- h\_refcount - h_refcount
- Reference count. - Reference count.
* - 0x8 * - 0x8
- \_\_le32 - __le32
- h\_blocks - h_blocks
- Number of disk blocks used. - Number of disk blocks used.
* - 0xC * - 0xC
- \_\_le32 - __le32
- h\_hash - h_hash
- Hash value of all attributes. - Hash value of all attributes.
* - 0x10 * - 0x10
- \_\_le32 - __le32
- h\_checksum - h_checksum
- Checksum of the extended attribute block. - Checksum of the extended attribute block.
* - 0x14 * - 0x14
- \_\_u32 - __u32
- h\_reserved[3] - h_reserved[3]
- Zero. - Zero.
The checksum is calculated against the FS UUID, the 64-bit block number The checksum is calculated against the FS UUID, the 64-bit block number
@ -100,46 +100,46 @@ Attributes stored inside an inode do not need be stored in sorted order.
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_u8 - __u8
- e\_name\_len - e_name_len
- Length of name. - Length of name.
* - 0x1 * - 0x1
- \_\_u8 - __u8
- e\_name\_index - e_name_index
- Attribute name index. There is a discussion of this below. - Attribute name index. There is a discussion of this below.
* - 0x2 * - 0x2
- \_\_le16 - __le16
- e\_value\_offs - e_value_offs
- Location of this attribute's value on the disk block where it is stored. - Location of this attribute's value on the disk block where it is stored.
Multiple attributes can share the same value. For an inode attribute Multiple attributes can share the same value. For an inode attribute
this value is relative to the start of the first entry; for a block this this value is relative to the start of the first entry; for a block this
value is relative to the start of the block (i.e. the header). value is relative to the start of the block (i.e. the header).
* - 0x4 * - 0x4
- \_\_le32 - __le32
- e\_value\_inum - e_value_inum
- The inode where the value is stored. Zero indicates the value is in the - The inode where the value is stored. Zero indicates the value is in the
same block as this entry. This field is only used if the same block as this entry. This field is only used if the
INCOMPAT\_EA\_INODE feature is enabled. INCOMPAT_EA_INODE feature is enabled.
* - 0x8 * - 0x8
- \_\_le32 - __le32
- e\_value\_size - e_value_size
- Length of attribute value. - Length of attribute value.
* - 0xC * - 0xC
- \_\_le32 - __le32
- e\_hash - e_hash
- Hash value of attribute name and attribute value. The kernel doesn't - Hash value of attribute name and attribute value. The kernel doesn't
update the hash for in-inode attributes, so for that case this value update the hash for in-inode attributes, so for that case this value
must be zero, because e2fsck validates any non-zero hash regardless of must be zero, because e2fsck validates any non-zero hash regardless of
where the xattr lives. where the xattr lives.
* - 0x10 * - 0x10
- char - char
- e\_name[e\_name\_len] - e_name[e_name_len]
- Attribute name. Does not include trailing NULL. - Attribute name. Does not include trailing NULL.
Attribute values can follow the end of the entry table. There appears to Attribute values can follow the end of the entry table. There appears to
be a requirement that they be aligned to 4-byte boundaries. The values be a requirement that they be aligned to 4-byte boundaries. The values
are stored starting at the end of the block and grow towards the are stored starting at the end of the block and grow towards the
xattr\_header/xattr\_entry table. When the two collide, the overflow is xattr_header/xattr_entry table. When the two collide, the overflow is
put into a separate disk block. If the disk block fills up, the put into a separate disk block. If the disk block fills up, the
filesystem returns -ENOSPC. filesystem returns -ENOSPC.
@ -167,15 +167,15 @@ the key name. Here is a map of name index values to key prefixes:
* - 1 * - 1
- “user.” - “user.”
* - 2 * - 2
- “system.posix\_acl\_access” - “system.posix_acl_access”
* - 3 * - 3
- “system.posix\_acl\_default” - “system.posix_acl_default”
* - 4 * - 4
- “trusted.” - “trusted.”
* - 6 * - 6
- “security.” - “security.”
* - 7 * - 7
- “system.” (inline\_data only?) - “system.” (inline_data only?)
* - 8 * - 8
- “system.richacl” (SuSE kernels only?) - “system.richacl” (SuSE kernels only?)

View File

@ -23,7 +23,7 @@ means that a block group addresses 32 gigabytes instead of 128 megabytes,
also shrinking the amount of file system overhead for metadata. also shrinking the amount of file system overhead for metadata.
The administrator can set a block cluster size at mkfs time (which is The administrator can set a block cluster size at mkfs time (which is
stored in the s\_log\_cluster\_size field in the superblock); from then stored in the s_log_cluster_size field in the superblock); from then
on, the block bitmaps track clusters, not individual blocks. This means on, the block bitmaps track clusters, not individual blocks. This means
that block groups can be several gigabytes in size (instead of just that block groups can be several gigabytes in size (instead of just
128MiB); however, the minimum allocation unit becomes a cluster, not a 128MiB); however, the minimum allocation unit becomes a cluster, not a

View File

@ -9,15 +9,15 @@ group.
The inode bitmap records which entries in the inode table are in use. The inode bitmap records which entries in the inode table are in use.
As with most bitmaps, one bit represents the usage status of one data As with most bitmaps, one bit represents the usage status of one data
block or inode table entry. This implies a block group size of 8 \* block or inode table entry. This implies a block group size of 8 *
number\_of\_bytes\_in\_a\_logical\_block. number_of_bytes_in_a_logical_block.
NOTE: If ``BLOCK_UNINIT`` is set for a given block group, various parts NOTE: If ``BLOCK_UNINIT`` is set for a given block group, various parts
of the kernel and e2fsprogs code pretend that the block bitmap contains of the kernel and e2fsprogs code pretend that the block bitmap contains
zeros (i.e. all blocks in the group are free). However, it is not zeros (i.e. all blocks in the group are free). However, it is not
necessarily the case that no blocks are in use -- if ``meta_bg`` is set, necessarily the case that no blocks are in use -- if ``meta_bg`` is set,
the bitmaps and group descriptor live inside the group. Unfortunately, the bitmaps and group descriptor live inside the group. Unfortunately,
ext2fs\_test\_block\_bitmap2() will return '0' for those locations, ext2fs_test_block_bitmap2() will return '0' for those locations,
which produces confusing debugfs output. which produces confusing debugfs output.
Inode Table Inode Table

View File

@ -56,39 +56,39 @@ established that the super block and the group descriptor table, if
present, will be at the beginning of the block group. The bitmaps and present, will be at the beginning of the block group. The bitmaps and
the inode table can be anywhere, and it is quite possible for the the inode table can be anywhere, and it is quite possible for the
bitmaps to come after the inode table, or for both to be in different bitmaps to come after the inode table, or for both to be in different
groups (flex\_bg). Leftover space is used for file data blocks, indirect groups (flex_bg). Leftover space is used for file data blocks, indirect
block maps, extent tree blocks, and extended attributes. block maps, extent tree blocks, and extended attributes.
Flexible Block Groups Flexible Block Groups
--------------------- ---------------------
Starting in ext4, there is a new feature called flexible block groups Starting in ext4, there is a new feature called flexible block groups
(flex\_bg). In a flex\_bg, several block groups are tied together as one (flex_bg). In a flex_bg, several block groups are tied together as one
logical block group; the bitmap spaces and the inode table space in the logical block group; the bitmap spaces and the inode table space in the
first block group of the flex\_bg are expanded to include the bitmaps first block group of the flex_bg are expanded to include the bitmaps
and inode tables of all other block groups in the flex\_bg. For example, and inode tables of all other block groups in the flex_bg. For example,
if the flex\_bg size is 4, then group 0 will contain (in order) the if the flex_bg size is 4, then group 0 will contain (in order) the
superblock, group descriptors, data block bitmaps for groups 0-3, inode superblock, group descriptors, data block bitmaps for groups 0-3, inode
bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining
space in group 0 is for file data. The effect of this is to group the space in group 0 is for file data. The effect of this is to group the
block group metadata close together for faster loading, and to enable block group metadata close together for faster loading, and to enable
large files to be contiguous on disk. Backup copies of the superblock large files to be contiguous on disk. Backup copies of the superblock
and group descriptors are always at the beginning of block groups, even and group descriptors are always at the beginning of block groups, even
if flex\_bg is enabled. The number of block groups that make up a if flex_bg is enabled. The number of block groups that make up a
flex\_bg is given by 2 ^ ``sb.s_log_groups_per_flex``. flex_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
Meta Block Groups Meta Block Groups
----------------- -----------------
Without the option META\_BG, for safety concerns, all block group Without the option META_BG, for safety concerns, all block group
descriptor copies are kept in the first block group. Given the default descriptor copies are kept in the first block group. Given the default
128MiB (2^27 bytes) block group size and 64-byte group descriptors, ext4 128MiB (2^27 bytes) block group size and 64-byte group descriptors, ext4
can have at most 2^27/64 = 2^21 block groups. This limits the entire can have at most 2^27/64 = 2^21 block groups. This limits the entire
filesystem size to 2^21 * 2^27 = 2^48 bytes or 256TiB. filesystem size to 2^21 * 2^27 = 2^48 bytes or 256TiB.
The solution to this problem is to use the metablock group feature The solution to this problem is to use the metablock group feature
(META\_BG), which is already in ext3 for all 2.6 releases. With the (META_BG), which is already in ext3 for all 2.6 releases. With the
META\_BG feature, ext4 filesystems are partitioned into many metablock META_BG feature, ext4 filesystems are partitioned into many metablock
groups. Each metablock group is a cluster of block groups whose group groups. Each metablock group is a cluster of block groups whose group
descriptor structures can be stored in a single disk block. For ext4 descriptor structures can be stored in a single disk block. For ext4
filesystems with 4 KB block size, a single metablock group partition filesystems with 4 KB block size, a single metablock group partition
@ -110,7 +110,7 @@ bytes, a meta-block group contains 32 block groups for filesystems with
a 1KB block size, and 128 block groups for filesystems with a 4KB a 1KB block size, and 128 block groups for filesystems with a 4KB
blocksize. Filesystems can either be created using this new block group blocksize. Filesystems can either be created using this new block group
descriptor layout, or existing filesystems can be resized on-line, and descriptor layout, or existing filesystems can be resized on-line, and
the field s\_first\_meta\_bg in the superblock will indicate the first the field s_first_meta_bg in the superblock will indicate the first
block group using this new layout. block group using this new layout.
Please see an important note about ``BLOCK_UNINIT`` in the section about Please see an important note about ``BLOCK_UNINIT`` in the section about
@ -121,15 +121,15 @@ Lazy Block Group Initialization
New in ext4 are three block group descriptor flags that New in ext4 are three block group descriptor flags that
enable mkfs to skip initializing other parts of the block group enable mkfs to skip initializing other parts of the block group
metadata. Specifically, the INODE\_UNINIT and BLOCK\_UNINIT flags mean metadata. Specifically, the INODE_UNINIT and BLOCK_UNINIT flags mean
that the inode and block bitmaps for that group can be calculated and that the inode and block bitmaps for that group can be calculated and
therefore the on-disk bitmap blocks are not initialized. This is therefore the on-disk bitmap blocks are not initialized. This is
generally the case for an empty block group or a block group containing generally the case for an empty block group or a block group containing
only fixed-location block group metadata. The INODE\_ZEROED flag means only fixed-location block group metadata. The INODE_ZEROED flag means
that the inode table has been initialized; mkfs will unset this flag and that the inode table has been initialized; mkfs will unset this flag and
rely on the kernel to initialize the inode tables in the background. rely on the kernel to initialize the inode tables in the background.
By not writing zeroes to the bitmaps and inode table, mkfs time is By not writing zeroes to the bitmaps and inode table, mkfs time is
reduced considerably. Note the feature flag is RO\_COMPAT\_GDT\_CSUM, reduced considerably. Note the feature flag is RO_COMPAT_GDT_CSUM,
but the dumpe2fs output prints this as “uninit\_bg”. They are the same but the dumpe2fs output prints this as “uninit_bg”. They are the same
thing. thing.

View File

@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0 .. SPDX-License-Identifier: GPL-2.0
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| i.i\_block Offset | Where It Points | | i.i_block Offset | Where It Points |
+=====================+==============================================================================================================================================================================================================================+ +=====================+==============================================================================================================================================================================================================================+
| 0 to 11 | Direct map to file blocks 0 to 11. | | 0 to 11 | Direct map to file blocks 0 to 11. |
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -4,7 +4,7 @@ Checksums
--------- ---------
Starting in early 2012, metadata checksums were added to all major ext4 Starting in early 2012, metadata checksums were added to all major ext4
and jbd2 data structures. The associated feature flag is metadata\_csum. and jbd2 data structures. The associated feature flag is metadata_csum.
The desired checksum algorithm is indicated in the superblock, though as The desired checksum algorithm is indicated in the superblock, though as
of October 2012 the only supported algorithm is crc32c. Some data of October 2012 the only supported algorithm is crc32c. Some data
structures did not have space to fit a full 32-bit checksum, so only the structures did not have space to fit a full 32-bit checksum, so only the
@ -20,7 +20,7 @@ encounters directory blocks that lack sufficient empty space to add a
checksum, it will request that you run ``e2fsck -D`` to have the checksum, it will request that you run ``e2fsck -D`` to have the
directories rebuilt with checksums. This has the added benefit of directories rebuilt with checksums. This has the added benefit of
removing slack space from the directory files and rebalancing the htree removing slack space from the directory files and rebalancing the htree
indexes. If you \_ignore\_ this step, your directories will not be indexes. If you _ignore_ this step, your directories will not be
protected by a checksum! protected by a checksum!
The following table describes the data elements that go into each type The following table describes the data elements that go into each type
@ -35,39 +35,39 @@ of checksum. The checksum function is whatever the superblock describes
- Length - Length
- Ingredients - Ingredients
* - Superblock * - Superblock
- \_\_le32 - __le32
- The entire superblock up to the checksum field. The UUID lives inside - The entire superblock up to the checksum field. The UUID lives inside
the superblock. the superblock.
* - MMP * - MMP
- \_\_le32 - __le32
- UUID + the entire MMP block up to the checksum field. - UUID + the entire MMP block up to the checksum field.
* - Extended Attributes * - Extended Attributes
- \_\_le32 - __le32
- UUID + the entire extended attribute block. The checksum field is set to - UUID + the entire extended attribute block. The checksum field is set to
zero. zero.
* - Directory Entries * - Directory Entries
- \_\_le32 - __le32
- UUID + inode number + inode generation + the directory block up to the - UUID + inode number + inode generation + the directory block up to the
fake entry enclosing the checksum field. fake entry enclosing the checksum field.
* - HTREE Nodes * - HTREE Nodes
- \_\_le32 - __le32
- UUID + inode number + inode generation + all valid extents + HTREE tail. - UUID + inode number + inode generation + all valid extents + HTREE tail.
The checksum field is set to zero. The checksum field is set to zero.
* - Extents * - Extents
- \_\_le32 - __le32
- UUID + inode number + inode generation + the entire extent block up to - UUID + inode number + inode generation + the entire extent block up to
the checksum field. the checksum field.
* - Bitmaps * - Bitmaps
- \_\_le32 or \_\_le16 - __le32 or __le16
- UUID + the entire bitmap. Checksums are stored in the group descriptor, - UUID + the entire bitmap. Checksums are stored in the group descriptor,
and truncated if the group descriptor size is 32 bytes (i.e. ^64bit) and truncated if the group descriptor size is 32 bytes (i.e. ^64bit)
* - Inodes * - Inodes
- \_\_le32 - __le32
- UUID + inode number + inode generation + the entire inode. The checksum - UUID + inode number + inode generation + the entire inode. The checksum
field is set to zero. Each inode has its own checksum. field is set to zero. Each inode has its own checksum.
* - Group Descriptors * - Group Descriptors
- \_\_le16 - __le16
- If metadata\_csum, then UUID + group number + the entire descriptor; - If metadata_csum, then UUID + group number + the entire descriptor;
else if gdt\_csum, then crc16(UUID + group number + the entire else if gdt_csum, then crc16(UUID + group number + the entire
descriptor). In all cases, only the lower 16 bits are stored. descriptor). In all cases, only the lower 16 bits are stored.

View File

@ -42,24 +42,24 @@ is at most 263 bytes long, though on disk you'll need to reference
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- inode - inode
- Number of the inode that this directory entry points to. - Number of the inode that this directory entry points to.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- rec\_len - rec_len
- Length of this directory entry. Must be a multiple of 4. - Length of this directory entry. Must be a multiple of 4.
* - 0x6 * - 0x6
- \_\_le16 - __le16
- name\_len - name_len
- Length of the file name. - Length of the file name.
* - 0x8 * - 0x8
- char - char
- name[EXT4\_NAME\_LEN] - name[EXT4_NAME_LEN]
- File name. - File name.
Since file names cannot be longer than 255 bytes, the new directory Since file names cannot be longer than 255 bytes, the new directory
entry format shortens the name\_len field and uses the space for a file entry format shortens the name_len field and uses the space for a file
type flag, probably to avoid having to load every inode during directory type flag, probably to avoid having to load every inode during directory
tree traversal. This format is ``ext4_dir_entry_2``, which is at most tree traversal. This format is ``ext4_dir_entry_2``, which is at most
263 bytes long, though on disk you'll need to reference 263 bytes long, though on disk you'll need to reference
@ -74,24 +74,24 @@ tree traversal. This format is ``ext4_dir_entry_2``, which is at most
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- inode - inode
- Number of the inode that this directory entry points to. - Number of the inode that this directory entry points to.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- rec\_len - rec_len
- Length of this directory entry. - Length of this directory entry.
* - 0x6 * - 0x6
- \_\_u8 - __u8
- name\_len - name_len
- Length of the file name. - Length of the file name.
* - 0x7 * - 0x7
- \_\_u8 - __u8
- file\_type - file_type
- File type code, see ftype_ table below. - File type code, see ftype_ table below.
* - 0x8 * - 0x8
- char - char
- name[EXT4\_NAME\_LEN] - name[EXT4_NAME_LEN]
- File name. - File name.
.. _ftype: .. _ftype:
@ -137,19 +137,19 @@ entry uses this extension, it may be up to 271 bytes.
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- hash - hash
- The hash of the directory name - The hash of the directory name
* - 0x4 * - 0x4
- \_\_le32 - __le32
- minor\_hash - minor_hash
- The minor hash of the directory name - The minor hash of the directory name
In order to add checksums to these classic directory blocks, a phony In order to add checksums to these classic directory blocks, a phony
``struct ext4_dir_entry`` is placed at the end of each leaf block to ``struct ext4_dir_entry`` is placed at the end of each leaf block to
hold the checksum. The directory entry is 12 bytes long. The inode hold the checksum. The directory entry is 12 bytes long. The inode
number and name\_len fields are set to zero to fool old software into number and name_len fields are set to zero to fool old software into
ignoring an apparently empty directory entry, and the checksum is stored ignoring an apparently empty directory entry, and the checksum is stored
in the place where the name normally goes. The structure is in the place where the name normally goes. The structure is
``struct ext4_dir_entry_tail``: ``struct ext4_dir_entry_tail``:
@ -163,24 +163,24 @@ in the place where the name normally goes. The structure is
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- det\_reserved\_zero1 - det_reserved_zero1
- Inode number, which must be zero. - Inode number, which must be zero.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- det\_rec\_len - det_rec_len
- Length of this directory entry, which must be 12. - Length of this directory entry, which must be 12.
* - 0x6 * - 0x6
- \_\_u8 - __u8
- det\_reserved\_zero2 - det_reserved_zero2
- Length of the file name, which must be zero. - Length of the file name, which must be zero.
* - 0x7 * - 0x7
- \_\_u8 - __u8
- det\_reserved\_ft - det_reserved_ft
- File type, which must be 0xDE. - File type, which must be 0xDE.
* - 0x8 * - 0x8
- \_\_le32 - __le32
- det\_checksum - det_checksum
- Directory leaf block checksum. - Directory leaf block checksum.
The leaf directory block checksum is calculated against the FS UUID, the The leaf directory block checksum is calculated against the FS UUID, the
@ -194,7 +194,7 @@ Hash Tree Directories
A linear array of directory entries isn't great for performance, so a A linear array of directory entries isn't great for performance, so a
new feature was added to ext3 to provide a faster (but peculiar) new feature was added to ext3 to provide a faster (but peculiar)
balanced tree keyed off a hash of the directory entry name. If the balanced tree keyed off a hash of the directory entry name. If the
EXT4\_INDEX\_FL (0x1000) flag is set in the inode, this directory uses a EXT4_INDEX_FL (0x1000) flag is set in the inode, this directory uses a
hashed btree (htree) to organize and find directory entries. For hashed btree (htree) to organize and find directory entries. For
backwards read-only compatibility with ext2, this tree is actually backwards read-only compatibility with ext2, this tree is actually
hidden inside the directory file, masquerading as “empty” directory data hidden inside the directory file, masquerading as “empty” directory data
@ -206,14 +206,14 @@ rest of the directory block is empty so that it moves on.
The root of the tree always lives in the first data block of the The root of the tree always lives in the first data block of the
directory. By ext2 custom, the '.' and '..' entries must appear at the directory. By ext2 custom, the '.' and '..' entries must appear at the
beginning of this first block, so they are put here as two beginning of this first block, so they are put here as two
``struct ext4_dir_entry_2``\ s and not stored in the tree. The rest of ``struct ext4_dir_entry_2`` s and not stored in the tree. The rest of
the root node contains metadata about the tree and finally a hash->block the root node contains metadata about the tree and finally a hash->block
map to find nodes that are lower in the htree. If map to find nodes that are lower in the htree. If
``dx_root.info.indirect_levels`` is non-zero then the htree has two ``dx_root.info.indirect_levels`` is non-zero then the htree has two
levels; the data block pointed to by the root node's map is an interior levels; the data block pointed to by the root node's map is an interior
node, which is indexed by a minor hash. Interior nodes in this tree node, which is indexed by a minor hash. Interior nodes in this tree
contain a zeroed out ``struct ext4_dir_entry_2`` followed by a contain a zeroed out ``struct ext4_dir_entry_2`` followed by a
minor\_hash->block map to find leaf nodes. Leaf nodes contain a linear minor_hash->block map to find leaf nodes. Leaf nodes contain a linear
array of all ``struct ext4_dir_entry_2``; all of these entries array of all ``struct ext4_dir_entry_2``; all of these entries
(presumably) hash to the same value. If there is an overflow, the (presumably) hash to the same value. If there is an overflow, the
entries simply overflow into the next leaf node, and the entries simply overflow into the next leaf node, and the
@ -245,83 +245,83 @@ of a data block:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- dot.inode - dot.inode
- inode number of this directory. - inode number of this directory.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- dot.rec\_len - dot.rec_len
- Length of this record, 12. - Length of this record, 12.
* - 0x6 * - 0x6
- u8 - u8
- dot.name\_len - dot.name_len
- Length of the name, 1. - Length of the name, 1.
* - 0x7 * - 0x7
- u8 - u8
- dot.file\_type - dot.file_type
- File type of this entry, 0x2 (directory) (if the feature flag is set). - File type of this entry, 0x2 (directory) (if the feature flag is set).
* - 0x8 * - 0x8
- char - char
- dot.name[4] - dot.name[4]
- “.\\0\\0\\0” - “.\0\0\0”
* - 0xC * - 0xC
- \_\_le32 - __le32
- dotdot.inode - dotdot.inode
- inode number of parent directory. - inode number of parent directory.
* - 0x10 * - 0x10
- \_\_le16 - __le16
- dotdot.rec\_len - dotdot.rec_len
- block\_size - 12. The record length is long enough to cover all htree - block_size - 12. The record length is long enough to cover all htree
data. data.
* - 0x12 * - 0x12
- u8 - u8
- dotdot.name\_len - dotdot.name_len
- Length of the name, 2. - Length of the name, 2.
* - 0x13 * - 0x13
- u8 - u8
- dotdot.file\_type - dotdot.file_type
- File type of this entry, 0x2 (directory) (if the feature flag is set). - File type of this entry, 0x2 (directory) (if the feature flag is set).
* - 0x14 * - 0x14
- char - char
- dotdot\_name[4] - dotdot_name[4]
- “..\\0\\0” - “..\0\0”
* - 0x18 * - 0x18
- \_\_le32 - __le32
- struct dx\_root\_info.reserved\_zero - struct dx_root_info.reserved_zero
- Zero. - Zero.
* - 0x1C * - 0x1C
- u8 - u8
- struct dx\_root\_info.hash\_version - struct dx_root_info.hash_version
- Hash type, see dirhash_ table below. - Hash type, see dirhash_ table below.
* - 0x1D * - 0x1D
- u8 - u8
- struct dx\_root\_info.info\_length - struct dx_root_info.info_length
- Length of the tree information, 0x8. - Length of the tree information, 0x8.
* - 0x1E * - 0x1E
- u8 - u8
- struct dx\_root\_info.indirect\_levels - struct dx_root_info.indirect_levels
- Depth of the htree. Cannot be larger than 3 if the INCOMPAT\_LARGEDIR - Depth of the htree. Cannot be larger than 3 if the INCOMPAT_LARGEDIR
feature is set; cannot be larger than 2 otherwise. feature is set; cannot be larger than 2 otherwise.
* - 0x1F * - 0x1F
- u8 - u8
- struct dx\_root\_info.unused\_flags - struct dx_root_info.unused_flags
- -
* - 0x20 * - 0x20
- \_\_le16 - __le16
- limit - limit
- Maximum number of dx\_entries that can follow this header, plus 1 for - Maximum number of dx_entries that can follow this header, plus 1 for
the header itself. the header itself.
* - 0x22 * - 0x22
- \_\_le16 - __le16
- count - count
- Actual number of dx\_entries that follow this header, plus 1 for the - Actual number of dx_entries that follow this header, plus 1 for the
header itself. header itself.
* - 0x24 * - 0x24
- \_\_le32 - __le32
- block - block
- The block number (within the directory file) that goes with hash=0. - The block number (within the directory file) that goes with hash=0.
* - 0x28 * - 0x28
- struct dx\_entry - struct dx_entry
- entries[0] - entries[0]
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block. - As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
@ -362,38 +362,38 @@ also the full length of a data block:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- fake.inode - fake.inode
- Zero, to make it look like this entry is not in use. - Zero, to make it look like this entry is not in use.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- fake.rec\_len - fake.rec_len
- The size of the block, in order to hide all of the dx\_node data. - The size of the block, in order to hide all of the dx_node data.
* - 0x6 * - 0x6
- u8 - u8
- name\_len - name_len
- Zero. There is no name for this “unused” directory entry. - Zero. There is no name for this “unused” directory entry.
* - 0x7 * - 0x7
- u8 - u8
- file\_type - file_type
- Zero. There is no file type for this “unused” directory entry. - Zero. There is no file type for this “unused” directory entry.
* - 0x8 * - 0x8
- \_\_le16 - __le16
- limit - limit
- Maximum number of dx\_entries that can follow this header, plus 1 for - Maximum number of dx_entries that can follow this header, plus 1 for
the header itself. the header itself.
* - 0xA * - 0xA
- \_\_le16 - __le16
- count - count
- Actual number of dx\_entries that follow this header, plus 1 for the - Actual number of dx_entries that follow this header, plus 1 for the
header itself. header itself.
* - 0xC * - 0xC
- \_\_le32 - __le32
- block - block
- The block number (within the directory file) that goes with the lowest - The block number (within the directory file) that goes with the lowest
hash value of this block. This value is stored in the parent block. hash value of this block. This value is stored in the parent block.
* - 0x10 * - 0x10
- struct dx\_entry - struct dx_entry
- entries[0] - entries[0]
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block. - As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
@ -410,11 +410,11 @@ long:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- hash - hash
- Hash code. - Hash code.
* - 0x4 * - 0x4
- \_\_le32 - __le32
- block - block
- Block number (within the directory file, not filesystem blocks) of the - Block number (within the directory file, not filesystem blocks) of the
next node in the htree. next node in the htree.
@ -423,13 +423,13 @@ long:
author.) author.)
If metadata checksums are enabled, the last 8 bytes of the directory If metadata checksums are enabled, the last 8 bytes of the directory
block (precisely the length of one dx\_entry) are used to store a block (precisely the length of one dx_entry) are used to store a
``struct dx_tail``, which contains the checksum. The ``limit`` and ``struct dx_tail``, which contains the checksum. The ``limit`` and
``count`` entries in the dx\_root/dx\_node structures are adjusted as ``count`` entries in the dx_root/dx_node structures are adjusted as
necessary to fit the dx\_tail into the block. If there is no space for necessary to fit the dx_tail into the block. If there is no space for
the dx\_tail, the user is notified to run e2fsck -D to rebuild the the dx_tail, the user is notified to run e2fsck -D to rebuild the
directory index (which will ensure that there's space for the checksum). directory index (which will ensure that there's space for the checksum).
The dx\_tail structure is 8 bytes long and looks like this: The dx_tail structure is 8 bytes long and looks like this:
.. list-table:: .. list-table::
:widths: 8 8 24 40 :widths: 8 8 24 40
@ -441,13 +441,13 @@ The dx\_tail structure is 8 bytes long and looks like this:
- Description - Description
* - 0x0 * - 0x0
- u32 - u32
- dt\_reserved - dt_reserved
- Zero. - Zero.
* - 0x4 * - 0x4
- \_\_le32 - __le32
- dt\_checksum - dt_checksum
- Checksum of the htree directory block. - Checksum of the htree directory block.
The checksum is calculated against the FS UUID, the htree index header The checksum is calculated against the FS UUID, the htree index header
(dx\_root or dx\_node), all of the htree indices (dx\_entry) that are in (dx_root or dx_node), all of the htree indices (dx_entry) that are in
use, and the tail block (dx\_tail). use, and the tail block (dx_tail).

View File

@ -5,14 +5,14 @@ Large Extended Attribute Values
To enable ext4 to store extended attribute values that do not fit in the To enable ext4 to store extended attribute values that do not fit in the
inode or in the single extended attribute block attached to an inode, inode or in the single extended attribute block attached to an inode,
the EA\_INODE feature allows us to store the value in the data blocks of the EA_INODE feature allows us to store the value in the data blocks of
a regular file inode. This “EA inode” is linked only from the extended a regular file inode. This “EA inode” is linked only from the extended
attribute name index and must not appear in a directory entry. The attribute name index and must not appear in a directory entry. The
inode's i\_atime field is used to store a checksum of the xattr value; inode's i_atime field is used to store a checksum of the xattr value;
and i\_ctime/i\_version store a 64-bit reference count, which enables and i_ctime/i_version store a 64-bit reference count, which enables
sharing of large xattr values between multiple owning inodes. For sharing of large xattr values between multiple owning inodes. For
backward compatibility with older versions of this feature, the backward compatibility with older versions of this feature, the
i\_mtime/i\_generation *may* store a back-reference to the inode number i_mtime/i_generation *may* store a back-reference to the inode number
and i\_generation of the **one** owning inode (in cases where the EA and i_generation of the **one** owning inode (in cases where the EA
inode is not referenced by multiple inodes) to verify that the EA inode inode is not referenced by multiple inodes) to verify that the EA inode
is the correct one being accessed. is the correct one being accessed.

View File

@ -7,34 +7,34 @@ Each block group on the filesystem has one of these descriptors
associated with it. As noted in the Layout section above, the group associated with it. As noted in the Layout section above, the group
descriptors (if present) are the second item in the block group. The descriptors (if present) are the second item in the block group. The
standard configuration is for each block group to contain a full copy of standard configuration is for each block group to contain a full copy of
the block group descriptor table unless the sparse\_super feature flag the block group descriptor table unless the sparse_super feature flag
is set. is set.
Notice how the group descriptor records the location of both bitmaps and Notice how the group descriptor records the location of both bitmaps and
the inode table (i.e. they can float). This means that within a block the inode table (i.e. they can float). This means that within a block
group, the only data structures with fixed locations are the superblock group, the only data structures with fixed locations are the superblock
and the group descriptor table. The flex\_bg mechanism uses this and the group descriptor table. The flex_bg mechanism uses this
property to group several block groups into a flex group and lay out all property to group several block groups into a flex group and lay out all
of the groups' bitmaps and inode tables into one long run in the first of the groups' bitmaps and inode tables into one long run in the first
group of the flex group. group of the flex group.
If the meta\_bg feature flag is set, then several block groups are If the meta_bg feature flag is set, then several block groups are
grouped together into a meta group. Note that in the meta\_bg case, grouped together into a meta group. Note that in the meta_bg case,
however, the first and last two block groups within the larger meta however, the first and last two block groups within the larger meta
group contain only group descriptors for the groups inside the meta group contain only group descriptors for the groups inside the meta
group. group.
flex\_bg and meta\_bg do not appear to be mutually exclusive features. flex_bg and meta_bg do not appear to be mutually exclusive features.
In ext2, ext3, and ext4 (when the 64bit feature is not enabled), the In ext2, ext3, and ext4 (when the 64bit feature is not enabled), the
block group descriptor is only 32 bytes long and therefore ends at block group descriptor is only 32 bytes long and therefore ends at
bg\_checksum. On an ext4 filesystem with the 64bit feature enabled, the bg_checksum. On an ext4 filesystem with the 64bit feature enabled, the
block group descriptor expands to at least the 64 bytes described below; block group descriptor expands to at least the 64 bytes described below;
the size is stored in the superblock. the size is stored in the superblock.
If gdt\_csum is set and metadata\_csum is not set, the block group If gdt_csum is set and metadata_csum is not set, the block group
checksum is the crc16 of the FS UUID, the group number, and the group checksum is the crc16 of the FS UUID, the group number, and the group
descriptor structure. If metadata\_csum is set, then the block group descriptor structure. If metadata_csum is set, then the block group
checksum is the lower 16 bits of the checksum of the FS UUID, the group checksum is the lower 16 bits of the checksum of the FS UUID, the group
number, and the group descriptor structure. Both block and inode bitmap number, and the group descriptor structure. Both block and inode bitmap
checksums are calculated against the FS UUID, the group number, and the checksums are calculated against the FS UUID, the group number, and the
@ -51,59 +51,59 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- bg\_block\_bitmap\_lo - bg_block_bitmap_lo
- Lower 32-bits of location of block bitmap. - Lower 32-bits of location of block bitmap.
* - 0x4 * - 0x4
- \_\_le32 - __le32
- bg\_inode\_bitmap\_lo - bg_inode_bitmap_lo
- Lower 32-bits of location of inode bitmap. - Lower 32-bits of location of inode bitmap.
* - 0x8 * - 0x8
- \_\_le32 - __le32
- bg\_inode\_table\_lo - bg_inode_table_lo
- Lower 32-bits of location of inode table. - Lower 32-bits of location of inode table.
* - 0xC * - 0xC
- \_\_le16 - __le16
- bg\_free\_blocks\_count\_lo - bg_free_blocks_count_lo
- Lower 16-bits of free block count. - Lower 16-bits of free block count.
* - 0xE * - 0xE
- \_\_le16 - __le16
- bg\_free\_inodes\_count\_lo - bg_free_inodes_count_lo
- Lower 16-bits of free inode count. - Lower 16-bits of free inode count.
* - 0x10 * - 0x10
- \_\_le16 - __le16
- bg\_used\_dirs\_count\_lo - bg_used_dirs_count_lo
- Lower 16-bits of directory count. - Lower 16-bits of directory count.
* - 0x12 * - 0x12
- \_\_le16 - __le16
- bg\_flags - bg_flags
- Block group flags. See the bgflags_ table below. - Block group flags. See the bgflags_ table below.
* - 0x14 * - 0x14
- \_\_le32 - __le32
- bg\_exclude\_bitmap\_lo - bg_exclude_bitmap_lo
- Lower 32-bits of location of snapshot exclusion bitmap. - Lower 32-bits of location of snapshot exclusion bitmap.
* - 0x18 * - 0x18
- \_\_le16 - __le16
- bg\_block\_bitmap\_csum\_lo - bg_block_bitmap_csum_lo
- Lower 16-bits of the block bitmap checksum. - Lower 16-bits of the block bitmap checksum.
* - 0x1A * - 0x1A
- \_\_le16 - __le16
- bg\_inode\_bitmap\_csum\_lo - bg_inode_bitmap_csum_lo
- Lower 16-bits of the inode bitmap checksum. - Lower 16-bits of the inode bitmap checksum.
* - 0x1C * - 0x1C
- \_\_le16 - __le16
- bg\_itable\_unused\_lo - bg_itable_unused_lo
- Lower 16-bits of unused inode count. If set, we needn't scan past the - Lower 16-bits of unused inode count. If set, we needn't scan past the
``(sb.s_inodes_per_group - gdt.bg_itable_unused)``\ th entry in the ``(sb.s_inodes_per_group - gdt.bg_itable_unused)`` th entry in the
inode table for this group. inode table for this group.
* - 0x1E * - 0x1E
- \_\_le16 - __le16
- bg\_checksum - bg_checksum
- Group descriptor checksum; crc16(sb\_uuid+group\_num+bg\_desc) if the - Group descriptor checksum; crc16(sb_uuid+group_num+bg_desc) if the
RO\_COMPAT\_GDT\_CSUM feature is set, or RO_COMPAT_GDT_CSUM feature is set, or
crc32c(sb\_uuid+group\_num+bg\_desc) & 0xFFFF if the crc32c(sb_uuid+group_num+bg_desc) & 0xFFFF if the
RO\_COMPAT\_METADATA\_CSUM feature is set. The bg\_checksum RO_COMPAT_METADATA_CSUM feature is set. The bg_checksum
field in bg\_desc is skipped when calculating crc16 checksum, field in bg_desc is skipped when calculating crc16 checksum,
and set to zero if crc32c checksum is used. and set to zero if crc32c checksum is used.
* - * -
- -
@ -111,48 +111,48 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
- These fields only exist if the 64bit feature is enabled and s_desc_size - These fields only exist if the 64bit feature is enabled and s_desc_size
> 32. > 32.
* - 0x20 * - 0x20
- \_\_le32 - __le32
- bg\_block\_bitmap\_hi - bg_block_bitmap_hi
- Upper 32-bits of location of block bitmap. - Upper 32-bits of location of block bitmap.
* - 0x24 * - 0x24
- \_\_le32 - __le32
- bg\_inode\_bitmap\_hi - bg_inode_bitmap_hi
- Upper 32-bits of location of inode bitmap. - Upper 32-bits of location of inode bitmap.
* - 0x28 * - 0x28
- \_\_le32 - __le32
- bg\_inode\_table\_hi - bg_inode_table_hi
- Upper 32-bits of location of inode table. - Upper 32-bits of location of inode table.
* - 0x2C * - 0x2C
- \_\_le16 - __le16
- bg\_free\_blocks\_count\_hi - bg_free_blocks_count_hi
- Upper 16-bits of free block count. - Upper 16-bits of free block count.
* - 0x2E * - 0x2E
- \_\_le16 - __le16
- bg\_free\_inodes\_count\_hi - bg_free_inodes_count_hi
- Upper 16-bits of free inode count. - Upper 16-bits of free inode count.
* - 0x30 * - 0x30
- \_\_le16 - __le16
- bg\_used\_dirs\_count\_hi - bg_used_dirs_count_hi
- Upper 16-bits of directory count. - Upper 16-bits of directory count.
* - 0x32 * - 0x32
- \_\_le16 - __le16
- bg\_itable\_unused\_hi - bg_itable_unused_hi
- Upper 16-bits of unused inode count. - Upper 16-bits of unused inode count.
* - 0x34 * - 0x34
- \_\_le32 - __le32
- bg\_exclude\_bitmap\_hi - bg_exclude_bitmap_hi
- Upper 32-bits of location of snapshot exclusion bitmap. - Upper 32-bits of location of snapshot exclusion bitmap.
* - 0x38 * - 0x38
- \_\_le16 - __le16
- bg\_block\_bitmap\_csum\_hi - bg_block_bitmap_csum_hi
- Upper 16-bits of the block bitmap checksum. - Upper 16-bits of the block bitmap checksum.
* - 0x3A * - 0x3A
- \_\_le16 - __le16
- bg\_inode\_bitmap\_csum\_hi - bg_inode_bitmap_csum_hi
- Upper 16-bits of the inode bitmap checksum. - Upper 16-bits of the inode bitmap checksum.
* - 0x3C * - 0x3C
- \_\_u32 - __u32
- bg\_reserved - bg_reserved
- Padding to 64 bytes. - Padding to 64 bytes.
.. _bgflags: .. _bgflags:
@ -166,8 +166,8 @@ Block group flags can be any combination of the following:
* - Value * - Value
- Description - Description
* - 0x1 * - 0x1
- inode table and bitmap are not initialized (EXT4\_BG\_INODE\_UNINIT). - inode table and bitmap are not initialized (EXT4_BG_INODE_UNINIT).
* - 0x2 * - 0x2
- block bitmap is not initialized (EXT4\_BG\_BLOCK\_UNINIT). - block bitmap is not initialized (EXT4_BG_BLOCK_UNINIT).
* - 0x4 * - 0x4
- inode table is zeroed (EXT4\_BG\_INODE\_ZEROED). - inode table is zeroed (EXT4_BG_INODE_ZEROED).

View File

@ -1,6 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0 .. SPDX-License-Identifier: GPL-2.0
The Contents of inode.i\_block The Contents of inode.i_block
------------------------------ ------------------------------
Depending on the type of file an inode describes, the 60 bytes of Depending on the type of file an inode describes, the 60 bytes of
@ -47,7 +47,7 @@ In ext4, the file to logical block map has been replaced with an extent
tree. Under the old scheme, allocating a contiguous run of 1,000 blocks tree. Under the old scheme, allocating a contiguous run of 1,000 blocks
requires an indirect block to map all 1,000 entries; with extents, the requires an indirect block to map all 1,000 entries; with extents, the
mapping is reduced to a single ``struct ext4_extent`` with mapping is reduced to a single ``struct ext4_extent`` with
``ee_len = 1000``. If flex\_bg is enabled, it is possible to allocate ``ee_len = 1000``. If flex_bg is enabled, it is possible to allocate
very large files with a single extent, at a considerable reduction in very large files with a single extent, at a considerable reduction in
metadata block use, and some improvement in disk efficiency. The inode metadata block use, and some improvement in disk efficiency. The inode
must have the extents flag (0x80000) set for this feature to be in must have the extents flag (0x80000) set for this feature to be in
@ -76,28 +76,28 @@ which is 12 bytes long:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le16 - __le16
- eh\_magic - eh_magic
- Magic number, 0xF30A. - Magic number, 0xF30A.
* - 0x2 * - 0x2
- \_\_le16 - __le16
- eh\_entries - eh_entries
- Number of valid entries following the header. - Number of valid entries following the header.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- eh\_max - eh_max
- Maximum number of entries that could follow the header. - Maximum number of entries that could follow the header.
* - 0x6 * - 0x6
- \_\_le16 - __le16
- eh\_depth - eh_depth
- Depth of this extent node in the extent tree. 0 = this extent node - Depth of this extent node in the extent tree. 0 = this extent node
points to data blocks; otherwise, this extent node points to other points to data blocks; otherwise, this extent node points to other
extent nodes. The extent tree can be at most 5 levels deep: a logical extent nodes. The extent tree can be at most 5 levels deep: a logical
block number can be at most ``2^32``, and the smallest ``n`` that block number can be at most ``2^32``, and the smallest ``n`` that
satisfies ``4*(((blocksize - 12)/12)^n) >= 2^32`` is 5. satisfies ``4*(((blocksize - 12)/12)^n) >= 2^32`` is 5.
* - 0x8 * - 0x8
- \_\_le32 - __le32
- eh\_generation - eh_generation
- Generation of the tree. (Used by Lustre, but not standard ext4). - Generation of the tree. (Used by Lustre, but not standard ext4).
Internal nodes of the extent tree, also known as index nodes, are Internal nodes of the extent tree, also known as index nodes, are
@ -112,22 +112,22 @@ recorded as ``struct ext4_extent_idx``, and are 12 bytes long:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- ei\_block - ei_block
- This index node covers file blocks from 'block' onward. - This index node covers file blocks from 'block' onward.
* - 0x4 * - 0x4
- \_\_le32 - __le32
- ei\_leaf\_lo - ei_leaf_lo
- Lower 32-bits of the block number of the extent node that is the next - Lower 32-bits of the block number of the extent node that is the next
level lower in the tree. The tree node pointed to can be either another level lower in the tree. The tree node pointed to can be either another
internal node or a leaf node, described below. internal node or a leaf node, described below.
* - 0x8 * - 0x8
- \_\_le16 - __le16
- ei\_leaf\_hi - ei_leaf_hi
- Upper 16-bits of the previous field. - Upper 16-bits of the previous field.
* - 0xA * - 0xA
- \_\_u16 - __u16
- ei\_unused - ei_unused
- -
Leaf nodes of the extent tree are recorded as ``struct ext4_extent``, Leaf nodes of the extent tree are recorded as ``struct ext4_extent``,
@ -142,24 +142,24 @@ and are also 12 bytes long:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- ee\_block - ee_block
- First file block number that this extent covers. - First file block number that this extent covers.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- ee\_len - ee_len
- Number of blocks covered by extent. If the value of this field is <= - Number of blocks covered by extent. If the value of this field is <=
32768, the extent is initialized. If the value of the field is > 32768, 32768, the extent is initialized. If the value of the field is > 32768,
the extent is uninitialized and the actual extent length is ``ee_len`` - the extent is uninitialized and the actual extent length is ``ee_len`` -
32768. Therefore, the maximum length of an initialized extent is 32768 32768. Therefore, the maximum length of an initialized extent is 32768
blocks, and the maximum length of an uninitialized extent is 32767. blocks, and the maximum length of an uninitialized extent is 32767.
* - 0x6 * - 0x6
- \_\_le16 - __le16
- ee\_start\_hi - ee_start_hi
- Upper 16-bits of the block number to which this extent points. - Upper 16-bits of the block number to which this extent points.
* - 0x8 * - 0x8
- \_\_le32 - __le32
- ee\_start\_lo - ee_start_lo
- Lower 32-bits of the block number to which this extent points. - Lower 32-bits of the block number to which this extent points.
Prior to the introduction of metadata checksums, the extent header + Prior to the introduction of metadata checksums, the extent header +
@ -182,8 +182,8 @@ including) the checksum itself.
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- eb\_checksum - eb_checksum
- Checksum of the extent block, crc32c(uuid+inum+igeneration+extentblock) - Checksum of the extent block, crc32c(uuid+inum+igeneration+extentblock)
Inline Data Inline Data

View File

@ -11,12 +11,12 @@ file is smaller than 60 bytes, then the data are stored inline in
attribute space, then it might be found as an extended attribute attribute space, then it might be found as an extended attribute
“system.data” within the inode body (“ibody EA”). This of course “system.data” within the inode body (“ibody EA”). This of course
constrains the amount of extended attributes one can attach to an inode. constrains the amount of extended attributes one can attach to an inode.
If the data size increases beyond i\_block + ibody EA, a regular block If the data size increases beyond i_block + ibody EA, a regular block
is allocated and the contents moved to that block. is allocated and the contents moved to that block.
Pending a change to compact the extended attribute key used to store Pending a change to compact the extended attribute key used to store
inline data, one ought to be able to store 160 bytes of data in a inline data, one ought to be able to store 160 bytes of data in a
256-byte inode (as of June 2015, when i\_extra\_isize is 28). Prior to 256-byte inode (as of June 2015, when i_extra_isize is 28). Prior to
that, the limit was 156 bytes due to inefficient use of inode space. that, the limit was 156 bytes due to inefficient use of inode space.
The inline data feature requires the presence of an extended attribute The inline data feature requires the presence of an extended attribute
@ -25,12 +25,12 @@ for “system.data”, even if the attribute value is zero length.
Inline Directories Inline Directories
~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~
The first four bytes of i\_block are the inode number of the parent The first four bytes of i_block are the inode number of the parent
directory. Following that is a 56-byte space for an array of directory directory. Following that is a 56-byte space for an array of directory
entries; see ``struct ext4_dir_entry``. If there is a “system.data” entries; see ``struct ext4_dir_entry``. If there is a “system.data”
attribute in the inode body, the EA value is an array of attribute in the inode body, the EA value is an array of
``struct ext4_dir_entry`` as well. Note that for inline directories, the ``struct ext4_dir_entry`` as well. Note that for inline directories, the
i\_block and EA space are treated as separate dirent blocks; directory i_block and EA space are treated as separate dirent blocks; directory
entries cannot span the two. entries cannot span the two.
Inline directory entries are not checksummed, as the inode checksum Inline directory entries are not checksummed, as the inode checksum

View File

@ -38,138 +38,138 @@ The inode table entry is laid out in ``struct ext4_inode``.
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le16 - __le16
- i\_mode - i_mode
- File mode. See the table i_mode_ below. - File mode. See the table i_mode_ below.
* - 0x2 * - 0x2
- \_\_le16 - __le16
- i\_uid - i_uid
- Lower 16-bits of Owner UID. - Lower 16-bits of Owner UID.
* - 0x4 * - 0x4
- \_\_le32 - __le32
- i\_size\_lo - i_size_lo
- Lower 32-bits of size in bytes. - Lower 32-bits of size in bytes.
* - 0x8 * - 0x8
- \_\_le32 - __le32
- i\_atime - i_atime
- Last access time, in seconds since the epoch. However, if the EA\_INODE - Last access time, in seconds since the epoch. However, if the EA_INODE
inode flag is set, this inode stores an extended attribute value and inode flag is set, this inode stores an extended attribute value and
this field contains the checksum of the value. this field contains the checksum of the value.
* - 0xC * - 0xC
- \_\_le32 - __le32
- i\_ctime - i_ctime
- Last inode change time, in seconds since the epoch. However, if the - Last inode change time, in seconds since the epoch. However, if the
EA\_INODE inode flag is set, this inode stores an extended attribute EA_INODE inode flag is set, this inode stores an extended attribute
value and this field contains the lower 32 bits of the attribute value's value and this field contains the lower 32 bits of the attribute value's
reference count. reference count.
* - 0x10 * - 0x10
- \_\_le32 - __le32
- i\_mtime - i_mtime
- Last data modification time, in seconds since the epoch. However, if the - Last data modification time, in seconds since the epoch. However, if the
EA\_INODE inode flag is set, this inode stores an extended attribute EA_INODE inode flag is set, this inode stores an extended attribute
value and this field contains the number of the inode that owns the value and this field contains the number of the inode that owns the
extended attribute. extended attribute.
* - 0x14 * - 0x14
- \_\_le32 - __le32
- i\_dtime - i_dtime
- Deletion Time, in seconds since the epoch. - Deletion Time, in seconds since the epoch.
* - 0x18 * - 0x18
- \_\_le16 - __le16
- i\_gid - i_gid
- Lower 16-bits of GID. - Lower 16-bits of GID.
* - 0x1A * - 0x1A
- \_\_le16 - __le16
- i\_links\_count - i_links_count
- Hard link count. Normally, ext4 does not permit an inode to have more - Hard link count. Normally, ext4 does not permit an inode to have more
than 65,000 hard links. This applies to files as well as directories, than 65,000 hard links. This applies to files as well as directories,
which means that there cannot be more than 64,998 subdirectories in a which means that there cannot be more than 64,998 subdirectories in a
directory (each subdirectory's '..' entry counts as a hard link, as does directory (each subdirectory's '..' entry counts as a hard link, as does
the '.' entry in the directory itself). With the DIR\_NLINK feature the '.' entry in the directory itself). With the DIR_NLINK feature
enabled, ext4 supports more than 64,998 subdirectories by setting this enabled, ext4 supports more than 64,998 subdirectories by setting this
field to 1 to indicate that the number of hard links is not known. field to 1 to indicate that the number of hard links is not known.
* - 0x1C * - 0x1C
- \_\_le32 - __le32
- i\_blocks\_lo - i_blocks_lo
- Lower 32-bits of “block” count. If the huge\_file feature flag is not - Lower 32-bits of “block” count. If the huge_file feature flag is not
set on the filesystem, the file consumes ``i_blocks_lo`` 512-byte blocks set on the filesystem, the file consumes ``i_blocks_lo`` 512-byte blocks
on disk. If huge\_file is set and EXT4\_HUGE\_FILE\_FL is NOT set in on disk. If huge_file is set and EXT4_HUGE_FILE_FL is NOT set in
``inode.i_flags``, then the file consumes ``i_blocks_lo + (i_blocks_hi ``inode.i_flags``, then the file consumes ``i_blocks_lo + (i_blocks_hi
<< 32)`` 512-byte blocks on disk. If huge\_file is set and << 32)`` 512-byte blocks on disk. If huge_file is set and
EXT4\_HUGE\_FILE\_FL IS set in ``inode.i_flags``, then this file EXT4_HUGE_FILE_FL IS set in ``inode.i_flags``, then this file
consumes ``i_blocks_lo + (i_blocks_hi << 32)`` filesystem blocks on consumes ``i_blocks_lo + (i_blocks_hi << 32)`` filesystem blocks on
disk. disk.
* - 0x20 * - 0x20
- \_\_le32 - __le32
- i\_flags - i_flags
- Inode flags. See the table i_flags_ below. - Inode flags. See the table i_flags_ below.
* - 0x24 * - 0x24
- 4 bytes - 4 bytes
- i\_osd1 - i_osd1
- See the table i_osd1_ for more details. - See the table i_osd1_ for more details.
* - 0x28 * - 0x28
- 60 bytes - 60 bytes
- i\_block[EXT4\_N\_BLOCKS=15] - i_block[EXT4_N_BLOCKS=15]
- Block map or extent tree. See the section “The Contents of inode.i\_block”. - Block map or extent tree. See the section “The Contents of inode.i_block”.
* - 0x64 * - 0x64
- \_\_le32 - __le32
- i\_generation - i_generation
- File version (for NFS). - File version (for NFS).
* - 0x68 * - 0x68
- \_\_le32 - __le32
- i\_file\_acl\_lo - i_file_acl_lo
- Lower 32-bits of extended attribute block. ACLs are of course one of - Lower 32-bits of extended attribute block. ACLs are of course one of
many possible extended attributes; I think the name of this field is a many possible extended attributes; I think the name of this field is a
result of the first use of extended attributes being for ACLs. result of the first use of extended attributes being for ACLs.
* - 0x6C * - 0x6C
- \_\_le32 - __le32
- i\_size\_high / i\_dir\_acl - i_size_high / i_dir_acl
- Upper 32-bits of file/directory size. In ext2/3 this field was named - Upper 32-bits of file/directory size. In ext2/3 this field was named
i\_dir\_acl, though it was usually set to zero and never used. i_dir_acl, though it was usually set to zero and never used.
* - 0x70 * - 0x70
- \_\_le32 - __le32
- i\_obso\_faddr - i_obso_faddr
- (Obsolete) fragment address. - (Obsolete) fragment address.
* - 0x74 * - 0x74
- 12 bytes - 12 bytes
- i\_osd2 - i_osd2
- See the table i_osd2_ for more details. - See the table i_osd2_ for more details.
* - 0x80 * - 0x80
- \_\_le16 - __le16
- i\_extra\_isize - i_extra_isize
- Size of this inode - 128. Alternately, the size of the extended inode - Size of this inode - 128. Alternately, the size of the extended inode
fields beyond the original ext2 inode, including this field. fields beyond the original ext2 inode, including this field.
* - 0x82 * - 0x82
- \_\_le16 - __le16
- i\_checksum\_hi - i_checksum_hi
- Upper 16-bits of the inode checksum. - Upper 16-bits of the inode checksum.
* - 0x84 * - 0x84
- \_\_le32 - __le32
- i\_ctime\_extra - i_ctime_extra
- Extra change time bits. This provides sub-second precision. See Inode - Extra change time bits. This provides sub-second precision. See Inode
Timestamps section. Timestamps section.
* - 0x88 * - 0x88
- \_\_le32 - __le32
- i\_mtime\_extra - i_mtime_extra
- Extra modification time bits. This provides sub-second precision. - Extra modification time bits. This provides sub-second precision.
* - 0x8C * - 0x8C
- \_\_le32 - __le32
- i\_atime\_extra - i_atime_extra
- Extra access time bits. This provides sub-second precision. - Extra access time bits. This provides sub-second precision.
* - 0x90 * - 0x90
- \_\_le32 - __le32
- i\_crtime - i_crtime
- File creation time, in seconds since the epoch. - File creation time, in seconds since the epoch.
* - 0x94 * - 0x94
- \_\_le32 - __le32
- i\_crtime\_extra - i_crtime_extra
- Extra file creation time bits. This provides sub-second precision. - Extra file creation time bits. This provides sub-second precision.
* - 0x98 * - 0x98
- \_\_le32 - __le32
- i\_version\_hi - i_version_hi
- Upper 32-bits for version number. - Upper 32-bits for version number.
* - 0x9C * - 0x9C
- \_\_le32 - __le32
- i\_projid - i_projid
- Project ID. - Project ID.
.. _i_mode: .. _i_mode:
@ -183,45 +183,45 @@ The ``i_mode`` value is a combination of the following flags:
* - Value * - Value
- Description - Description
* - 0x1 * - 0x1
- S\_IXOTH (Others may execute) - S_IXOTH (Others may execute)
* - 0x2 * - 0x2
- S\_IWOTH (Others may write) - S_IWOTH (Others may write)
* - 0x4 * - 0x4
- S\_IROTH (Others may read) - S_IROTH (Others may read)
* - 0x8 * - 0x8
- S\_IXGRP (Group members may execute) - S_IXGRP (Group members may execute)
* - 0x10 * - 0x10
- S\_IWGRP (Group members may write) - S_IWGRP (Group members may write)
* - 0x20 * - 0x20
- S\_IRGRP (Group members may read) - S_IRGRP (Group members may read)
* - 0x40 * - 0x40
- S\_IXUSR (Owner may execute) - S_IXUSR (Owner may execute)
* - 0x80 * - 0x80
- S\_IWUSR (Owner may write) - S_IWUSR (Owner may write)
* - 0x100 * - 0x100
- S\_IRUSR (Owner may read) - S_IRUSR (Owner may read)
* - 0x200 * - 0x200
- S\_ISVTX (Sticky bit) - S_ISVTX (Sticky bit)
* - 0x400 * - 0x400
- S\_ISGID (Set GID) - S_ISGID (Set GID)
* - 0x800 * - 0x800
- S\_ISUID (Set UID) - S_ISUID (Set UID)
* - * -
- These are mutually-exclusive file types: - These are mutually-exclusive file types:
* - 0x1000 * - 0x1000
- S\_IFIFO (FIFO) - S_IFIFO (FIFO)
* - 0x2000 * - 0x2000
- S\_IFCHR (Character device) - S_IFCHR (Character device)
* - 0x4000 * - 0x4000
- S\_IFDIR (Directory) - S_IFDIR (Directory)
* - 0x6000 * - 0x6000
- S\_IFBLK (Block device) - S_IFBLK (Block device)
* - 0x8000 * - 0x8000
- S\_IFREG (Regular file) - S_IFREG (Regular file)
* - 0xA000 * - 0xA000
- S\_IFLNK (Symbolic link) - S_IFLNK (Symbolic link)
* - 0xC000 * - 0xC000
- S\_IFSOCK (Socket) - S_IFSOCK (Socket)
.. _i_flags: .. _i_flags:
@ -234,56 +234,56 @@ The ``i_flags`` field is a combination of these values:
* - Value * - Value
- Description - Description
* - 0x1 * - 0x1
- This file requires secure deletion (EXT4\_SECRM\_FL). (not implemented) - This file requires secure deletion (EXT4_SECRM_FL). (not implemented)
* - 0x2 * - 0x2
- This file should be preserved, should undeletion be desired - This file should be preserved, should undeletion be desired
(EXT4\_UNRM\_FL). (not implemented) (EXT4_UNRM_FL). (not implemented)
* - 0x4 * - 0x4
- File is compressed (EXT4\_COMPR\_FL). (not really implemented) - File is compressed (EXT4_COMPR_FL). (not really implemented)
* - 0x8 * - 0x8
- All writes to the file must be synchronous (EXT4\_SYNC\_FL). - All writes to the file must be synchronous (EXT4_SYNC_FL).
* - 0x10 * - 0x10
- File is immutable (EXT4\_IMMUTABLE\_FL). - File is immutable (EXT4_IMMUTABLE_FL).
* - 0x20 * - 0x20
- File can only be appended (EXT4\_APPEND\_FL). - File can only be appended (EXT4_APPEND_FL).
* - 0x40 * - 0x40
- The dump(1) utility should not dump this file (EXT4\_NODUMP\_FL). - The dump(1) utility should not dump this file (EXT4_NODUMP_FL).
* - 0x80 * - 0x80
- Do not update access time (EXT4\_NOATIME\_FL). - Do not update access time (EXT4_NOATIME_FL).
* - 0x100 * - 0x100
- Dirty compressed file (EXT4\_DIRTY\_FL). (not used) - Dirty compressed file (EXT4_DIRTY_FL). (not used)
* - 0x200 * - 0x200
- File has one or more compressed clusters (EXT4\_COMPRBLK\_FL). (not used) - File has one or more compressed clusters (EXT4_COMPRBLK_FL). (not used)
* - 0x400 * - 0x400
- Do not compress file (EXT4\_NOCOMPR\_FL). (not used) - Do not compress file (EXT4_NOCOMPR_FL). (not used)
* - 0x800 * - 0x800
- Encrypted inode (EXT4\_ENCRYPT\_FL). This bit value previously was - Encrypted inode (EXT4_ENCRYPT_FL). This bit value previously was
EXT4\_ECOMPR\_FL (compression error), which was never used. EXT4_ECOMPR_FL (compression error), which was never used.
* - 0x1000 * - 0x1000
- Directory has hashed indexes (EXT4\_INDEX\_FL). - Directory has hashed indexes (EXT4_INDEX_FL).
* - 0x2000 * - 0x2000
- AFS magic directory (EXT4\_IMAGIC\_FL). - AFS magic directory (EXT4_IMAGIC_FL).
* - 0x4000 * - 0x4000
- File data must always be written through the journal - File data must always be written through the journal
(EXT4\_JOURNAL\_DATA\_FL). (EXT4_JOURNAL_DATA_FL).
* - 0x8000 * - 0x8000
- File tail should not be merged (EXT4\_NOTAIL\_FL). (not used by ext4) - File tail should not be merged (EXT4_NOTAIL_FL). (not used by ext4)
* - 0x10000 * - 0x10000
- All directory entry data should be written synchronously (see - All directory entry data should be written synchronously (see
``dirsync``) (EXT4\_DIRSYNC\_FL). ``dirsync``) (EXT4_DIRSYNC_FL).
* - 0x20000 * - 0x20000
- Top of directory hierarchy (EXT4\_TOPDIR\_FL). - Top of directory hierarchy (EXT4_TOPDIR_FL).
* - 0x40000 * - 0x40000
- This is a huge file (EXT4\_HUGE\_FILE\_FL). - This is a huge file (EXT4_HUGE_FILE_FL).
* - 0x80000 * - 0x80000
- Inode uses extents (EXT4\_EXTENTS\_FL). - Inode uses extents (EXT4_EXTENTS_FL).
* - 0x100000 * - 0x100000
- Verity protected file (EXT4\_VERITY\_FL). - Verity protected file (EXT4_VERITY_FL).
* - 0x200000 * - 0x200000
- Inode stores a large extended attribute value in its data blocks - Inode stores a large extended attribute value in its data blocks
(EXT4\_EA\_INODE\_FL). (EXT4_EA_INODE_FL).
* - 0x400000 * - 0x400000
- This file has blocks allocated past EOF (EXT4\_EOFBLOCKS\_FL). - This file has blocks allocated past EOF (EXT4_EOFBLOCKS_FL).
(deprecated) (deprecated)
* - 0x01000000 * - 0x01000000
- Inode is a snapshot (``EXT4_SNAPFILE_FL``). (not in mainline) - Inode is a snapshot (``EXT4_SNAPFILE_FL``). (not in mainline)
@ -294,21 +294,21 @@ The ``i_flags`` field is a combination of these values:
- Snapshot shrink has completed (``EXT4_SNAPFILE_SHRUNK_FL``). (not in - Snapshot shrink has completed (``EXT4_SNAPFILE_SHRUNK_FL``). (not in
mainline) mainline)
* - 0x10000000 * - 0x10000000
- Inode has inline data (EXT4\_INLINE\_DATA\_FL). - Inode has inline data (EXT4_INLINE_DATA_FL).
* - 0x20000000 * - 0x20000000
- Create children with the same project ID (EXT4\_PROJINHERIT\_FL). - Create children with the same project ID (EXT4_PROJINHERIT_FL).
* - 0x80000000 * - 0x80000000
- Reserved for ext4 library (EXT4\_RESERVED\_FL). - Reserved for ext4 library (EXT4_RESERVED_FL).
* - * -
- Aggregate flags: - Aggregate flags:
* - 0x705BDFFF * - 0x705BDFFF
- User-visible flags. - User-visible flags.
* - 0x604BC0FF * - 0x604BC0FF
- User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and - User-modifiable flags. Note that while EXT4_JOURNAL_DATA_FL and
EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's EXT4_EXTENTS_FL can be set with setattr, they are not in the kernel's
EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of EXT4_FL_USER_MODIFIABLE mask, since it needs to handle the setting of
these flags in a special manner and they are masked out of the set of these flags in a special manner and they are masked out of the set of
flags that are saved directly to i\_flags. flags that are saved directly to i_flags.
.. _i_osd1: .. _i_osd1:
@ -325,9 +325,9 @@ Linux:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- l\_i\_version - l_i_version
- Inode version. However, if the EA\_INODE inode flag is set, this inode - Inode version. However, if the EA_INODE inode flag is set, this inode
stores an extended attribute value and this field contains the upper 32 stores an extended attribute value and this field contains the upper 32
bits of the attribute value's reference count. bits of the attribute value's reference count.
@ -342,8 +342,8 @@ Hurd:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- h\_i\_translator - h_i_translator
- ?? - ??
Masix: Masix:
@ -357,8 +357,8 @@ Masix:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- m\_i\_reserved - m_i_reserved
- ?? - ??
.. _i_osd2: .. _i_osd2:
@ -376,30 +376,30 @@ Linux:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le16 - __le16
- l\_i\_blocks\_high - l_i_blocks_high
- Upper 16-bits of the block count. Please see the note attached to - Upper 16-bits of the block count. Please see the note attached to
i\_blocks\_lo. i_blocks_lo.
* - 0x2 * - 0x2
- \_\_le16 - __le16
- l\_i\_file\_acl\_high - l_i_file_acl_high
- Upper 16-bits of the extended attribute block (historically, the file - Upper 16-bits of the extended attribute block (historically, the file
ACL location). See the Extended Attributes section below. ACL location). See the Extended Attributes section below.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- l\_i\_uid\_high - l_i_uid_high
- Upper 16-bits of the Owner UID. - Upper 16-bits of the Owner UID.
* - 0x6 * - 0x6
- \_\_le16 - __le16
- l\_i\_gid\_high - l_i_gid_high
- Upper 16-bits of the GID. - Upper 16-bits of the GID.
* - 0x8 * - 0x8
- \_\_le16 - __le16
- l\_i\_checksum\_lo - l_i_checksum_lo
- Lower 16-bits of the inode checksum. - Lower 16-bits of the inode checksum.
* - 0xA * - 0xA
- \_\_le16 - __le16
- l\_i\_reserved - l_i_reserved
- Unused. - Unused.
Hurd: Hurd:
@ -413,24 +413,24 @@ Hurd:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le16 - __le16
- h\_i\_reserved1 - h_i_reserved1
- ?? - ??
* - 0x2 * - 0x2
- \_\_u16 - __u16
- h\_i\_mode\_high - h_i_mode_high
- Upper 16-bits of the file mode. - Upper 16-bits of the file mode.
* - 0x4 * - 0x4
- \_\_le16 - __le16
- h\_i\_uid\_high - h_i_uid_high
- Upper 16-bits of the Owner UID. - Upper 16-bits of the Owner UID.
* - 0x6 * - 0x6
- \_\_le16 - __le16
- h\_i\_gid\_high - h_i_gid_high
- Upper 16-bits of the GID. - Upper 16-bits of the GID.
* - 0x8 * - 0x8
- \_\_u32 - __u32
- h\_i\_author - h_i_author
- Author code? - Author code?
Masix: Masix:
@ -444,17 +444,17 @@ Masix:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le16 - __le16
- h\_i\_reserved1 - h_i_reserved1
- ?? - ??
* - 0x2 * - 0x2
- \_\_u16 - __u16
- m\_i\_file\_acl\_high - m_i_file_acl_high
- Upper 16-bits of the extended attribute block (historically, the file - Upper 16-bits of the extended attribute block (historically, the file
ACL location). ACL location).
* - 0x4 * - 0x4
- \_\_u32 - __u32
- m\_i\_reserved2[2] - m_i_reserved2[2]
- ?? - ??
Inode Size Inode Size
@ -466,11 +466,11 @@ In ext2 and ext3, the inode structure size was fixed at 128 bytes
on-disk inode at format time for all inodes in the filesystem to provide on-disk inode at format time for all inodes in the filesystem to provide
space beyond the end of the original ext2 inode. The on-disk inode space beyond the end of the original ext2 inode. The on-disk inode
record size is recorded in the superblock as ``s_inode_size``. The record size is recorded in the superblock as ``s_inode_size``. The
number of bytes actually used by struct ext4\_inode beyond the original number of bytes actually used by struct ext4_inode beyond the original
128-byte ext2 inode is recorded in the ``i_extra_isize`` field for each 128-byte ext2 inode is recorded in the ``i_extra_isize`` field for each
inode, which allows struct ext4\_inode to grow for a new kernel without inode, which allows struct ext4_inode to grow for a new kernel without
having to upgrade all of the on-disk inodes. Access to fields beyond having to upgrade all of the on-disk inodes. Access to fields beyond
EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within EXT2_GOOD_OLD_INODE_SIZE should be verified to be within
``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as ``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as
of August 2019) the inode structure is 160 bytes of August 2019) the inode structure is 160 bytes
(``i_extra_isize = 32``). The extra space between the end of the inode (``i_extra_isize = 32``). The extra space between the end of the inode
@ -516,7 +516,7 @@ creation time (crtime); this field is 64-bits wide and decoded in the
same manner as 64-bit [cma]time. Neither crtime nor dtime are accessible same manner as 64-bit [cma]time. Neither crtime nor dtime are accessible
through the regular stat() interface, though debugfs will report them. through the regular stat() interface, though debugfs will report them.
We use the 32-bit signed time value plus (2^32 \* (extra epoch bits)). We use the 32-bit signed time value plus (2^32 * (extra epoch bits)).
In other words: In other words:
.. list-table:: .. list-table::
@ -525,8 +525,8 @@ In other words:
* - Extra epoch bits * - Extra epoch bits
- MSB of 32-bit time - MSB of 32-bit time
- Adjustment for signed 32-bit to 64-bit tv\_sec - Adjustment for signed 32-bit to 64-bit tv_sec
- Decoded 64-bit tv\_sec - Decoded 64-bit tv_sec
- valid time range - valid time range
* - 0 0 * - 0 0
- 1 - 1

View File

@ -63,8 +63,8 @@ Generally speaking, the journal has this format:
:header-rows: 1 :header-rows: 1
* - Superblock * - Superblock
- descriptor\_block (data\_blocks or revocation\_block) [more data or - descriptor_block (data_blocks or revocation_block) [more data or
revocations] commit\_block revocations] commit_block
- [more transactions...] - [more transactions...]
* - * -
- One transaction - One transaction
@ -93,8 +93,8 @@ superblock.
* - 1024 bytes of padding * - 1024 bytes of padding
- ext4 Superblock - ext4 Superblock
- Journal Superblock - Journal Superblock
- descriptor\_block (data\_blocks or revocation\_block) [more data or - descriptor_block (data_blocks or revocation_block) [more data or
revocations] commit\_block revocations] commit_block
- [more transactions...] - [more transactions...]
* - * -
- -
@ -117,17 +117,17 @@ Every block in the journal starts with a common 12-byte header
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_be32 - __be32
- h\_magic - h_magic
- jbd2 magic number, 0xC03B3998. - jbd2 magic number, 0xC03B3998.
* - 0x4 * - 0x4
- \_\_be32 - __be32
- h\_blocktype - h_blocktype
- Description of what this block contains. See the jbd2_blocktype_ table - Description of what this block contains. See the jbd2_blocktype_ table
below. below.
* - 0x8 * - 0x8
- \_\_be32 - __be32
- h\_sequence - h_sequence
- The transaction ID that goes with this block. - The transaction ID that goes with this block.
.. _jbd2_blocktype: .. _jbd2_blocktype:
@ -177,99 +177,99 @@ which is 1024 bytes long:
- -
- Static information describing the journal. - Static information describing the journal.
* - 0x0 * - 0x0
- journal\_header\_t (12 bytes) - journal_header_t (12 bytes)
- s\_header - s_header
- Common header identifying this as a superblock. - Common header identifying this as a superblock.
* - 0xC * - 0xC
- \_\_be32 - __be32
- s\_blocksize - s_blocksize
- Journal device block size. - Journal device block size.
* - 0x10 * - 0x10
- \_\_be32 - __be32
- s\_maxlen - s_maxlen
- Total number of blocks in this journal. - Total number of blocks in this journal.
* - 0x14 * - 0x14
- \_\_be32 - __be32
- s\_first - s_first
- First block of log information. - First block of log information.
* - * -
- -
- -
- Dynamic information describing the current state of the log. - Dynamic information describing the current state of the log.
* - 0x18 * - 0x18
- \_\_be32 - __be32
- s\_sequence - s_sequence
- First commit ID expected in log. - First commit ID expected in log.
* - 0x1C * - 0x1C
- \_\_be32 - __be32
- s\_start - s_start
- Block number of the start of log. Contrary to the comments, this field - Block number of the start of log. Contrary to the comments, this field
being zero does not imply that the journal is clean! being zero does not imply that the journal is clean!
* - 0x20 * - 0x20
- \_\_be32 - __be32
- s\_errno - s_errno
- Error value, as set by jbd2\_journal\_abort(). - Error value, as set by jbd2_journal_abort().
* - * -
- -
- -
- The remaining fields are only valid in a v2 superblock. - The remaining fields are only valid in a v2 superblock.
* - 0x24 * - 0x24
- \_\_be32 - __be32
- s\_feature\_compat - s_feature_compat
- Compatible feature set. See the table jbd2_compat_ below. - Compatible feature set. See the table jbd2_compat_ below.
* - 0x28 * - 0x28
- \_\_be32 - __be32
- s\_feature\_incompat - s_feature_incompat
- Incompatible feature set. See the table jbd2_incompat_ below. - Incompatible feature set. See the table jbd2_incompat_ below.
* - 0x2C * - 0x2C
- \_\_be32 - __be32
- s\_feature\_ro\_compat - s_feature_ro_compat
- Read-only compatible feature set. There aren't any of these currently. - Read-only compatible feature set. There aren't any of these currently.
* - 0x30 * - 0x30
- \_\_u8 - __u8
- s\_uuid[16] - s_uuid[16]
- 128-bit uuid for journal. This is compared against the copy in the ext4 - 128-bit uuid for journal. This is compared against the copy in the ext4
super block at mount time. super block at mount time.
* - 0x40 * - 0x40
- \_\_be32 - __be32
- s\_nr\_users - s_nr_users
- Number of file systems sharing this journal. - Number of file systems sharing this journal.
* - 0x44 * - 0x44
- \_\_be32 - __be32
- s\_dynsuper - s_dynsuper
- Location of dynamic super block copy. (Not used?) - Location of dynamic super block copy. (Not used?)
* - 0x48 * - 0x48
- \_\_be32 - __be32
- s\_max\_transaction - s_max_transaction
- Limit of journal blocks per transaction. (Not used?) - Limit of journal blocks per transaction. (Not used?)
* - 0x4C * - 0x4C
- \_\_be32 - __be32
- s\_max\_trans\_data - s_max_trans_data
- Limit of data blocks per transaction. (Not used?) - Limit of data blocks per transaction. (Not used?)
* - 0x50 * - 0x50
- \_\_u8 - __u8
- s\_checksum\_type - s_checksum_type
- Checksum algorithm used for the journal. See jbd2_checksum_type_ for - Checksum algorithm used for the journal. See jbd2_checksum_type_ for
more info. more info.
* - 0x51 * - 0x51
- \_\_u8[3] - __u8[3]
- s\_padding2 - s_padding2
- -
* - 0x54 * - 0x54
- \_\_be32 - __be32
- s\_num\_fc\_blocks - s_num_fc_blocks
- Number of fast commit blocks in the journal. - Number of fast commit blocks in the journal.
* - 0x58 * - 0x58
- \_\_u32 - __u32
- s\_padding[42] - s_padding[42]
- -
* - 0xFC * - 0xFC
- \_\_be32 - __be32
- s\_checksum - s_checksum
- Checksum of the entire superblock, with this field set to zero. - Checksum of the entire superblock, with this field set to zero.
* - 0x100 * - 0x100
- \_\_u8 - __u8
- s\_users[16\*48] - s_users[16*48]
- ids of all file systems sharing the log. e2fsprogs/Linux don't allow - ids of all file systems sharing the log. e2fsprogs/Linux don't allow
shared external journals, but I imagine Lustre (or ocfs2?), which use shared external journals, but I imagine Lustre (or ocfs2?), which use
the jbd2 code, might. the jbd2 code, might.
@ -286,7 +286,7 @@ The journal compat features are any combination of the following:
- Description - Description
* - 0x1 * - 0x1
- Journal maintains checksums on the data blocks. - Journal maintains checksums on the data blocks.
(JBD2\_FEATURE\_COMPAT\_CHECKSUM) (JBD2_FEATURE_COMPAT_CHECKSUM)
.. _jbd2_incompat: .. _jbd2_incompat:
@ -299,23 +299,23 @@ The journal incompat features are any combination of the following:
* - Value * - Value
- Description - Description
* - 0x1 * - 0x1
- Journal has block revocation records. (JBD2\_FEATURE\_INCOMPAT\_REVOKE) - Journal has block revocation records. (JBD2_FEATURE_INCOMPAT_REVOKE)
* - 0x2 * - 0x2
- Journal can deal with 64-bit block numbers. - Journal can deal with 64-bit block numbers.
(JBD2\_FEATURE\_INCOMPAT\_64BIT) (JBD2_FEATURE_INCOMPAT_64BIT)
* - 0x4 * - 0x4
- Journal commits asynchronously. (JBD2\_FEATURE\_INCOMPAT\_ASYNC\_COMMIT) - Journal commits asynchronously. (JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
* - 0x8 * - 0x8
- This journal uses v2 of the checksum on-disk format. Each journal - This journal uses v2 of the checksum on-disk format. Each journal
metadata block gets its own checksum, and the block tags in the metadata block gets its own checksum, and the block tags in the
descriptor table contain checksums for each of the data blocks in the descriptor table contain checksums for each of the data blocks in the
journal. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2) journal. (JBD2_FEATURE_INCOMPAT_CSUM_V2)
* - 0x10 * - 0x10
- This journal uses v3 of the checksum on-disk format. This is the same as - This journal uses v3 of the checksum on-disk format. This is the same as
v2, but the journal block tag size is fixed regardless of the size of v2, but the journal block tag size is fixed regardless of the size of
block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3) block numbers. (JBD2_FEATURE_INCOMPAT_CSUM_V3)
* - 0x20 * - 0x20
- Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) - Journal has fast commit blocks. (JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
.. _jbd2_checksum_type: .. _jbd2_checksum_type:
@ -355,11 +355,11 @@ Descriptor blocks consume at least 36 bytes, but use a full block:
- Name - Name
- Descriptor - Descriptor
* - 0x0 * - 0x0
- journal\_header\_t - journal_header_t
- (open coded) - (open coded)
- Common block header. - Common block header.
* - 0xC * - 0xC
- struct journal\_block\_tag\_s - struct journal_block_tag_s
- open coded array[] - open coded array[]
- Enough tags either to fill up the block or to describe all the data - Enough tags either to fill up the block or to describe all the data
blocks that follow this descriptor block. blocks that follow this descriptor block.
@ -367,7 +367,7 @@ Descriptor blocks consume at least 36 bytes, but use a full block:
Journal block tags have any of the following formats, depending on which Journal block tags have any of the following formats, depending on which
journal feature and block tag flags are set. journal feature and block tag flags are set.
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is set, the journal block tag is If JBD2_FEATURE_INCOMPAT_CSUM_V3 is set, the journal block tag is
defined as ``struct journal_block_tag3_s``, which looks like the defined as ``struct journal_block_tag3_s``, which looks like the
following. The size is 16 or 32 bytes. following. The size is 16 or 32 bytes.
@ -380,24 +380,24 @@ following. The size is 16 or 32 bytes.
- Name - Name
- Descriptor - Descriptor
* - 0x0 * - 0x0
- \_\_be32 - __be32
- t\_blocknr - t_blocknr
- Lower 32-bits of the location of where the corresponding data block - Lower 32-bits of the location of where the corresponding data block
should end up on disk. should end up on disk.
* - 0x4 * - 0x4
- \_\_be32 - __be32
- t\_flags - t_flags
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for - Flags that go with the descriptor. See the table jbd2_tag_flags_ for
more info. more info.
* - 0x8 * - 0x8
- \_\_be32 - __be32
- t\_blocknr\_high - t_blocknr_high
- Upper 32-bits of the location of where the corresponding data block - Upper 32-bits of the location of where the corresponding data block
should end up on disk. This is zero if JBD2\_FEATURE\_INCOMPAT\_64BIT is should end up on disk. This is zero if JBD2_FEATURE_INCOMPAT_64BIT is
not enabled. not enabled.
* - 0xC * - 0xC
- \_\_be32 - __be32
- t\_checksum - t_checksum
- Checksum of the journal UUID, the sequence number, and the data block. - Checksum of the journal UUID, the sequence number, and the data block.
* - * -
- -
@ -433,7 +433,7 @@ The journal tag flags are any combination of the following:
* - 0x8 * - 0x8
- This is the last tag in this descriptor block. - This is the last tag in this descriptor block.
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is NOT set, the journal block tag If JBD2_FEATURE_INCOMPAT_CSUM_V3 is NOT set, the journal block tag
is defined as ``struct journal_block_tag_s``, which looks like the is defined as ``struct journal_block_tag_s``, which looks like the
following. The size is 8, 12, 24, or 28 bytes: following. The size is 8, 12, 24, or 28 bytes:
@ -446,18 +446,18 @@ following. The size is 8, 12, 24, or 28 bytes:
- Name - Name
- Descriptor - Descriptor
* - 0x0 * - 0x0
- \_\_be32 - __be32
- t\_blocknr - t_blocknr
- Lower 32-bits of the location of where the corresponding data block - Lower 32-bits of the location of where the corresponding data block
should end up on disk. should end up on disk.
* - 0x4 * - 0x4
- \_\_be16 - __be16
- t\_checksum - t_checksum
- Checksum of the journal UUID, the sequence number, and the data block. - Checksum of the journal UUID, the sequence number, and the data block.
Note that only the lower 16 bits are stored. Note that only the lower 16 bits are stored.
* - 0x6 * - 0x6
- \_\_be16 - __be16
- t\_flags - t_flags
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for - Flags that go with the descriptor. See the table jbd2_tag_flags_ for
more info. more info.
* - * -
@ -466,8 +466,8 @@ following. The size is 8, 12, 24, or 28 bytes:
- This next field is only present if the super block indicates support for - This next field is only present if the super block indicates support for
64-bit block numbers. 64-bit block numbers.
* - 0x8 * - 0x8
- \_\_be32 - __be32
- t\_blocknr\_high - t_blocknr_high
- Upper 32-bits of the location of where the corresponding data block - Upper 32-bits of the location of where the corresponding data block
should end up on disk. should end up on disk.
* - * -
@ -483,8 +483,8 @@ following. The size is 8, 12, 24, or 28 bytes:
``j_uuid`` field in ``struct journal_s``, but only tune2fs touches that ``j_uuid`` field in ``struct journal_s``, but only tune2fs touches that
field. field.
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the block is a
``struct jbd2_journal_block_tail``, which looks like this: ``struct jbd2_journal_block_tail``, which looks like this:
.. list-table:: .. list-table::
@ -496,8 +496,8 @@ JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a
- Name - Name
- Descriptor - Descriptor
* - 0x0 * - 0x0
- \_\_be32 - __be32
- t\_checksum - t_checksum
- Checksum of the journal UUID + the descriptor block, with this field set - Checksum of the journal UUID + the descriptor block, with this field set
to zero. to zero.
@ -538,25 +538,25 @@ length, but use a full block:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- journal\_header\_t - journal_header_t
- r\_header - r_header
- Common block header. - Common block header.
* - 0xC * - 0xC
- \_\_be32 - __be32
- r\_count - r_count
- Number of bytes used in this block. - Number of bytes used in this block.
* - 0x10 * - 0x10
- \_\_be32 or \_\_be64 - __be32 or __be64
- blocks[0] - blocks[0]
- Blocks to revoke. - Blocks to revoke.
After r\_count is a linear array of block numbers that are effectively After r_count is a linear array of block numbers that are effectively
revoked by this transaction. The size of each block number is 8 bytes if revoked by this transaction. The size of each block number is 8 bytes if
the superblock advertises 64-bit block number support, or 4 bytes the superblock advertises 64-bit block number support, or 4 bytes
otherwise. otherwise.
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the revocation JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the revocation
block is a ``struct jbd2_journal_revoke_tail``, which has this format: block is a ``struct jbd2_journal_revoke_tail``, which has this format:
.. list-table:: .. list-table::
@ -568,8 +568,8 @@ block is a ``struct jbd2_journal_revoke_tail``, which has this format:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_be32 - __be32
- r\_checksum - r_checksum
- Checksum of the journal UUID + revocation block - Checksum of the journal UUID + revocation block
Commit Block Commit Block
@ -592,38 +592,38 @@ bytes long (but uses a full block):
- Name - Name
- Descriptor - Descriptor
* - 0x0 * - 0x0
- journal\_header\_s - journal_header_s
- (open coded) - (open coded)
- Common block header. - Common block header.
* - 0xC * - 0xC
- unsigned char - unsigned char
- h\_chksum\_type - h_chksum_type
- The type of checksum to use to verify the integrity of the data blocks - The type of checksum to use to verify the integrity of the data blocks
in the transaction. See jbd2_checksum_type_ for more info. in the transaction. See jbd2_checksum_type_ for more info.
* - 0xD * - 0xD
- unsigned char - unsigned char
- h\_chksum\_size - h_chksum_size
- The number of bytes used by the checksum. Most likely 4. - The number of bytes used by the checksum. Most likely 4.
* - 0xE * - 0xE
- unsigned char - unsigned char
- h\_padding[2] - h_padding[2]
- -
* - 0x10 * - 0x10
- \_\_be32 - __be32
- h\_chksum[JBD2\_CHECKSUM\_BYTES] - h_chksum[JBD2_CHECKSUM_BYTES]
- 32 bytes of space to store checksums. If - 32 bytes of space to store checksums. If
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 JBD2_FEATURE_INCOMPAT_CSUM_V2 or JBD2_FEATURE_INCOMPAT_CSUM_V3
are set, the first ``__be32`` is the checksum of the journal UUID and are set, the first ``__be32`` is the checksum of the journal UUID and
the entire commit block, with this field zeroed. If the entire commit block, with this field zeroed. If
JBD2\_FEATURE\_COMPAT\_CHECKSUM is set, the first ``__be32`` is the JBD2_FEATURE_COMPAT_CHECKSUM is set, the first ``__be32`` is the
crc32 of all the blocks already written to the transaction. crc32 of all the blocks already written to the transaction.
* - 0x30 * - 0x30
- \_\_be64 - __be64
- h\_commit\_sec - h_commit_sec
- The time that the transaction was committed, in seconds since the epoch. - The time that the transaction was committed, in seconds since the epoch.
* - 0x38 * - 0x38
- \_\_be32 - __be32
- h\_commit\_nsec - h_commit_nsec
- Nanoseconds component of the above timestamp. - Nanoseconds component of the above timestamp.
Fast commits Fast commits

View File

@ -7,8 +7,8 @@ Multiple mount protection (MMP) is a feature that protects the
filesystem against multiple hosts trying to use the filesystem filesystem against multiple hosts trying to use the filesystem
simultaneously. When a filesystem is opened (for mounting, or fsck, simultaneously. When a filesystem is opened (for mounting, or fsck,
etc.), the MMP code running on the node (call it node A) checks a etc.), the MMP code running on the node (call it node A) checks a
sequence number. If the sequence number is EXT4\_MMP\_SEQ\_CLEAN, the sequence number. If the sequence number is EXT4_MMP_SEQ_CLEAN, the
open continues. If the sequence number is EXT4\_MMP\_SEQ\_FSCK, then open continues. If the sequence number is EXT4_MMP_SEQ_FSCK, then
fsck is (hopefully) running, and open fails immediately. Otherwise, the fsck is (hopefully) running, and open fails immediately. Otherwise, the
open code will wait for twice the specified MMP check interval and check open code will wait for twice the specified MMP check interval and check
the sequence number again. If the sequence number has changed, then the the sequence number again. If the sequence number has changed, then the
@ -40,38 +40,38 @@ The MMP structure (``struct mmp_struct``) is as follows:
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- mmp\_magic - mmp_magic
- Magic number for MMP, 0x004D4D50 (“MMP”). - Magic number for MMP, 0x004D4D50 (“MMP”).
* - 0x4 * - 0x4
- \_\_le32 - __le32
- mmp\_seq - mmp_seq
- Sequence number, updated periodically. - Sequence number, updated periodically.
* - 0x8 * - 0x8
- \_\_le64 - __le64
- mmp\_time - mmp_time
- Time that the MMP block was last updated. - Time that the MMP block was last updated.
* - 0x10 * - 0x10
- char[64] - char[64]
- mmp\_nodename - mmp_nodename
- Hostname of the node that opened the filesystem. - Hostname of the node that opened the filesystem.
* - 0x50 * - 0x50
- char[32] - char[32]
- mmp\_bdevname - mmp_bdevname
- Block device name of the filesystem. - Block device name of the filesystem.
* - 0x70 * - 0x70
- \_\_le16 - __le16
- mmp\_check\_interval - mmp_check_interval
- The MMP re-check interval, in seconds. - The MMP re-check interval, in seconds.
* - 0x72 * - 0x72
- \_\_le16 - __le16
- mmp\_pad1 - mmp_pad1
- Zero. - Zero.
* - 0x74 * - 0x74
- \_\_le32[226] - __le32[226]
- mmp\_pad2 - mmp_pad2
- Zero. - Zero.
* - 0x3FC * - 0x3FC
- \_\_le32 - __le32
- mmp\_checksum - mmp_checksum
- Checksum of the MMP block. - Checksum of the MMP block.

View File

@ -7,7 +7,7 @@ An ext4 file system is split into a series of block groups. To reduce
performance difficulties due to fragmentation, the block allocator tries performance difficulties due to fragmentation, the block allocator tries
very hard to keep each file's blocks within the same group, thereby very hard to keep each file's blocks within the same group, thereby
reducing seek times. The size of a block group is specified in reducing seek times. The size of a block group is specified in
``sb.s_blocks_per_group`` blocks, though it can also be calculated as 8 \* ``sb.s_blocks_per_group`` blocks, though it can also be calculated as 8 *
``block_size_in_bytes``. With the default block size of 4KiB, each group ``block_size_in_bytes``. With the default block size of 4KiB, each group
will contain 32,768 blocks, for a length of 128MiB. The number of block will contain 32,768 blocks, for a length of 128MiB. The number of block
groups is the size of the device divided by the size of a block group. groups is the size of the device divided by the size of a block group.

View File

@ -34,7 +34,7 @@ ext4 reserves some inode for special features, as follows:
* - 10 * - 10
- Replica inode, used for some non-upstream feature? - Replica inode, used for some non-upstream feature?
* - 11 * - 11
- Traditional first non-reserved inode. Usually this is the lost+found directory. See s\_first\_ino in the superblock. - Traditional first non-reserved inode. Usually this is the lost+found directory. See s_first_ino in the superblock.
Note that there are also some inodes allocated from non-reserved inode numbers Note that there are also some inodes allocated from non-reserved inode numbers
for other filesystem features which are not referenced from standard directory for other filesystem features which are not referenced from standard directory
@ -47,9 +47,9 @@ hierarchy. These are generally reference from the superblock. They are:
* - Superblock field * - Superblock field
- Description - Description
* - s\_lpf\_ino * - s_lpf_ino
- Inode number of lost+found directory. - Inode number of lost+found directory.
* - s\_prj\_quota\_inum * - s_prj_quota_inum
- Inode number of quota file tracking project quotas - Inode number of quota file tracking project quotas
* - s\_orphan\_file\_inum * - s_orphan_file_inum
- Inode number of file tracking orphan inodes. - Inode number of file tracking orphan inodes.

View File

@ -7,7 +7,7 @@ The superblock records various information about the enclosing
filesystem, such as block counts, inode counts, supported features, filesystem, such as block counts, inode counts, supported features,
maintenance information, and more. maintenance information, and more.
If the sparse\_super feature flag is set, redundant copies of the If the sparse_super feature flag is set, redundant copies of the
superblock and group descriptors are kept only in the groups whose group superblock and group descriptors are kept only in the groups whose group
number is either 0 or a power of 3, 5, or 7. If the flag is not set, number is either 0 or a power of 3, 5, or 7. If the flag is not set,
redundant copies are kept in all groups. redundant copies are kept in all groups.
@ -27,107 +27,107 @@ The ext4 superblock is laid out as follows in
- Name - Name
- Description - Description
* - 0x0 * - 0x0
- \_\_le32 - __le32
- s\_inodes\_count - s_inodes_count
- Total inode count. - Total inode count.
* - 0x4 * - 0x4
- \_\_le32 - __le32
- s\_blocks\_count\_lo - s_blocks_count_lo
- Total block count. - Total block count.
* - 0x8 * - 0x8
- \_\_le32 - __le32
- s\_r\_blocks\_count\_lo - s_r_blocks_count_lo
- This number of blocks can only be allocated by the super-user. - This number of blocks can only be allocated by the super-user.
* - 0xC * - 0xC
- \_\_le32 - __le32
- s\_free\_blocks\_count\_lo - s_free_blocks_count_lo
- Free block count. - Free block count.
* - 0x10 * - 0x10
- \_\_le32 - __le32
- s\_free\_inodes\_count - s_free_inodes_count
- Free inode count. - Free inode count.
* - 0x14 * - 0x14
- \_\_le32 - __le32
- s\_first\_data\_block - s_first_data_block
- First data block. This must be at least 1 for 1k-block filesystems and - First data block. This must be at least 1 for 1k-block filesystems and
is typically 0 for all other block sizes. is typically 0 for all other block sizes.
* - 0x18 * - 0x18
- \_\_le32 - __le32
- s\_log\_block\_size - s_log_block_size
- Block size is 2 ^ (10 + s\_log\_block\_size). - Block size is 2 ^ (10 + s_log_block_size).
* - 0x1C * - 0x1C
- \_\_le32 - __le32
- s\_log\_cluster\_size - s_log_cluster_size
- Cluster size is 2 ^ (10 + s\_log\_cluster\_size) blocks if bigalloc is - Cluster size is 2 ^ (10 + s_log_cluster_size) blocks if bigalloc is
enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size. enabled. Otherwise s_log_cluster_size must equal s_log_block_size.
* - 0x20 * - 0x20
- \_\_le32 - __le32
- s\_blocks\_per\_group - s_blocks_per_group
- Blocks per group. - Blocks per group.
* - 0x24 * - 0x24
- \_\_le32 - __le32
- s\_clusters\_per\_group - s_clusters_per_group
- Clusters per group, if bigalloc is enabled. Otherwise - Clusters per group, if bigalloc is enabled. Otherwise
s\_clusters\_per\_group must equal s\_blocks\_per\_group. s_clusters_per_group must equal s_blocks_per_group.
* - 0x28 * - 0x28
- \_\_le32 - __le32
- s\_inodes\_per\_group - s_inodes_per_group
- Inodes per group. - Inodes per group.
* - 0x2C * - 0x2C
- \_\_le32 - __le32
- s\_mtime - s_mtime
- Mount time, in seconds since the epoch. - Mount time, in seconds since the epoch.
* - 0x30 * - 0x30
- \_\_le32 - __le32
- s\_wtime - s_wtime
- Write time, in seconds since the epoch. - Write time, in seconds since the epoch.
* - 0x34 * - 0x34
- \_\_le16 - __le16
- s\_mnt\_count - s_mnt_count
- Number of mounts since the last fsck. - Number of mounts since the last fsck.
* - 0x36 * - 0x36
- \_\_le16 - __le16
- s\_max\_mnt\_count - s_max_mnt_count
- Number of mounts beyond which a fsck is needed. - Number of mounts beyond which a fsck is needed.
* - 0x38 * - 0x38
- \_\_le16 - __le16
- s\_magic - s_magic
- Magic signature, 0xEF53 - Magic signature, 0xEF53
* - 0x3A * - 0x3A
- \_\_le16 - __le16
- s\_state - s_state
- File system state. See super_state_ for more info. - File system state. See super_state_ for more info.
* - 0x3C * - 0x3C
- \_\_le16 - __le16
- s\_errors - s_errors
- Behaviour when detecting errors. See super_errors_ for more info. - Behaviour when detecting errors. See super_errors_ for more info.
* - 0x3E * - 0x3E
- \_\_le16 - __le16
- s\_minor\_rev\_level - s_minor_rev_level
- Minor revision level. - Minor revision level.
* - 0x40 * - 0x40
- \_\_le32 - __le32
- s\_lastcheck - s_lastcheck
- Time of last check, in seconds since the epoch. - Time of last check, in seconds since the epoch.
* - 0x44 * - 0x44
- \_\_le32 - __le32
- s\_checkinterval - s_checkinterval
- Maximum time between checks, in seconds. - Maximum time between checks, in seconds.
* - 0x48 * - 0x48
- \_\_le32 - __le32
- s\_creator\_os - s_creator_os
- Creator OS. See the table super_creator_ for more info. - Creator OS. See the table super_creator_ for more info.
* - 0x4C * - 0x4C
- \_\_le32 - __le32
- s\_rev\_level - s_rev_level
- Revision level. See the table super_revision_ for more info. - Revision level. See the table super_revision_ for more info.
* - 0x50 * - 0x50
- \_\_le16 - __le16
- s\_def\_resuid - s_def_resuid
- Default uid for reserved blocks. - Default uid for reserved blocks.
* - 0x52 * - 0x52
- \_\_le16 - __le16
- s\_def\_resgid - s_def_resgid
- Default gid for reserved blocks. - Default gid for reserved blocks.
* - * -
- -
@ -143,50 +143,50 @@ The ext4 superblock is laid out as follows in
about a feature in either the compatible or incompatible feature set, it about a feature in either the compatible or incompatible feature set, it
must abort and not try to meddle with things it doesn't understand... must abort and not try to meddle with things it doesn't understand...
* - 0x54 * - 0x54
- \_\_le32 - __le32
- s\_first\_ino - s_first_ino
- First non-reserved inode. - First non-reserved inode.
* - 0x58 * - 0x58
- \_\_le16 - __le16
- s\_inode\_size - s_inode_size
- Size of inode structure, in bytes. - Size of inode structure, in bytes.
* - 0x5A * - 0x5A
- \_\_le16 - __le16
- s\_block\_group\_nr - s_block_group_nr
- Block group # of this superblock. - Block group # of this superblock.
* - 0x5C * - 0x5C
- \_\_le32 - __le32
- s\_feature\_compat - s_feature_compat
- Compatible feature set flags. Kernel can still read/write this fs even - Compatible feature set flags. Kernel can still read/write this fs even
if it doesn't understand a flag; fsck should not do that. See the if it doesn't understand a flag; fsck should not do that. See the
super_compat_ table for more info. super_compat_ table for more info.
* - 0x60 * - 0x60
- \_\_le32 - __le32
- s\_feature\_incompat - s_feature_incompat
- Incompatible feature set. If the kernel or fsck doesn't understand one - Incompatible feature set. If the kernel or fsck doesn't understand one
of these bits, it should stop. See the super_incompat_ table for more of these bits, it should stop. See the super_incompat_ table for more
info. info.
* - 0x64 * - 0x64
- \_\_le32 - __le32
- s\_feature\_ro\_compat - s_feature_ro_compat
- Readonly-compatible feature set. If the kernel doesn't understand one of - Readonly-compatible feature set. If the kernel doesn't understand one of
these bits, it can still mount read-only. See the super_rocompat_ table these bits, it can still mount read-only. See the super_rocompat_ table
for more info. for more info.
* - 0x68 * - 0x68
- \_\_u8 - __u8
- s\_uuid[16] - s_uuid[16]
- 128-bit UUID for volume. - 128-bit UUID for volume.
* - 0x78 * - 0x78
- char - char
- s\_volume\_name[16] - s_volume_name[16]
- Volume label. - Volume label.
* - 0x88 * - 0x88
- char - char
- s\_last\_mounted[64] - s_last_mounted[64]
- Directory where filesystem was last mounted. - Directory where filesystem was last mounted.
* - 0xC8 * - 0xC8
- \_\_le32 - __le32
- s\_algorithm\_usage\_bitmap - s_algorithm_usage_bitmap
- For compression (Not used in e2fsprogs/Linux) - For compression (Not used in e2fsprogs/Linux)
* - * -
- -
@ -194,18 +194,18 @@ The ext4 superblock is laid out as follows in
- Performance hints. Directory preallocation should only happen if the - Performance hints. Directory preallocation should only happen if the
EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
* - 0xCC * - 0xCC
- \_\_u8 - __u8
- s\_prealloc\_blocks - s_prealloc_blocks
- #. of blocks to try to preallocate for ... files? (Not used in - #. of blocks to try to preallocate for ... files? (Not used in
e2fsprogs/Linux) e2fsprogs/Linux)
* - 0xCD * - 0xCD
- \_\_u8 - __u8
- s\_prealloc\_dir\_blocks - s_prealloc_dir_blocks
- #. of blocks to preallocate for directories. (Not used in - #. of blocks to preallocate for directories. (Not used in
e2fsprogs/Linux) e2fsprogs/Linux)
* - 0xCE * - 0xCE
- \_\_le16 - __le16
- s\_reserved\_gdt\_blocks - s_reserved_gdt_blocks
- Number of reserved GDT entries for future filesystem expansion. - Number of reserved GDT entries for future filesystem expansion.
* - * -
- -
@ -213,281 +213,281 @@ The ext4 superblock is laid out as follows in
- Journalling support is valid only if EXT4_FEATURE_COMPAT_HAS_JOURNAL is - Journalling support is valid only if EXT4_FEATURE_COMPAT_HAS_JOURNAL is
set. set.
* - 0xD0 * - 0xD0
- \_\_u8 - __u8
- s\_journal\_uuid[16] - s_journal_uuid[16]
- UUID of journal superblock - UUID of journal superblock
* - 0xE0 * - 0xE0
- \_\_le32 - __le32
- s\_journal\_inum - s_journal_inum
- inode number of journal file. - inode number of journal file.
* - 0xE4 * - 0xE4
- \_\_le32 - __le32
- s\_journal\_dev - s_journal_dev
- Device number of journal file, if the external journal feature flag is - Device number of journal file, if the external journal feature flag is
set. set.
* - 0xE8 * - 0xE8
- \_\_le32 - __le32
- s\_last\_orphan - s_last_orphan
- Start of list of orphaned inodes to delete. - Start of list of orphaned inodes to delete.
* - 0xEC * - 0xEC
- \_\_le32 - __le32
- s\_hash\_seed[4] - s_hash_seed[4]
- HTREE hash seed. - HTREE hash seed.
* - 0xFC * - 0xFC
- \_\_u8 - __u8
- s\_def\_hash\_version - s_def_hash_version
- Default hash algorithm to use for directory hashes. See super_def_hash_ - Default hash algorithm to use for directory hashes. See super_def_hash_
for more info. for more info.
* - 0xFD * - 0xFD
- \_\_u8 - __u8
- s\_jnl\_backup\_type - s_jnl_backup_type
- If this value is 0 or EXT3\_JNL\_BACKUP\_BLOCKS (1), then the - If this value is 0 or EXT3_JNL_BACKUP_BLOCKS (1), then the
``s_jnl_blocks`` field contains a duplicate copy of the inode's ``s_jnl_blocks`` field contains a duplicate copy of the inode's
``i_block[]`` array and ``i_size``. ``i_block[]`` array and ``i_size``.
* - 0xFE * - 0xFE
- \_\_le16 - __le16
- s\_desc\_size - s_desc_size
- Size of group descriptors, in bytes, if the 64bit incompat feature flag - Size of group descriptors, in bytes, if the 64bit incompat feature flag
is set. is set.
* - 0x100 * - 0x100
- \_\_le32 - __le32
- s\_default\_mount\_opts - s_default_mount_opts
- Default mount options. See the super_mountopts_ table for more info. - Default mount options. See the super_mountopts_ table for more info.
* - 0x104 * - 0x104
- \_\_le32 - __le32
- s\_first\_meta\_bg - s_first_meta_bg
- First metablock block group, if the meta\_bg feature is enabled. - First metablock block group, if the meta_bg feature is enabled.
* - 0x108 * - 0x108
- \_\_le32 - __le32
- s\_mkfs\_time - s_mkfs_time
- When the filesystem was created, in seconds since the epoch. - When the filesystem was created, in seconds since the epoch.
* - 0x10C * - 0x10C
- \_\_le32 - __le32
- s\_jnl\_blocks[17] - s_jnl_blocks[17]
- Backup copy of the journal inode's ``i_block[]`` array in the first 15 - Backup copy of the journal inode's ``i_block[]`` array in the first 15
elements and i\_size\_high and i\_size in the 16th and 17th elements, elements and i_size_high and i_size in the 16th and 17th elements,
respectively. respectively.
* - * -
- -
- -
- 64bit support is valid only if EXT4_FEATURE_INCOMPAT_64BIT is set. - 64bit support is valid only if EXT4_FEATURE_INCOMPAT_64BIT is set.
* - 0x150 * - 0x150
- \_\_le32 - __le32
- s\_blocks\_count\_hi - s_blocks_count_hi
- High 32-bits of the block count. - High 32-bits of the block count.
* - 0x154 * - 0x154
- \_\_le32 - __le32
- s\_r\_blocks\_count\_hi - s_r_blocks_count_hi
- High 32-bits of the reserved block count. - High 32-bits of the reserved block count.
* - 0x158 * - 0x158
- \_\_le32 - __le32
- s\_free\_blocks\_count\_hi - s_free_blocks_count_hi
- High 32-bits of the free block count. - High 32-bits of the free block count.
* - 0x15C * - 0x15C
- \_\_le16 - __le16
- s\_min\_extra\_isize - s_min_extra_isize
- All inodes have at least # bytes. - All inodes have at least # bytes.
* - 0x15E * - 0x15E
- \_\_le16 - __le16
- s\_want\_extra\_isize - s_want_extra_isize
- New inodes should reserve # bytes. - New inodes should reserve # bytes.
* - 0x160 * - 0x160
- \_\_le32 - __le32
- s\_flags - s_flags
- Miscellaneous flags. See the super_flags_ table for more info. - Miscellaneous flags. See the super_flags_ table for more info.
* - 0x164 * - 0x164
- \_\_le16 - __le16
- s\_raid\_stride - s_raid_stride
- RAID stride. This is the number of logical blocks read from or written - RAID stride. This is the number of logical blocks read from or written
to the disk before moving to the next disk. This affects the placement to the disk before moving to the next disk. This affects the placement
of filesystem metadata, which will hopefully make RAID storage faster. of filesystem metadata, which will hopefully make RAID storage faster.
* - 0x166 * - 0x166
- \_\_le16 - __le16
- s\_mmp\_interval - s_mmp_interval
- #. seconds to wait in multi-mount prevention (MMP) checking. In theory, - #. seconds to wait in multi-mount prevention (MMP) checking. In theory,
MMP is a mechanism to record in the superblock which host and device MMP is a mechanism to record in the superblock which host and device
have mounted the filesystem, in order to prevent multiple mounts. This have mounted the filesystem, in order to prevent multiple mounts. This
feature does not seem to be implemented... feature does not seem to be implemented...
* - 0x168 * - 0x168
- \_\_le64 - __le64
- s\_mmp\_block - s_mmp_block
- Block # for multi-mount protection data. - Block # for multi-mount protection data.
* - 0x170 * - 0x170
- \_\_le32 - __le32
- s\_raid\_stripe\_width - s_raid_stripe_width
- RAID stripe width. This is the number of logical blocks read from or - RAID stripe width. This is the number of logical blocks read from or
written to the disk before coming back to the current disk. This is used written to the disk before coming back to the current disk. This is used
by the block allocator to try to reduce the number of read-modify-write by the block allocator to try to reduce the number of read-modify-write
operations in a RAID5/6. operations in a RAID5/6.
* - 0x174 * - 0x174
- \_\_u8 - __u8
- s\_log\_groups\_per\_flex - s_log_groups_per_flex
- Size of a flexible block group is 2 ^ ``s_log_groups_per_flex``. - Size of a flexible block group is 2 ^ ``s_log_groups_per_flex``.
* - 0x175 * - 0x175
- \_\_u8 - __u8
- s\_checksum\_type - s_checksum_type
- Metadata checksum algorithm type. The only valid value is 1 (crc32c). - Metadata checksum algorithm type. The only valid value is 1 (crc32c).
* - 0x176 * - 0x176
- \_\_le16 - __le16
- s\_reserved\_pad - s_reserved_pad
- -
* - 0x178 * - 0x178
- \_\_le64 - __le64
- s\_kbytes\_written - s_kbytes_written
- Number of KiB written to this filesystem over its lifetime. - Number of KiB written to this filesystem over its lifetime.
* - 0x180 * - 0x180
- \_\_le32 - __le32
- s\_snapshot\_inum - s_snapshot_inum
- inode number of active snapshot. (Not used in e2fsprogs/Linux.) - inode number of active snapshot. (Not used in e2fsprogs/Linux.)
* - 0x184 * - 0x184
- \_\_le32 - __le32
- s\_snapshot\_id - s_snapshot_id
- Sequential ID of active snapshot. (Not used in e2fsprogs/Linux.) - Sequential ID of active snapshot. (Not used in e2fsprogs/Linux.)
* - 0x188 * - 0x188
- \_\_le64 - __le64
- s\_snapshot\_r\_blocks\_count - s_snapshot_r_blocks_count
- Number of blocks reserved for active snapshot's future use. (Not used in - Number of blocks reserved for active snapshot's future use. (Not used in
e2fsprogs/Linux.) e2fsprogs/Linux.)
* - 0x190 * - 0x190
- \_\_le32 - __le32
- s\_snapshot\_list - s_snapshot_list
- inode number of the head of the on-disk snapshot list. (Not used in - inode number of the head of the on-disk snapshot list. (Not used in
e2fsprogs/Linux.) e2fsprogs/Linux.)
* - 0x194 * - 0x194
- \_\_le32 - __le32
- s\_error\_count - s_error_count
- Number of errors seen. - Number of errors seen.
* - 0x198 * - 0x198
- \_\_le32 - __le32
- s\_first\_error\_time - s_first_error_time
- First time an error happened, in seconds since the epoch. - First time an error happened, in seconds since the epoch.
* - 0x19C * - 0x19C
- \_\_le32 - __le32
- s\_first\_error\_ino - s_first_error_ino
- inode involved in first error. - inode involved in first error.
* - 0x1A0 * - 0x1A0
- \_\_le64 - __le64
- s\_first\_error\_block - s_first_error_block
- Number of block involved in first error. - Number of block involved in first error.
* - 0x1A8 * - 0x1A8
- \_\_u8 - __u8
- s\_first\_error\_func[32] - s_first_error_func[32]
- Name of function where the error happened. - Name of function where the error happened.
* - 0x1C8 * - 0x1C8
- \_\_le32 - __le32
- s\_first\_error\_line - s_first_error_line
- Line number where error happened. - Line number where error happened.
* - 0x1CC * - 0x1CC
- \_\_le32 - __le32
- s\_last\_error\_time - s_last_error_time
- Time of most recent error, in seconds since the epoch. - Time of most recent error, in seconds since the epoch.
* - 0x1D0 * - 0x1D0
- \_\_le32 - __le32
- s\_last\_error\_ino - s_last_error_ino
- inode involved in most recent error. - inode involved in most recent error.
* - 0x1D4 * - 0x1D4
- \_\_le32 - __le32
- s\_last\_error\_line - s_last_error_line
- Line number where most recent error happened. - Line number where most recent error happened.
* - 0x1D8 * - 0x1D8
- \_\_le64 - __le64
- s\_last\_error\_block - s_last_error_block
- Number of block involved in most recent error. - Number of block involved in most recent error.
* - 0x1E0 * - 0x1E0
- \_\_u8 - __u8
- s\_last\_error\_func[32] - s_last_error_func[32]
- Name of function where the most recent error happened. - Name of function where the most recent error happened.
* - 0x200 * - 0x200
- \_\_u8 - __u8
- s\_mount\_opts[64] - s_mount_opts[64]
- ASCIIZ string of mount options. - ASCIIZ string of mount options.
* - 0x240 * - 0x240
- \_\_le32 - __le32
- s\_usr\_quota\_inum - s_usr_quota_inum
- Inode number of user `quota <quota>`__ file. - Inode number of user `quota <quota>`__ file.
* - 0x244 * - 0x244
- \_\_le32 - __le32
- s\_grp\_quota\_inum - s_grp_quota_inum
- Inode number of group `quota <quota>`__ file. - Inode number of group `quota <quota>`__ file.
* - 0x248 * - 0x248
- \_\_le32 - __le32
- s\_overhead\_blocks - s_overhead_blocks
- Overhead blocks/clusters in fs. (Huh? This field is always zero, which - Overhead blocks/clusters in fs. (Huh? This field is always zero, which
means that the kernel calculates it dynamically.) means that the kernel calculates it dynamically.)
* - 0x24C * - 0x24C
- \_\_le32 - __le32
- s\_backup\_bgs[2] - s_backup_bgs[2]
- Block groups containing superblock backups (if sparse\_super2) - Block groups containing superblock backups (if sparse_super2)
* - 0x254 * - 0x254
- \_\_u8 - __u8
- s\_encrypt\_algos[4] - s_encrypt_algos[4]
- Encryption algorithms in use. There can be up to four algorithms in use - Encryption algorithms in use. There can be up to four algorithms in use
at any time; valid algorithm codes are given in the super_encrypt_ table at any time; valid algorithm codes are given in the super_encrypt_ table
below. below.
* - 0x258 * - 0x258
- \_\_u8 - __u8
- s\_encrypt\_pw\_salt[16] - s_encrypt_pw_salt[16]
- Salt for the string2key algorithm for encryption. - Salt for the string2key algorithm for encryption.
* - 0x268 * - 0x268
- \_\_le32 - __le32
- s\_lpf\_ino - s_lpf_ino
- Inode number of lost+found - Inode number of lost+found
* - 0x26C * - 0x26C
- \_\_le32 - __le32
- s\_prj\_quota\_inum - s_prj_quota_inum
- Inode that tracks project quotas. - Inode that tracks project quotas.
* - 0x270 * - 0x270
- \_\_le32 - __le32
- s\_checksum\_seed - s_checksum_seed
- Checksum seed used for metadata\_csum calculations. This value is - Checksum seed used for metadata_csum calculations. This value is
crc32c(~0, $orig\_fs\_uuid). crc32c(~0, $orig_fs_uuid).
* - 0x274 * - 0x274
- \_\_u8 - __u8
- s\_wtime_hi - s_wtime_hi
- Upper 8 bits of the s_wtime field. - Upper 8 bits of the s_wtime field.
* - 0x275 * - 0x275
- \_\_u8 - __u8
- s\_mtime_hi - s_mtime_hi
- Upper 8 bits of the s_mtime field. - Upper 8 bits of the s_mtime field.
* - 0x276 * - 0x276
- \_\_u8 - __u8
- s\_mkfs_time_hi - s_mkfs_time_hi
- Upper 8 bits of the s_mkfs_time field. - Upper 8 bits of the s_mkfs_time field.
* - 0x277 * - 0x277
- \_\_u8 - __u8
- s\_lastcheck_hi - s_lastcheck_hi
- Upper 8 bits of the s_lastcheck field. - Upper 8 bits of the s_lastcheck field.
* - 0x278 * - 0x278
- \_\_u8 - __u8
- s\_first_error_time_hi - s_first_error_time_hi
- Upper 8 bits of the s_first_error_time field. - Upper 8 bits of the s_first_error_time field.
* - 0x279 * - 0x279
- \_\_u8 - __u8
- s\_last_error_time_hi - s_last_error_time_hi
- Upper 8 bits of the s_last_error_time field. - Upper 8 bits of the s_last_error_time field.
* - 0x27A * - 0x27A
- \_\_u8 - __u8
- s\_pad[2] - s_pad[2]
- Zero padding. - Zero padding.
* - 0x27C * - 0x27C
- \_\_le16 - __le16
- s\_encoding - s_encoding
- Filename charset encoding. - Filename charset encoding.
* - 0x27E * - 0x27E
- \_\_le16 - __le16
- s\_encoding_flags - s_encoding_flags
- Filename charset encoding flags. - Filename charset encoding flags.
* - 0x280 * - 0x280
- \_\_le32 - __le32
- s\_orphan\_file\_inum - s_orphan_file_inum
- Orphan file inode number. - Orphan file inode number.
* - 0x284 * - 0x284
- \_\_le32 - __le32
- s\_reserved[94] - s_reserved[94]
- Padding to the end of the block. - Padding to the end of the block.
* - 0x3FC * - 0x3FC
- \_\_le32 - __le32
- s\_checksum - s_checksum
- Superblock checksum. - Superblock checksum.
.. _super_state: .. _super_state:
@ -574,44 +574,44 @@ following:
* - Value * - Value
- Description - Description
* - 0x1 * - 0x1
- Directory preallocation (COMPAT\_DIR\_PREALLOC). - Directory preallocation (COMPAT_DIR_PREALLOC).
* - 0x2 * - 0x2
- “imagic inodes”. Not clear from the code what this does - “imagic inodes”. Not clear from the code what this does
(COMPAT\_IMAGIC\_INODES). (COMPAT_IMAGIC_INODES).
* - 0x4 * - 0x4
- Has a journal (COMPAT\_HAS\_JOURNAL). - Has a journal (COMPAT_HAS_JOURNAL).
* - 0x8 * - 0x8
- Supports extended attributes (COMPAT\_EXT\_ATTR). - Supports extended attributes (COMPAT_EXT_ATTR).
* - 0x10 * - 0x10
- Has reserved GDT blocks for filesystem expansion - Has reserved GDT blocks for filesystem expansion
(COMPAT\_RESIZE\_INODE). Requires RO\_COMPAT\_SPARSE\_SUPER. (COMPAT_RESIZE_INODE). Requires RO_COMPAT_SPARSE_SUPER.
* - 0x20 * - 0x20
- Has directory indices (COMPAT\_DIR\_INDEX). - Has directory indices (COMPAT_DIR_INDEX).
* - 0x40 * - 0x40
- “Lazy BG”. Not in Linux kernel, seems to have been for uninitialized - “Lazy BG”. Not in Linux kernel, seems to have been for uninitialized
block groups? (COMPAT\_LAZY\_BG) block groups? (COMPAT_LAZY_BG)
* - 0x80 * - 0x80
- “Exclude inode”. Not used. (COMPAT\_EXCLUDE\_INODE). - “Exclude inode”. Not used. (COMPAT_EXCLUDE_INODE).
* - 0x100 * - 0x100
- “Exclude bitmap”. Seems to be used to indicate the presence of - “Exclude bitmap”. Seems to be used to indicate the presence of
snapshot-related exclude bitmaps? Not defined in kernel or used in snapshot-related exclude bitmaps? Not defined in kernel or used in
e2fsprogs (COMPAT\_EXCLUDE\_BITMAP). e2fsprogs (COMPAT_EXCLUDE_BITMAP).
* - 0x200 * - 0x200
- Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs - Sparse Super Block, v2. If this flag is set, the SB field s_backup_bgs
points to the two block groups that contain backup superblocks points to the two block groups that contain backup superblocks
(COMPAT\_SPARSE\_SUPER2). (COMPAT_SPARSE_SUPER2).
* - 0x400 * - 0x400
- Fast commits supported. Although fast commits blocks are - Fast commits supported. Although fast commits blocks are
backward incompatible, fast commit blocks are not always backward incompatible, fast commit blocks are not always
present in the journal. If fast commit blocks are present in present in the journal. If fast commit blocks are present in
the journal, JBD2 incompat feature the journal, JBD2 incompat feature
(JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets (JBD2_FEATURE_INCOMPAT_FAST_COMMIT) gets
set (COMPAT\_FAST\_COMMIT). set (COMPAT_FAST_COMMIT).
* - 0x1000 * - 0x1000
- Orphan file allocated. This is the special file for more efficient - Orphan file allocated. This is the special file for more efficient
tracking of unlinked but still open inodes. When there may be any tracking of unlinked but still open inodes. When there may be any
entries in the file, we additionally set proper rocompat feature entries in the file, we additionally set proper rocompat feature
(RO\_COMPAT\_ORPHAN\_PRESENT). (RO_COMPAT_ORPHAN_PRESENT).
.. _super_incompat: .. _super_incompat:
@ -625,45 +625,45 @@ following:
* - Value * - Value
- Description - Description
* - 0x1 * - 0x1
- Compression (INCOMPAT\_COMPRESSION). - Compression (INCOMPAT_COMPRESSION).
* - 0x2 * - 0x2
- Directory entries record the file type. See ext4\_dir\_entry\_2 below - Directory entries record the file type. See ext4_dir_entry_2 below
(INCOMPAT\_FILETYPE). (INCOMPAT_FILETYPE).
* - 0x4 * - 0x4
- Filesystem needs recovery (INCOMPAT\_RECOVER). - Filesystem needs recovery (INCOMPAT_RECOVER).
* - 0x8 * - 0x8
- Filesystem has a separate journal device (INCOMPAT\_JOURNAL\_DEV). - Filesystem has a separate journal device (INCOMPAT_JOURNAL_DEV).
* - 0x10 * - 0x10
- Meta block groups. See the earlier discussion of this feature - Meta block groups. See the earlier discussion of this feature
(INCOMPAT\_META\_BG). (INCOMPAT_META_BG).
* - 0x40 * - 0x40
- Files in this filesystem use extents (INCOMPAT\_EXTENTS). - Files in this filesystem use extents (INCOMPAT_EXTENTS).
* - 0x80 * - 0x80
- Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT). - Enable a filesystem size of 2^64 blocks (INCOMPAT_64BIT).
* - 0x100 * - 0x100
- Multiple mount protection (INCOMPAT\_MMP). - Multiple mount protection (INCOMPAT_MMP).
* - 0x200 * - 0x200
- Flexible block groups. See the earlier discussion of this feature - Flexible block groups. See the earlier discussion of this feature
(INCOMPAT\_FLEX\_BG). (INCOMPAT_FLEX_BG).
* - 0x400 * - 0x400
- Inodes can be used to store large extended attribute values - Inodes can be used to store large extended attribute values
(INCOMPAT\_EA\_INODE). (INCOMPAT_EA_INODE).
* - 0x1000 * - 0x1000
- Data in directory entry (INCOMPAT\_DIRDATA). (Not implemented?) - Data in directory entry (INCOMPAT_DIRDATA). (Not implemented?)
* - 0x2000 * - 0x2000
- Metadata checksum seed is stored in the superblock. This feature enables - Metadata checksum seed is stored in the superblock. This feature enables
the administrator to change the UUID of a metadata\_csum filesystem the administrator to change the UUID of a metadata_csum filesystem
while the filesystem is mounted; without it, the checksum definition while the filesystem is mounted; without it, the checksum definition
requires all metadata blocks to be rewritten (INCOMPAT\_CSUM\_SEED). requires all metadata blocks to be rewritten (INCOMPAT_CSUM_SEED).
* - 0x4000 * - 0x4000
- Large directory >2GB or 3-level htree (INCOMPAT\_LARGEDIR). Prior to - Large directory >2GB or 3-level htree (INCOMPAT_LARGEDIR). Prior to
this feature, directories could not be larger than 4GiB and could not this feature, directories could not be larger than 4GiB and could not
have an htree more than 2 levels deep. If this feature is enabled, have an htree more than 2 levels deep. If this feature is enabled,
directories can be larger than 4GiB and have a maximum htree depth of 3. directories can be larger than 4GiB and have a maximum htree depth of 3.
* - 0x8000 * - 0x8000
- Data in inode (INCOMPAT\_INLINE\_DATA). - Data in inode (INCOMPAT_INLINE_DATA).
* - 0x10000 * - 0x10000
- Encrypted inodes are present on the filesystem. (INCOMPAT\_ENCRYPT). - Encrypted inodes are present on the filesystem. (INCOMPAT_ENCRYPT).
.. _super_rocompat: .. _super_rocompat:
@ -678,54 +678,54 @@ the following:
- Description - Description
* - 0x1 * - 0x1
- Sparse superblocks. See the earlier discussion of this feature - Sparse superblocks. See the earlier discussion of this feature
(RO\_COMPAT\_SPARSE\_SUPER). (RO_COMPAT_SPARSE_SUPER).
* - 0x2 * - 0x2
- This filesystem has been used to store a file greater than 2GiB - This filesystem has been used to store a file greater than 2GiB
(RO\_COMPAT\_LARGE\_FILE). (RO_COMPAT_LARGE_FILE).
* - 0x4 * - 0x4
- Not used in kernel or e2fsprogs (RO\_COMPAT\_BTREE\_DIR). - Not used in kernel or e2fsprogs (RO_COMPAT_BTREE_DIR).
* - 0x8 * - 0x8
- This filesystem has files whose sizes are represented in units of - This filesystem has files whose sizes are represented in units of
logical blocks, not 512-byte sectors. This implies a very large file logical blocks, not 512-byte sectors. This implies a very large file
indeed! (RO\_COMPAT\_HUGE\_FILE) indeed! (RO_COMPAT_HUGE_FILE)
* - 0x10 * - 0x10
- Group descriptors have checksums. In addition to detecting corruption, - Group descriptors have checksums. In addition to detecting corruption,
this is useful for lazy formatting with uninitialized groups this is useful for lazy formatting with uninitialized groups
(RO\_COMPAT\_GDT\_CSUM). (RO_COMPAT_GDT_CSUM).
* - 0x20 * - 0x20
- Indicates that the old ext3 32,000 subdirectory limit no longer applies - Indicates that the old ext3 32,000 subdirectory limit no longer applies
(RO\_COMPAT\_DIR\_NLINK). A directory's i\_links\_count will be set to 1 (RO_COMPAT_DIR_NLINK). A directory's i_links_count will be set to 1
if it is incremented past 64,999. if it is incremented past 64,999.
* - 0x40 * - 0x40
- Indicates that large inodes exist on this filesystem - Indicates that large inodes exist on this filesystem
(RO\_COMPAT\_EXTRA\_ISIZE). (RO_COMPAT_EXTRA_ISIZE).
* - 0x80 * - 0x80
- This filesystem has a snapshot (RO\_COMPAT\_HAS\_SNAPSHOT). - This filesystem has a snapshot (RO_COMPAT_HAS_SNAPSHOT).
* - 0x100 * - 0x100
- `Quota <Quota>`__ (RO\_COMPAT\_QUOTA). - `Quota <Quota>`__ (RO_COMPAT_QUOTA).
* - 0x200 * - 0x200
- This filesystem supports “bigalloc”, which means that file extents are - This filesystem supports “bigalloc”, which means that file extents are
tracked in units of clusters (of blocks) instead of blocks tracked in units of clusters (of blocks) instead of blocks
(RO\_COMPAT\_BIGALLOC). (RO_COMPAT_BIGALLOC).
* - 0x400 * - 0x400
- This filesystem supports metadata checksumming. - This filesystem supports metadata checksumming.
(RO\_COMPAT\_METADATA\_CSUM; implies RO\_COMPAT\_GDT\_CSUM, though (RO_COMPAT_METADATA_CSUM; implies RO_COMPAT_GDT_CSUM, though
GDT\_CSUM must not be set) GDT_CSUM must not be set)
* - 0x800 * - 0x800
- Filesystem supports replicas. This feature is neither in the kernel nor - Filesystem supports replicas. This feature is neither in the kernel nor
e2fsprogs. (RO\_COMPAT\_REPLICA) e2fsprogs. (RO_COMPAT_REPLICA)
* - 0x1000 * - 0x1000
- Read-only filesystem image; the kernel will not mount this image - Read-only filesystem image; the kernel will not mount this image
read-write and most tools will refuse to write to the image. read-write and most tools will refuse to write to the image.
(RO\_COMPAT\_READONLY) (RO_COMPAT_READONLY)
* - 0x2000 * - 0x2000
- Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT) - Filesystem tracks project quotas. (RO_COMPAT_PROJECT)
* - 0x8000 * - 0x8000
- Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY) - Verity inodes may be present on the filesystem. (RO_COMPAT_VERITY)
* - 0x10000 * - 0x10000
- Indicates orphan file may have valid orphan entries and thus we need - Indicates orphan file may have valid orphan entries and thus we need
to clean them up when mounting the filesystem to clean them up when mounting the filesystem
(RO\_COMPAT\_ORPHAN\_PRESENT). (RO_COMPAT_ORPHAN_PRESENT).
.. _super_def_hash: .. _super_def_hash:
@ -761,36 +761,36 @@ The ``s_default_mount_opts`` field is any combination of the following:
* - Value * - Value
- Description - Description
* - 0x0001 * - 0x0001
- Print debugging info upon (re)mount. (EXT4\_DEFM\_DEBUG) - Print debugging info upon (re)mount. (EXT4_DEFM_DEBUG)
* - 0x0002 * - 0x0002
- New files take the gid of the containing directory (instead of the fsgid - New files take the gid of the containing directory (instead of the fsgid
of the current process). (EXT4\_DEFM\_BSDGROUPS) of the current process). (EXT4_DEFM_BSDGROUPS)
* - 0x0004 * - 0x0004
- Support userspace-provided extended attributes. (EXT4\_DEFM\_XATTR\_USER) - Support userspace-provided extended attributes. (EXT4_DEFM_XATTR_USER)
* - 0x0008 * - 0x0008
- Support POSIX access control lists (ACLs). (EXT4\_DEFM\_ACL) - Support POSIX access control lists (ACLs). (EXT4_DEFM_ACL)
* - 0x0010 * - 0x0010
- Do not support 32-bit UIDs. (EXT4\_DEFM\_UID16) - Do not support 32-bit UIDs. (EXT4_DEFM_UID16)
* - 0x0020 * - 0x0020
- All data and metadata are committed to the journal. - All data and metadata are committed to the journal.
(EXT4\_DEFM\_JMODE\_DATA) (EXT4_DEFM_JMODE_DATA)
* - 0x0040 * - 0x0040
- All data are flushed to the disk before metadata are committed to the - All data are flushed to the disk before metadata are committed to the
journal. (EXT4\_DEFM\_JMODE\_ORDERED) journal. (EXT4_DEFM_JMODE_ORDERED)
* - 0x0060 * - 0x0060
- Data ordering is not preserved; data may be written after the metadata - Data ordering is not preserved; data may be written after the metadata
has been written. (EXT4\_DEFM\_JMODE\_WBACK) has been written. (EXT4_DEFM_JMODE_WBACK)
* - 0x0100 * - 0x0100
- Disable write flushes. (EXT4\_DEFM\_NOBARRIER) - Disable write flushes. (EXT4_DEFM_NOBARRIER)
* - 0x0200 * - 0x0200
- Track which blocks in a filesystem are metadata and therefore should not - Track which blocks in a filesystem are metadata and therefore should not
be used as data blocks. This option will be enabled by default on 3.18, be used as data blocks. This option will be enabled by default on 3.18,
hopefully. (EXT4\_DEFM\_BLOCK\_VALIDITY) hopefully. (EXT4_DEFM_BLOCK_VALIDITY)
* - 0x0400 * - 0x0400
- Enable DISCARD support, where the storage device is told about blocks - Enable DISCARD support, where the storage device is told about blocks
becoming unused. (EXT4\_DEFM\_DISCARD) becoming unused. (EXT4_DEFM_DISCARD)
* - 0x0800 * - 0x0800
- Disable delayed allocation. (EXT4\_DEFM\_NODELALLOC) - Disable delayed allocation. (EXT4_DEFM_NODELALLOC)
.. _super_flags: .. _super_flags:
@ -820,12 +820,12 @@ The ``s_encrypt_algos`` list can contain any of the following:
* - Value * - Value
- Description - Description
* - 0 * - 0
- Invalid algorithm (ENCRYPTION\_MODE\_INVALID). - Invalid algorithm (ENCRYPTION_MODE_INVALID).
* - 1 * - 1
- 256-bit AES in XTS mode (ENCRYPTION\_MODE\_AES\_256\_XTS). - 256-bit AES in XTS mode (ENCRYPTION_MODE_AES_256_XTS).
* - 2 * - 2
- 256-bit AES in GCM mode (ENCRYPTION\_MODE\_AES\_256\_GCM). - 256-bit AES in GCM mode (ENCRYPTION_MODE_AES_256_GCM).
* - 3 * - 3
- 256-bit AES in CBC mode (ENCRYPTION\_MODE\_AES\_256\_CBC). - 256-bit AES in CBC mode (ENCRYPTION_MODE_AES_256_CBC).
Total size of the superblock is 1024 bytes. Total size of the superblock is 1024 bytes.

View File

@ -129,18 +129,24 @@ yet. Bug reports are always welcome at the issue tracker below!
* - arm64 * - arm64
- Supported - Supported
- ``LLVM=1`` - ``LLVM=1``
* - hexagon
- Maintained
- ``LLVM=1``
* - mips * - mips
- Maintained - Maintained
- ``CC=clang`` - ``LLVM=1``
* - powerpc * - powerpc
- Maintained - Maintained
- ``CC=clang`` - ``CC=clang``
* - riscv * - riscv
- Maintained - Maintained
- ``CC=clang`` - ``LLVM=1``
* - s390 * - s390
- Maintained - Maintained
- ``CC=clang`` - ``CC=clang``
* - um (User Mode)
- Maintained
- ``LLVM=1``
* - x86 * - x86
- Supported - Supported
- ``LLVM=1`` - ``LLVM=1``

View File

@ -45,10 +45,12 @@ Name Alias Usage Preserved
``$r23``-``$r31`` ``$s0``-``$s8`` Static registers Yes ``$r23``-``$r31`` ``$s0``-``$s8`` Static registers Yes
================= =============== =================== ============ ================= =============== =================== ============
Note: The register ``$r21`` is reserved in the ELF psABI, but used by the Linux .. Note::
kernel for storing the percpu base address. It normally has no ABI name, but is The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1`` in some old code, kernel for storing the percpu base address. It normally has no ABI name,
however they are deprecated aliases of ``$a0`` and ``$a1`` respectively. but is called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1``
in some old code,however they are deprecated aliases of ``$a0`` and ``$a1``
respectively.
FPRs FPRs
---- ----
@ -69,8 +71,9 @@ Name Alias Usage Preserved
``$f24``-``$f31`` ``$fs0``-``$fs7`` Static registers Yes ``$f24``-``$f31`` ``$fs0``-``$fs7`` Static registers Yes
================= ================== =================== ============ ================= ================== =================== ============
Note: You may see ``$fv0`` or ``$fv1`` in some old code, however they are deprecated .. Note::
aliases of ``$fa0`` and ``$fa1`` respectively. You may see ``$fv0`` or ``$fv1`` in some old code, however they are
deprecated aliases of ``$fa0`` and ``$fa1`` respectively.
VRs VRs
---- ----

View File

@ -145,12 +145,16 @@ Documentation of Loongson's LS7A chipset:
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (in English) https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (in English)
Note: CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described .. Note::
in Section 7.4 of "LoongArch Reference Manual, Vol 1"; LIOINTC is "Legacy I/O - CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
Interrupts" described in Section 11.1 of "Loongson 3A5000 Processor Reference in Section 7.4 of "LoongArch Reference Manual, Vol 1";
Manual"; EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of - LIOINTC is "Legacy I/O Interrupts" described in Section 11.1 of
"Loongson 3A5000 Processor Reference Manual"; HTVECINTC is "HyperTransport "Loongson 3A5000 Processor Reference Manual";
Interrupts" described in Section 14.3 of "Loongson 3A5000 Processor Reference - EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
Manual"; PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of "Loongson 3A5000 Processor Reference Manual";
"Loongson 7A1000 Bridge User Manual"; PCH-LPC is "LPC Interrupts" described in - HTVECINTC is "HyperTransport Interrupts" described in Section 14.3 of
Section 24.3 of "Loongson 7A1000 Bridge User Manual". "Loongson 3A5000 Processor Reference Manual";
- PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
"Loongson 7A1000 Bridge User Manual";
- PCH-LPC is "LPC Interrupts" described in Section 24.3 of
"Loongson 7A1000 Bridge User Manual".

View File

@ -2925,6 +2925,43 @@ plpmtud_probe_interval - INTEGER
Default: 0 Default: 0
reconf_enable - BOOLEAN
Enable or disable extension of Stream Reconfiguration functionality
specified in RFC6525. This extension provides the ability to "reset"
a stream, and it includes the Parameters of "Outgoing/Incoming SSN
Reset", "SSN/TSN Reset" and "Add Outgoing/Incoming Streams".
- 1: Enable extension.
- 0: Disable extension.
Default: 0
intl_enable - BOOLEAN
Enable or disable extension of User Message Interleaving functionality
specified in RFC8260. This extension allows the interleaving of user
messages sent on different streams. With this feature enabled, I-DATA
chunk will replace DATA chunk to carry user messages if also supported
by the peer. Note that to use this feature, one needs to set this option
to 1 and also needs to set socket options SCTP_FRAGMENT_INTERLEAVE to 2
and SCTP_INTERLEAVING_SUPPORTED to 1.
- 1: Enable extension.
- 0: Disable extension.
Default: 0
ecn_enable - BOOLEAN
Control use of Explicit Congestion Notification (ECN) by SCTP.
Like in TCP, ECN is used only when both ends of the SCTP connection
indicate support for it. This feature is useful in avoiding losses
due to congestion by allowing supporting routers to signal congestion
before having to drop packets.
- 1: Enable ecn.
- 0: Disable ecn.
Default: 1
``/proc/sys/net/core/*`` ``/proc/sys/net/core/*``
======================== ========================

View File

@ -104,7 +104,7 @@ Whenever possible, use the PHY side RGMII delay for these reasons:
* PHY device drivers in PHYLIB being reusable by nature, being able to * PHY device drivers in PHYLIB being reusable by nature, being able to
configure correctly a specified delay enables more designs with similar delay configure correctly a specified delay enables more designs with similar delay
requirements to be operate correctly requirements to be operated correctly
For cases where the PHY is not capable of providing this delay, but the For cases where the PHY is not capable of providing this delay, but the
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value Ethernet MAC driver is capable of doing so, the correct phy_interface_t value

View File

@ -46,10 +46,11 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0而其
``$r23``-``$r31`` ``$s0``-``$s8`` 静态寄存器 是 ``$r23``-``$r31`` ``$s0``-``$s8`` 静态寄存器 是
================= =============== =================== ========== ================= =============== =================== ==========
注意:``$r21``寄存器在ELF psABI中保留未使用但是在Linux内核用于保存每CPU .. note::
变量基地址。该寄存器没有ABI命名不过在内核中称为``$u0``。在一些遗留代码 注意: ``$r21`` 寄存器在ELF psABI中保留未使用但是在Linux内核用于保
中有时可能见到``$v0````$v1``,它们是``$a0````$a1``的别名,属于已经废弃 存每CPU变量基地址。该寄存器没有ABI命名不过在内核中称为 ``$u0`` 。在
的用法。 一些遗留代码中有时可能见到 ``$v0````$v1`` ,它们是 ``$a0``
``$a1`` 的别名,属于已经废弃的用法。
浮点寄存器 浮点寄存器
---------- ----------
@ -68,8 +69,9 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0而其
``$f24``-``$f31`` ``$fs0``-``$fs7`` 静态寄存器 是 ``$f24``-``$f31`` ``$fs0``-``$fs7`` 静态寄存器 是
================= ================== =================== ========== ================= ================== =================== ==========
注意:在一些遗留代码中有时可能见到 ``$v0````$v1`` ,它们是 ``$a0`` .. note::
``$a1`` 的别名,属于已经废弃的用法。 注意:在一些遗留代码中有时可能见到 ``$v0````$v1`` ,它们是
``$a0````$a1`` 的别名,属于已经废弃的用法。
向量寄存器 向量寄存器

View File

@ -147,9 +147,11 @@ PCH-LPC::
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (英文版) https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (英文版)
CPUINTC即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其中断 .. note::
控制逻辑LIOINTC即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”EIOINTC - CPUINTC即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其
即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”HTVECINTC即《龙芯3A5000 中断控制逻辑;
处理器使用手册》第14.3节所描述的“HyperTransport中断”PCH-PIC/PCH-MSI即《龙芯7A1000桥 - LIOINTC即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”
片用户手册》第5章所描述的“中断控制器”PCH-LPC即《龙芯7A1000桥片用户手册》第24.3节所 - EIOINTC即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”
描述的“LPC中断”。 - HTVECINTC即《龙芯3A5000处理器使用手册》第14.3节所描述的“HyperTransport中断”
- PCH-PIC/PCH-MSI即《龙芯7A1000桥片用户手册》第5章所描述的“中断控制器”
- PCH-LPC即《龙芯7A1000桥片用户手册》第24.3节所描述的“LPC中断”。

View File

@ -120,7 +120,8 @@ Testing
unpoison-pfn unpoison-pfn
Software-unpoison page at PFN echoed into this file. This way Software-unpoison page at PFN echoed into this file. This way
a page can be reused again. This only works for Linux a page can be reused again. This only works for Linux
injected failures, not for real memory failures. injected failures, not for real memory failures. Once any hardware
memory failure happens, this feature is disabled.
Note these injection interfaces are not stable and might change between Note these injection interfaces are not stable and might change between
kernel versions kernel versions

View File

@ -427,6 +427,7 @@ ACPI VIOT DRIVER
M: Jean-Philippe Brucker <jean-philippe@linaro.org> M: Jean-Philippe Brucker <jean-philippe@linaro.org>
L: linux-acpi@vger.kernel.org L: linux-acpi@vger.kernel.org
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
S: Maintained S: Maintained
F: drivers/acpi/viot.c F: drivers/acpi/viot.c
F: include/linux/acpi_viot.h F: include/linux/acpi_viot.h
@ -960,6 +961,7 @@ AMD IOMMU (AMD-VI)
M: Joerg Roedel <joro@8bytes.org> M: Joerg Roedel <joro@8bytes.org>
R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
S: Maintained S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
F: drivers/iommu/amd/ F: drivers/iommu/amd/
@ -2467,6 +2469,7 @@ ARM/NXP S32G ARCHITECTURE
M: Chester Lin <clin@suse.com> M: Chester Lin <clin@suse.com>
R: Andreas Färber <afaerber@suse.de> R: Andreas Färber <afaerber@suse.de>
R: Matthias Brugger <mbrugger@suse.com> R: Matthias Brugger <mbrugger@suse.com>
R: NXP S32 Linux Team <s32@nxp.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained S: Maintained
F: arch/arm64/boot/dts/freescale/s32g*.dts* F: arch/arm64/boot/dts/freescale/s32g*.dts*
@ -3662,7 +3665,7 @@ BPF JIT for ARM
M: Shubham Bansal <illusionist.neo@gmail.com> M: Shubham Bansal <illusionist.neo@gmail.com>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
L: bpf@vger.kernel.org L: bpf@vger.kernel.org
S: Maintained S: Odd Fixes
F: arch/arm/net/ F: arch/arm/net/
BPF JIT for ARM64 BPF JIT for ARM64
@ -3686,14 +3689,15 @@ BPF JIT for NFP NICs
M: Jakub Kicinski <kuba@kernel.org> M: Jakub Kicinski <kuba@kernel.org>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
L: bpf@vger.kernel.org L: bpf@vger.kernel.org
S: Supported S: Odd Fixes
F: drivers/net/ethernet/netronome/nfp/bpf/ F: drivers/net/ethernet/netronome/nfp/bpf/
BPF JIT for POWERPC (32-BIT AND 64-BIT) BPF JIT for POWERPC (32-BIT AND 64-BIT)
M: Naveen N. Rao <naveen.n.rao@linux.ibm.com> M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
M: Michael Ellerman <mpe@ellerman.id.au>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
L: bpf@vger.kernel.org L: bpf@vger.kernel.org
S: Maintained S: Supported
F: arch/powerpc/net/ F: arch/powerpc/net/
BPF JIT for RISC-V (32-bit) BPF JIT for RISC-V (32-bit)
@ -3719,7 +3723,7 @@ M: Heiko Carstens <hca@linux.ibm.com>
M: Vasily Gorbik <gor@linux.ibm.com> M: Vasily Gorbik <gor@linux.ibm.com>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
L: bpf@vger.kernel.org L: bpf@vger.kernel.org
S: Maintained S: Supported
F: arch/s390/net/ F: arch/s390/net/
X: arch/s390/net/pnet.c X: arch/s390/net/pnet.c
@ -3727,14 +3731,14 @@ BPF JIT for SPARC (32-BIT AND 64-BIT)
M: David S. Miller <davem@davemloft.net> M: David S. Miller <davem@davemloft.net>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
L: bpf@vger.kernel.org L: bpf@vger.kernel.org
S: Maintained S: Odd Fixes
F: arch/sparc/net/ F: arch/sparc/net/
BPF JIT for X86 32-BIT BPF JIT for X86 32-BIT
M: Wang YanQing <udknight@gmail.com> M: Wang YanQing <udknight@gmail.com>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
L: bpf@vger.kernel.org L: bpf@vger.kernel.org
S: Maintained S: Odd Fixes
F: arch/x86/net/bpf_jit_comp32.c F: arch/x86/net/bpf_jit_comp32.c
BPF JIT for X86 64-BIT BPF JIT for X86 64-BIT
@ -3757,6 +3761,19 @@ F: include/linux/bpf_lsm.h
F: kernel/bpf/bpf_lsm.c F: kernel/bpf/bpf_lsm.c
F: security/bpf/ F: security/bpf/
BPF L7 FRAMEWORK
M: John Fastabend <john.fastabend@gmail.com>
M: Jakub Sitnicki <jakub@cloudflare.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
F: include/linux/skmsg.h
F: net/core/skmsg.c
F: net/core/sock_map.c
F: net/ipv4/tcp_bpf.c
F: net/ipv4/udp_bpf.c
F: net/unix/unix_bpf.c
BPFTOOL BPFTOOL
M: Quentin Monnet <quentin@isovalent.com> M: Quentin Monnet <quentin@isovalent.com>
L: bpf@vger.kernel.org L: bpf@vger.kernel.org
@ -3796,12 +3813,12 @@ N: bcmbca
N: bcm[9]?47622 N: bcm[9]?47622
BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
M: Nicolas Saenz Julienne <nsaenz@kernel.org> M: Florian Fainelli <f.fainelli@gmail.com>
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git T: git git://github.com/broadcom/stblinux.git
F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
F: drivers/pci/controller/pcie-brcmstb.c F: drivers/pci/controller/pcie-brcmstb.c
F: drivers/staging/vc04_services F: drivers/staging/vc04_services
@ -5962,6 +5979,7 @@ M: Christoph Hellwig <hch@lst.de>
M: Marek Szyprowski <m.szyprowski@samsung.com> M: Marek Szyprowski <m.szyprowski@samsung.com>
R: Robin Murphy <robin.murphy@arm.com> R: Robin Murphy <robin.murphy@arm.com>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
S: Supported S: Supported
W: http://git.infradead.org/users/hch/dma-mapping.git W: http://git.infradead.org/users/hch/dma-mapping.git
T: git git://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git
@ -5974,6 +5992,7 @@ F: kernel/dma/
DMA MAPPING BENCHMARK DMA MAPPING BENCHMARK
M: Xiang Chen <chenxiang66@hisilicon.com> M: Xiang Chen <chenxiang66@hisilicon.com>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
F: kernel/dma/map_benchmark.c F: kernel/dma/map_benchmark.c
F: tools/testing/selftests/dma/ F: tools/testing/selftests/dma/
@ -7558,6 +7577,7 @@ F: drivers/gpu/drm/exynos/exynos_dp*
EXYNOS SYSMMU (IOMMU) driver EXYNOS SYSMMU (IOMMU) driver
M: Marek Szyprowski <m.szyprowski@samsung.com> M: Marek Szyprowski <m.szyprowski@samsung.com>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
S: Maintained S: Maintained
F: drivers/iommu/exynos-iommu.c F: drivers/iommu/exynos-iommu.c
@ -8479,6 +8499,7 @@ F: Documentation/devicetree/bindings/gpio/
F: Documentation/driver-api/gpio/ F: Documentation/driver-api/gpio/
F: drivers/gpio/ F: drivers/gpio/
F: include/asm-generic/gpio.h F: include/asm-generic/gpio.h
F: include/dt-bindings/gpio/
F: include/linux/gpio.h F: include/linux/gpio.h
F: include/linux/gpio/ F: include/linux/gpio/
F: include/linux/of_gpio.h F: include/linux/of_gpio.h
@ -9132,6 +9153,7 @@ F: drivers/media/platform/st/sti/hva
HWPOISON MEMORY FAILURE HANDLING HWPOISON MEMORY FAILURE HANDLING
M: Naoya Horiguchi <naoya.horiguchi@nec.com> M: Naoya Horiguchi <naoya.horiguchi@nec.com>
R: Miaohe Lin <linmiaohe@huawei.com>
L: linux-mm@kvack.org L: linux-mm@kvack.org
S: Maintained S: Maintained
F: mm/hwpoison-inject.c F: mm/hwpoison-inject.c
@ -9276,6 +9298,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
F: Documentation/devicetree/bindings/i2c/i2c.txt F: Documentation/devicetree/bindings/i2c/i2c.txt
F: Documentation/i2c/ F: Documentation/i2c/
F: drivers/i2c/* F: drivers/i2c/*
F: include/dt-bindings/i2c/i2c.h
F: include/linux/i2c-dev.h F: include/linux/i2c-dev.h
F: include/linux/i2c-smbus.h F: include/linux/i2c-smbus.h
F: include/linux/i2c.h F: include/linux/i2c.h
@ -9291,6 +9314,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
F: Documentation/devicetree/bindings/i2c/ F: Documentation/devicetree/bindings/i2c/
F: drivers/i2c/algos/ F: drivers/i2c/algos/
F: drivers/i2c/busses/ F: drivers/i2c/busses/
F: include/dt-bindings/i2c/
I2C-TAOS-EVM DRIVER I2C-TAOS-EVM DRIVER
M: Jean Delvare <jdelvare@suse.com> M: Jean Delvare <jdelvare@suse.com>
@ -9975,6 +9999,7 @@ INTEL IOMMU (VT-d)
M: David Woodhouse <dwmw2@infradead.org> M: David Woodhouse <dwmw2@infradead.org>
M: Lu Baolu <baolu.lu@linux.intel.com> M: Lu Baolu <baolu.lu@linux.intel.com>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
S: Supported S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
F: drivers/iommu/intel/ F: drivers/iommu/intel/
@ -10354,6 +10379,7 @@ IOMMU DRIVERS
M: Joerg Roedel <joro@8bytes.org> M: Joerg Roedel <joro@8bytes.org>
M: Will Deacon <will@kernel.org> M: Will Deacon <will@kernel.org>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
S: Maintained S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
F: Documentation/devicetree/bindings/iommu/ F: Documentation/devicetree/bindings/iommu/
@ -10830,6 +10856,7 @@ M: Marc Zyngier <maz@kernel.org>
R: James Morse <james.morse@arm.com> R: James Morse <james.morse@arm.com>
R: Alexandru Elisei <alexandru.elisei@arm.com> R: Alexandru Elisei <alexandru.elisei@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com> R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Oliver Upton <oliver.upton@linux.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers)
S: Maintained S: Maintained
@ -10872,7 +10899,6 @@ F: arch/riscv/include/asm/kvm*
F: arch/riscv/include/uapi/asm/kvm* F: arch/riscv/include/uapi/asm/kvm*
F: arch/riscv/kvm/ F: arch/riscv/kvm/
F: tools/testing/selftests/kvm/*/riscv/ F: tools/testing/selftests/kvm/*/riscv/
F: tools/testing/selftests/kvm/riscv/
KERNEL VIRTUAL MACHINE for s390 (KVM/s390) KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com> M: Christian Borntraeger <borntraeger@linux.ibm.com>
@ -10897,28 +10923,51 @@ F: tools/testing/selftests/kvm/*/s390x/
F: tools/testing/selftests/kvm/s390x/ F: tools/testing/selftests/kvm/s390x/
KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
M: Sean Christopherson <seanjc@google.com>
M: Paolo Bonzini <pbonzini@redhat.com> M: Paolo Bonzini <pbonzini@redhat.com>
R: Sean Christopherson <seanjc@google.com>
R: Vitaly Kuznetsov <vkuznets@redhat.com>
R: Wanpeng Li <wanpengli@tencent.com>
R: Jim Mattson <jmattson@google.com>
R: Joerg Roedel <joro@8bytes.org>
L: kvm@vger.kernel.org L: kvm@vger.kernel.org
S: Supported S: Supported
W: http://www.linux-kvm.org
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
F: arch/x86/include/asm/kvm* F: arch/x86/include/asm/kvm*
F: arch/x86/include/asm/pvclock-abi.h
F: arch/x86/include/asm/svm.h F: arch/x86/include/asm/svm.h
F: arch/x86/include/asm/vmx*.h F: arch/x86/include/asm/vmx*.h
F: arch/x86/include/uapi/asm/kvm* F: arch/x86/include/uapi/asm/kvm*
F: arch/x86/include/uapi/asm/svm.h F: arch/x86/include/uapi/asm/svm.h
F: arch/x86/include/uapi/asm/vmx.h F: arch/x86/include/uapi/asm/vmx.h
F: arch/x86/kernel/kvm.c
F: arch/x86/kernel/kvmclock.c
F: arch/x86/kvm/ F: arch/x86/kvm/
F: arch/x86/kvm/*/ F: arch/x86/kvm/*/
KVM PARAVIRT (KVM/paravirt)
M: Paolo Bonzini <pbonzini@redhat.com>
R: Wanpeng Li <wanpengli@tencent.com>
R: Vitaly Kuznetsov <vkuznets@redhat.com>
L: kvm@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
F: arch/x86/kernel/kvm.c
F: arch/x86/kernel/kvmclock.c
F: arch/x86/include/asm/pvclock-abi.h
F: include/linux/kvm_para.h
F: include/uapi/linux/kvm_para.h
F: include/uapi/asm-generic/kvm_para.h
F: include/asm-generic/kvm_para.h
F: arch/um/include/asm/kvm_para.h
F: arch/x86/include/asm/kvm_para.h
F: arch/x86/include/uapi/asm/kvm_para.h
KVM X86 HYPER-V (KVM/hyper-v)
M: Vitaly Kuznetsov <vkuznets@redhat.com>
M: Sean Christopherson <seanjc@google.com>
M: Paolo Bonzini <pbonzini@redhat.com>
L: kvm@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
F: arch/x86/kvm/hyperv.*
F: arch/x86/kvm/kvm_onhyperv.*
F: arch/x86/kvm/svm/hyperv.*
F: arch/x86/kvm/svm/svm_onhyperv.*
F: arch/x86/kvm/vmx/evmcs.*
KERNFS KERNFS
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
M: Tejun Heo <tj@kernel.org> M: Tejun Heo <tj@kernel.org>
@ -11097,20 +11146,6 @@ S: Maintained
F: include/net/l3mdev.h F: include/net/l3mdev.h
F: net/l3mdev F: net/l3mdev
L7 BPF FRAMEWORK
M: John Fastabend <john.fastabend@gmail.com>
M: Daniel Borkmann <daniel@iogearbox.net>
M: Jakub Sitnicki <jakub@cloudflare.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
F: include/linux/skmsg.h
F: net/core/skmsg.c
F: net/core/sock_map.c
F: net/ipv4/tcp_bpf.c
F: net/ipv4/udp_bpf.c
F: net/unix/unix_bpf.c
LANDLOCK SECURITY MODULE LANDLOCK SECURITY MODULE
M: Mickaël Salaün <mic@digikod.net> M: Mickaël Salaün <mic@digikod.net>
L: linux-security-module@vger.kernel.org L: linux-security-module@vger.kernel.org
@ -11590,6 +11625,7 @@ F: drivers/gpu/drm/bridge/lontium-lt8912b.c
LOONGARCH LOONGARCH
M: Huacai Chen <chenhuacai@kernel.org> M: Huacai Chen <chenhuacai@kernel.org>
R: WANG Xuerui <kernel@xen0n.name> R: WANG Xuerui <kernel@xen0n.name>
L: loongarch@lists.linux.dev
S: Maintained S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git
F: arch/loongarch/ F: arch/loongarch/
@ -12503,6 +12539,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c
MEDIATEK IOMMU DRIVER MEDIATEK IOMMU DRIVER
M: Yong Wu <yong.wu@mediatek.com> M: Yong Wu <yong.wu@mediatek.com>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
S: Supported S: Supported
F: Documentation/devicetree/bindings/iommu/mediatek* F: Documentation/devicetree/bindings/iommu/mediatek*
@ -12845,9 +12882,8 @@ M: Andrew Morton <akpm@linux-foundation.org>
L: linux-mm@kvack.org L: linux-mm@kvack.org
S: Maintained S: Maintained
W: http://www.linux-mm.org W: http://www.linux-mm.org
T: quilt https://ozlabs.org/~akpm/mmotm/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
T: quilt https://ozlabs.org/~akpm/mmots/ T: quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new
T: git git://github.com/hnaz/linux-mm.git
F: include/linux/gfp.h F: include/linux/gfp.h
F: include/linux/memory_hotplug.h F: include/linux/memory_hotplug.h
F: include/linux/mm.h F: include/linux/mm.h
@ -12857,6 +12893,18 @@ F: include/linux/vmalloc.h
F: mm/ F: mm/
F: tools/testing/selftests/vm/ F: tools/testing/selftests/vm/
MEMORY HOT(UN)PLUG
M: David Hildenbrand <david@redhat.com>
M: Oscar Salvador <osalvador@suse.de>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/admin-guide/mm/memory-hotplug.rst
F: Documentation/core-api/memory-hotplug.rst
F: drivers/base/memory.c
F: include/linux/memory_hotplug.h
F: mm/memory_hotplug.c
F: tools/testing/selftests/memory-hotplug/
MEMORY TECHNOLOGY DEVICES (MTD) MEMORY TECHNOLOGY DEVICES (MTD)
M: Miquel Raynal <miquel.raynal@bootlin.com> M: Miquel Raynal <miquel.raynal@bootlin.com>
M: Richard Weinberger <richard@nod.at> M: Richard Weinberger <richard@nod.at>
@ -13801,6 +13849,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
F: Documentation/devicetree/bindings/net/ F: Documentation/devicetree/bindings/net/
F: drivers/connector/ F: drivers/connector/
F: drivers/net/ F: drivers/net/
F: include/dt-bindings/net/
F: include/linux/etherdevice.h F: include/linux/etherdevice.h
F: include/linux/fcdevice.h F: include/linux/fcdevice.h
F: include/linux/fddidevice.h F: include/linux/fddidevice.h
@ -13952,7 +14001,6 @@ F: net/ipv6/tcp*.c
NETWORKING [TLS] NETWORKING [TLS]
M: Boris Pismenny <borisp@nvidia.com> M: Boris Pismenny <borisp@nvidia.com>
M: John Fastabend <john.fastabend@gmail.com> M: John Fastabend <john.fastabend@gmail.com>
M: Daniel Borkmann <daniel@iogearbox.net>
M: Jakub Kicinski <kuba@kernel.org> M: Jakub Kicinski <kuba@kernel.org>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
S: Maintained S: Maintained
@ -14261,7 +14309,7 @@ F: drivers/iio/gyro/fxas21002c_i2c.c
F: drivers/iio/gyro/fxas21002c_spi.c F: drivers/iio/gyro/fxas21002c_spi.c
NXP i.MX CLOCK DRIVERS NXP i.MX CLOCK DRIVERS
M: Abel Vesa <abel.vesa@nxp.com> M: Abel Vesa <abelvesa@kernel.org>
L: linux-clk@vger.kernel.org L: linux-clk@vger.kernel.org
L: linux-imx@nxp.com L: linux-imx@nxp.com
S: Maintained S: Maintained
@ -14869,6 +14917,7 @@ F: include/dt-bindings/
OPENCOMPUTE PTP CLOCK DRIVER OPENCOMPUTE PTP CLOCK DRIVER
M: Jonathan Lemon <jonathan.lemon@gmail.com> M: Jonathan Lemon <jonathan.lemon@gmail.com>
M: Vadim Fedorenko <vadfed@fb.com>
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
S: Maintained S: Maintained
F: drivers/ptp/ptp_ocp.c F: drivers/ptp/ptp_ocp.c
@ -16488,7 +16537,7 @@ F: Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml
F: drivers/cpufreq/qcom-cpufreq-nvmem.c F: drivers/cpufreq/qcom-cpufreq-nvmem.c
QUALCOMM CRYPTO DRIVERS QUALCOMM CRYPTO DRIVERS
M: Thara Gopinath <thara.gopinath@linaro.org> M: Thara Gopinath <thara.gopinath@gmail.com>
L: linux-crypto@vger.kernel.org L: linux-crypto@vger.kernel.org
L: linux-arm-msm@vger.kernel.org L: linux-arm-msm@vger.kernel.org
S: Maintained S: Maintained
@ -16543,6 +16592,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c
QUALCOMM IOMMU QUALCOMM IOMMU
M: Rob Clark <robdclark@gmail.com> M: Rob Clark <robdclark@gmail.com>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
L: linux-arm-msm@vger.kernel.org L: linux-arm-msm@vger.kernel.org
S: Maintained S: Maintained
F: drivers/iommu/arm/arm-smmu/qcom_iommu.c F: drivers/iommu/arm/arm-smmu/qcom_iommu.c
@ -16598,7 +16648,7 @@ F: include/linux/if_rmnet.h
QUALCOMM TSENS THERMAL DRIVER QUALCOMM TSENS THERMAL DRIVER
M: Amit Kucheria <amitk@kernel.org> M: Amit Kucheria <amitk@kernel.org>
M: Thara Gopinath <thara.gopinath@linaro.org> M: Thara Gopinath <thara.gopinath@gmail.com>
L: linux-pm@vger.kernel.org L: linux-pm@vger.kernel.org
L: linux-arm-msm@vger.kernel.org L: linux-arm-msm@vger.kernel.org
S: Maintained S: Maintained
@ -19168,6 +19218,7 @@ F: arch/x86/boot/video*
SWIOTLB SUBSYSTEM SWIOTLB SUBSYSTEM
M: Christoph Hellwig <hch@infradead.org> M: Christoph Hellwig <hch@infradead.org>
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
S: Supported S: Supported
W: http://git.infradead.org/users/hch/dma-mapping.git W: http://git.infradead.org/users/hch/dma-mapping.git
T: git git://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git
@ -19305,7 +19356,7 @@ R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
R: Mika Westerberg <mika.westerberg@linux.intel.com> R: Mika Westerberg <mika.westerberg@linux.intel.com>
R: Jan Dabros <jsd@semihalf.com> R: Jan Dabros <jsd@semihalf.com>
L: linux-i2c@vger.kernel.org L: linux-i2c@vger.kernel.org
S: Maintained S: Supported
F: drivers/i2c/busses/i2c-designware-* F: drivers/i2c/busses/i2c-designware-*
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
@ -20712,6 +20763,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
F: Documentation/devicetree/bindings/usb/ F: Documentation/devicetree/bindings/usb/
F: Documentation/usb/ F: Documentation/usb/
F: drivers/usb/ F: drivers/usb/
F: include/dt-bindings/usb/
F: include/linux/usb.h F: include/linux/usb.h
F: include/linux/usb/ F: include/linux/usb/
@ -21843,6 +21895,7 @@ M: Juergen Gross <jgross@suse.com>
M: Stefano Stabellini <sstabellini@kernel.org> M: Stefano Stabellini <sstabellini@kernel.org>
L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
L: iommu@lists.linux-foundation.org L: iommu@lists.linux-foundation.org
L: iommu@lists.linux.dev
S: Supported S: Supported
F: arch/x86/xen/*swiotlb* F: arch/x86/xen/*swiotlb*
F: drivers/xen/*swiotlb* F: drivers/xen/*swiotlb*

View File

@ -2,7 +2,7 @@
VERSION = 5 VERSION = 5
PATCHLEVEL = 19 PATCHLEVEL = 19
SUBLEVEL = 0 SUBLEVEL = 0
EXTRAVERSION = -rc2 EXTRAVERSION = -rc4
NAME = Superb Owl NAME = Superb Owl
# *DOCUMENTATION* # *DOCUMENTATION*
@ -1141,7 +1141,7 @@ KBUILD_MODULES := 1
autoksyms_recursive: descend modules.order autoksyms_recursive: descend modules.order
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \
"$(MAKE) -f $(srctree)/Makefile vmlinux" "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive"
endif endif
autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h) autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h)

View File

@ -1586,7 +1586,6 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
aspeed-bmc-lenovo-hr630.dtb \ aspeed-bmc-lenovo-hr630.dtb \
aspeed-bmc-lenovo-hr855xg2.dtb \ aspeed-bmc-lenovo-hr855xg2.dtb \
aspeed-bmc-microsoft-olympus.dtb \ aspeed-bmc-microsoft-olympus.dtb \
aspeed-bmc-nuvia-dc-scm.dtb \
aspeed-bmc-opp-lanyang.dtb \ aspeed-bmc-opp-lanyang.dtb \
aspeed-bmc-opp-mihawk.dtb \ aspeed-bmc-opp-mihawk.dtb \
aspeed-bmc-opp-mowgli.dtb \ aspeed-bmc-opp-mowgli.dtb \
@ -1599,6 +1598,7 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
aspeed-bmc-opp-witherspoon.dtb \ aspeed-bmc-opp-witherspoon.dtb \
aspeed-bmc-opp-zaius.dtb \ aspeed-bmc-opp-zaius.dtb \
aspeed-bmc-portwell-neptune.dtb \ aspeed-bmc-portwell-neptune.dtb \
aspeed-bmc-qcom-dc-scm-v1.dtb \
aspeed-bmc-quanta-q71l.dtb \ aspeed-bmc-quanta-q71l.dtb \
aspeed-bmc-quanta-s6q.dtb \ aspeed-bmc-quanta-s6q.dtb \
aspeed-bmc-supermicro-x11spi.dtb \ aspeed-bmc-supermicro-x11spi.dtb \

View File

@ -6,8 +6,8 @@
#include "aspeed-g6.dtsi" #include "aspeed-g6.dtsi"
/ { / {
model = "Nuvia DC-SCM BMC"; model = "Qualcomm DC-SCM V1 BMC";
compatible = "nuvia,dc-scm-bmc", "aspeed,ast2600"; compatible = "qcom,dc-scm-v1-bmc", "aspeed,ast2600";
aliases { aliases {
serial4 = &uart5; serial4 = &uart5;

View File

@ -120,26 +120,31 @@
port@0 { port@0 {
reg = <0>; reg = <0>;
label = "lan1"; label = "lan1";
phy-mode = "internal";
}; };
port@1 { port@1 {
reg = <1>; reg = <1>;
label = "lan2"; label = "lan2";
phy-mode = "internal";
}; };
port@2 { port@2 {
reg = <2>; reg = <2>;
label = "lan3"; label = "lan3";
phy-mode = "internal";
}; };
port@3 { port@3 {
reg = <3>; reg = <3>;
label = "lan4"; label = "lan4";
phy-mode = "internal";
}; };
port@4 { port@4 {
reg = <4>; reg = <4>;
label = "lan5"; label = "lan5";
phy-mode = "internal";
}; };
port@5 { port@5 {

View File

@ -28,12 +28,12 @@
&expgpio { &expgpio {
gpio-line-names = "BT_ON", gpio-line-names = "BT_ON",
"WL_ON", "WL_ON",
"", "PWR_LED_OFF",
"GLOBAL_RESET", "GLOBAL_RESET",
"VDD_SD_IO_SEL", "VDD_SD_IO_SEL",
"CAM_GPIO", "GLOBAL_SHUTDOWN",
"SD_PWR_ON", "SD_PWR_ON",
"SD_OC_N"; "SHUTDOWN_REQUEST";
}; };
&genet_mdio { &genet_mdio {

View File

@ -593,7 +593,7 @@
pinctrl-names = "default"; pinctrl-names = "default";
pinctrl-0 = <&pinctrl_atmel_conn>; pinctrl-0 = <&pinctrl_atmel_conn>;
reg = <0x4a>; reg = <0x4a>;
reset-gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>; /* SODIMM 106 */ reset-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; /* SODIMM 106 */
status = "disabled"; status = "disabled";
}; };
}; };

View File

@ -762,7 +762,7 @@
regulator-name = "vddpu"; regulator-name = "vddpu";
regulator-min-microvolt = <725000>; regulator-min-microvolt = <725000>;
regulator-max-microvolt = <1450000>; regulator-max-microvolt = <1450000>;
regulator-enable-ramp-delay = <150>; regulator-enable-ramp-delay = <380>;
anatop-reg-offset = <0x140>; anatop-reg-offset = <0x140>;
anatop-vol-bit-shift = <9>; anatop-vol-bit-shift = <9>;
anatop-vol-bit-width = <5>; anatop-vol-bit-width = <5>;

View File

@ -120,6 +120,7 @@
compatible = "usb-nop-xceiv"; compatible = "usb-nop-xceiv";
clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>; clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>;
clock-names = "main_clk"; clock-names = "main_clk";
power-domains = <&pgc_hsic_phy>;
#phy-cells = <0>; #phy-cells = <0>;
}; };
@ -1153,7 +1154,6 @@
compatible = "fsl,imx7d-usb", "fsl,imx27-usb"; compatible = "fsl,imx7d-usb", "fsl,imx27-usb";
reg = <0x30b30000 0x200>; reg = <0x30b30000 0x200>;
interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>; interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&pgc_hsic_phy>;
clocks = <&clks IMX7D_USB_CTRL_CLK>; clocks = <&clks IMX7D_USB_CTRL_CLK>;
fsl,usbphy = <&usbphynop3>; fsl,usbphy = <&usbphynop3>;
fsl,usbmisc = <&usbmisc3 0>; fsl,usbmisc = <&usbmisc3 0>;

View File

@ -0,0 +1,47 @@
// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
/*
* Copyright (C) STMicroelectronics 2022 - All Rights Reserved
* Author: Alexandre Torgue <alexandre.torgue@foss.st.com> for STMicroelectronics.
*/
/*
 * Firmware description shared by the SCMI board variants: an OP-TEE node,
 * an SCMI node with two protocol children, and the SRAM window that holds
 * the SCMI shared memory.
 */
/ {
firmware {
/* OP-TEE node; "method" selects SMC as the calling convention. */
optee: optee {
compatible = "linaro,optee-tz";
method = "smc";
};
/* SCMI node bound to OP-TEE channel 0, using scmi_shm as shared memory. */
scmi: scmi {
compatible = "linaro,scmi-optee";
#address-cells = <1>;
#size-cells = <0>;
linaro,optee-channel-id = <0>;
shmem = <&scmi_shm>;
/* Protocol 0x14: target of <&scmi_clk N> clock specifiers (one cell). */
scmi_clk: protocol@14 {
reg = <0x14>;
#clock-cells = <1>;
};
/* Protocol 0x16: target of <&scmi_reset N> reset specifiers (one cell). */
scmi_reset: protocol@16 {
reg = <0x16>;
#reset-cells = <1>;
};
};
};
soc {
/* 0x1000 bytes of SRAM at 0x2ffff000; "ranges" maps child offset 0 onto it. */
scmi_sram: sram@2ffff000 {
compatible = "mmio-sram";
reg = <0x2ffff000 0x1000>;
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0x2ffff000 0x1000>;
/* First 0x80 bytes of the SRAM; referenced by the scmi node's "shmem". */
scmi_shm: scmi-sram@0 {
compatible = "arm,scmi-shmem";
reg = <0 0x80>;
};
};
};
};

View File

@ -115,33 +115,6 @@
status = "disabled"; status = "disabled";
}; };
firmware {
optee: optee {
compatible = "linaro,optee-tz";
method = "smc";
status = "disabled";
};
scmi: scmi {
compatible = "linaro,scmi-optee";
#address-cells = <1>;
#size-cells = <0>;
linaro,optee-channel-id = <0>;
shmem = <&scmi_shm>;
status = "disabled";
scmi_clk: protocol@14 {
reg = <0x14>;
#clock-cells = <1>;
};
scmi_reset: protocol@16 {
reg = <0x16>;
#reset-cells = <1>;
};
};
};
soc { soc {
compatible = "simple-bus"; compatible = "simple-bus";
#address-cells = <1>; #address-cells = <1>;
@ -149,20 +122,6 @@
interrupt-parent = <&intc>; interrupt-parent = <&intc>;
ranges; ranges;
scmi_sram: sram@2ffff000 {
compatible = "mmio-sram";
reg = <0x2ffff000 0x1000>;
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0x2ffff000 0x1000>;
scmi_shm: scmi-sram@0 {
compatible = "arm,scmi-shmem";
reg = <0 0x80>;
status = "disabled";
};
};
timers2: timer@40000000 { timers2: timer@40000000 {
#address-cells = <1>; #address-cells = <1>;
#size-cells = <0>; #size-cells = <0>;

View File

@ -7,6 +7,7 @@
/dts-v1/; /dts-v1/;
#include "stm32mp157a-dk1.dts" #include "stm32mp157a-dk1.dts"
#include "stm32mp15-scmi.dtsi"
/ { / {
model = "STMicroelectronics STM32MP157A-DK1 SCMI Discovery Board"; model = "STMicroelectronics STM32MP157A-DK1 SCMI Discovery Board";
@ -54,10 +55,6 @@
resets = <&scmi_reset RST_SCMI_MCU>; resets = <&scmi_reset RST_SCMI_MCU>;
}; };
&optee {
status = "okay";
};
&rcc { &rcc {
compatible = "st,stm32mp1-rcc-secure", "syscon"; compatible = "st,stm32mp1-rcc-secure", "syscon";
clock-names = "hse", "hsi", "csi", "lse", "lsi"; clock-names = "hse", "hsi", "csi", "lse", "lsi";
@ -76,11 +73,3 @@
&rtc { &rtc {
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
}; };
&scmi {
status = "okay";
};
&scmi_shm {
status = "okay";
};

View File

@ -7,6 +7,7 @@
/dts-v1/; /dts-v1/;
#include "stm32mp157c-dk2.dts" #include "stm32mp157c-dk2.dts"
#include "stm32mp15-scmi.dtsi"
/ { / {
model = "STMicroelectronics STM32MP157C-DK2 SCMI Discovery Board"; model = "STMicroelectronics STM32MP157C-DK2 SCMI Discovery Board";
@ -63,10 +64,6 @@
resets = <&scmi_reset RST_SCMI_MCU>; resets = <&scmi_reset RST_SCMI_MCU>;
}; };
&optee {
status = "okay";
};
&rcc { &rcc {
compatible = "st,stm32mp1-rcc-secure", "syscon"; compatible = "st,stm32mp1-rcc-secure", "syscon";
clock-names = "hse", "hsi", "csi", "lse", "lsi"; clock-names = "hse", "hsi", "csi", "lse", "lsi";
@ -85,11 +82,3 @@
&rtc { &rtc {
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
}; };
&scmi {
status = "okay";
};
&scmi_shm {
status = "okay";
};

View File

@ -7,6 +7,7 @@
/dts-v1/; /dts-v1/;
#include "stm32mp157c-ed1.dts" #include "stm32mp157c-ed1.dts"
#include "stm32mp15-scmi.dtsi"
/ { / {
model = "STMicroelectronics STM32MP157C-ED1 SCMI eval daughter"; model = "STMicroelectronics STM32MP157C-ED1 SCMI eval daughter";
@ -59,10 +60,6 @@
resets = <&scmi_reset RST_SCMI_MCU>; resets = <&scmi_reset RST_SCMI_MCU>;
}; };
&optee {
status = "okay";
};
&rcc { &rcc {
compatible = "st,stm32mp1-rcc-secure", "syscon"; compatible = "st,stm32mp1-rcc-secure", "syscon";
clock-names = "hse", "hsi", "csi", "lse", "lsi"; clock-names = "hse", "hsi", "csi", "lse", "lsi";
@ -81,11 +78,3 @@
&rtc { &rtc {
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
}; };
&scmi {
status = "okay";
};
&scmi_shm {
status = "okay";
};

View File

@ -7,6 +7,7 @@
/dts-v1/; /dts-v1/;
#include "stm32mp157c-ev1.dts" #include "stm32mp157c-ev1.dts"
#include "stm32mp15-scmi.dtsi"
/ { / {
model = "STMicroelectronics STM32MP157C-EV1 SCMI eval daughter on eval mother"; model = "STMicroelectronics STM32MP157C-EV1 SCMI eval daughter on eval mother";
@ -68,10 +69,6 @@
resets = <&scmi_reset RST_SCMI_MCU>; resets = <&scmi_reset RST_SCMI_MCU>;
}; };
&optee {
status = "okay";
};
&rcc { &rcc {
compatible = "st,stm32mp1-rcc-secure", "syscon"; compatible = "st,stm32mp1-rcc-secure", "syscon";
clock-names = "hse", "hsi", "csi", "lse", "lsi"; clock-names = "hse", "hsi", "csi", "lse", "lsi";
@ -90,11 +87,3 @@
&rtc { &rtc {
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>; clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
}; };
&scmi {
status = "okay";
};
&scmi_shm {
status = "okay";
};

View File

@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle)
return -ENOENT; return -ENOENT;
syscon = of_iomap(syscon_np, 0); syscon = of_iomap(syscon_np, 0);
of_node_put(syscon_np);
if (!syscon) if (!syscon)
return -ENOMEM; return -ENOMEM;

View File

@ -372,6 +372,7 @@ static void __init cns3xxx_init(void)
/* De-Asscer SATA Reset */ /* De-Asscer SATA Reset */
cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA));
} }
of_node_put(dn);
dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci"); dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci");
if (of_device_is_available(dn)) { if (of_device_is_available(dn)) {
@ -385,6 +386,7 @@ static void __init cns3xxx_init(void)
cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO)); cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO));
cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO));
} }
of_node_put(dn);
pm_power_off = cns3xxx_power_off; pm_power_off = cns3xxx_power_off;

View File

@ -149,6 +149,7 @@ static void exynos_map_pmu(void)
np = of_find_matching_node(NULL, exynos_dt_pmu_match); np = of_find_matching_node(NULL, exynos_dt_pmu_match);
if (np) if (np)
pmu_base_addr = of_iomap(np, 0); pmu_base_addr = of_iomap(np, 0);
of_node_put(np);
} }
static void __init exynos_init_irq(void) static void __init exynos_init_irq(void)

View File

@ -218,13 +218,13 @@ void __init spear_setup_of_timer(void)
irq = irq_of_parse_and_map(np, 0); irq = irq_of_parse_and_map(np, 0);
if (!irq) { if (!irq) {
pr_err("%s: No irq passed for timer via DT\n", __func__); pr_err("%s: No irq passed for timer via DT\n", __func__);
return; goto err_put_np;
} }
gpt_base = of_iomap(np, 0); gpt_base = of_iomap(np, 0);
if (!gpt_base) { if (!gpt_base) {
pr_err("%s: of iomap failed\n", __func__); pr_err("%s: of iomap failed\n", __func__);
return; goto err_put_np;
} }
gpt_clk = clk_get_sys("gpt0", NULL); gpt_clk = clk_get_sys("gpt0", NULL);
@ -239,6 +239,8 @@ void __init spear_setup_of_timer(void)
goto err_prepare_enable_clk; goto err_prepare_enable_clk;
} }
of_node_put(np);
spear_clockevent_init(irq); spear_clockevent_init(irq);
spear_clocksource_init(); spear_clocksource_init();
@ -248,4 +250,6 @@ err_prepare_enable_clk:
clk_put(gpt_clk); clk_put(gpt_clk);
err_iomap: err_iomap:
iounmap(gpt_base); iounmap(gpt_base);
err_put_np:
of_node_put(np);
} }

View File

@ -280,8 +280,8 @@
interrupts = <GIC_SPI 246 IRQ_TYPE_LEVEL_HIGH>; interrupts = <GIC_SPI 246 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default"; pinctrl-names = "default";
pinctrl-0 = <&uart0_bus>; pinctrl-0 = <&uart0_bus>;
clocks = <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>, clocks = <&cmu_peri CLK_GOUT_UART0_PCLK>,
<&cmu_peri CLK_GOUT_UART0_PCLK>; <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>;
clock-names = "uart", "clk_uart_baud0"; clock-names = "uart", "clk_uart_baud0";
samsung,uart-fifosize = <64>; samsung,uart-fifosize = <64>;
status = "disabled"; status = "disabled";
@ -293,8 +293,8 @@
interrupts = <GIC_SPI 247 IRQ_TYPE_LEVEL_HIGH>; interrupts = <GIC_SPI 247 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default"; pinctrl-names = "default";
pinctrl-0 = <&uart1_bus>; pinctrl-0 = <&uart1_bus>;
clocks = <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>, clocks = <&cmu_peri CLK_GOUT_UART1_PCLK>,
<&cmu_peri CLK_GOUT_UART1_PCLK>; <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>;
clock-names = "uart", "clk_uart_baud0"; clock-names = "uart", "clk_uart_baud0";
samsung,uart-fifosize = <256>; samsung,uart-fifosize = <256>;
status = "disabled"; status = "disabled";
@ -306,8 +306,8 @@
interrupts = <GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>; interrupts = <GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default"; pinctrl-names = "default";
pinctrl-0 = <&uart2_bus>; pinctrl-0 = <&uart2_bus>;
clocks = <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>, clocks = <&cmu_peri CLK_GOUT_UART2_PCLK>,
<&cmu_peri CLK_GOUT_UART2_PCLK>; <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>;
clock-names = "uart", "clk_uart_baud0"; clock-names = "uart", "clk_uart_baud0";
samsung,uart-fifosize = <256>; samsung,uart-fifosize = <256>;
status = "disabled"; status = "disabled";

View File

@ -79,7 +79,7 @@
}; };
}; };
soc { soc@0 {
compatible = "simple-bus"; compatible = "simple-bus";
#address-cells = <1>; #address-cells = <1>;
#size-cells = <1>; #size-cells = <1>;

View File

@ -456,13 +456,11 @@
clock-names = "clk_ahb", "clk_xin"; clock-names = "clk_ahb", "clk_xin";
mmc-ddr-1_8v; mmc-ddr-1_8v;
mmc-hs200-1_8v; mmc-hs200-1_8v;
mmc-hs400-1_8v;
ti,trm-icp = <0x2>; ti,trm-icp = <0x2>;
ti,otap-del-sel-legacy = <0x0>; ti,otap-del-sel-legacy = <0x0>;
ti,otap-del-sel-mmc-hs = <0x0>; ti,otap-del-sel-mmc-hs = <0x0>;
ti,otap-del-sel-ddr52 = <0x6>; ti,otap-del-sel-ddr52 = <0x6>;
ti,otap-del-sel-hs200 = <0x7>; ti,otap-del-sel-hs200 = <0x7>;
ti,otap-del-sel-hs400 = <0x4>;
}; };
sdhci1: mmc@fa00000 { sdhci1: mmc@fa00000 {

View File

@ -33,7 +33,7 @@
ranges; ranges;
#interrupt-cells = <3>; #interrupt-cells = <3>;
interrupt-controller; interrupt-controller;
reg = <0x00 0x01800000 0x00 0x200000>, /* GICD */ reg = <0x00 0x01800000 0x00 0x100000>, /* GICD */
<0x00 0x01900000 0x00 0x100000>, /* GICR */ <0x00 0x01900000 0x00 0x100000>, /* GICR */
<0x00 0x6f000000 0x00 0x2000>, /* GICC */ <0x00 0x6f000000 0x00 0x2000>, /* GICC */
<0x00 0x6f010000 0x00 0x1000>, /* GICH */ <0x00 0x6f010000 0x00 0x1000>, /* GICH */

View File

@ -362,11 +362,6 @@ struct kvm_vcpu_arch {
struct arch_timer_cpu timer_cpu; struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu; struct kvm_pmu pmu;
/*
* Anything that is not used directly from assembly code goes
* here.
*/
/* /*
* Guest registers we preserve during guest debugging. * Guest registers we preserve during guest debugging.
* *

View File

@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void)
/* /*
* Code only run in VHE/NVHE hyp context can assume VHE is present or * Code only run in VHE/NVHE hyp context can assume VHE is present or
* absent. Otherwise fall back to caps. * absent. Otherwise fall back to caps.
* This allows the compiler to discard VHE-specific code from the
* nVHE object, reducing the number of external symbol references
* needed to link.
*/ */
if (is_vhe_hyp_code()) if (is_vhe_hyp_code())
return true; return true;

View File

@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
#ifdef CONFIG_KVM #ifdef CONFIG_KVM
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
{ {
if (kvm_get_mode() != KVM_MODE_PROTECTED) return kvm_get_mode() == KVM_MODE_PROTECTED;
return false;
if (is_kernel_in_hyp_mode()) {
pr_warn("Protected KVM not available with VHE\n");
return false;
}
return true;
} }
#endif /* CONFIG_KVM */ #endif /* CONFIG_KVM */
@ -3109,7 +3101,6 @@ void cpu_set_feature(unsigned int num)
WARN_ON(num >= MAX_CPU_FEATURES); WARN_ON(num >= MAX_CPU_FEATURES);
elf_hwcap |= BIT(num); elf_hwcap |= BIT(num);
} }
EXPORT_SYMBOL_GPL(cpu_set_feature);
bool cpu_have_feature(unsigned int num) bool cpu_have_feature(unsigned int num)
{ {

View File

@ -102,7 +102,6 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
* to restore x0-x8, x29, and x30. * to restore x0-x8, x29, and x30.
*/ */
ftrace_common_return:
/* Restore function arguments */ /* Restore function arguments */
ldp x0, x1, [sp] ldp x0, x1, [sp]
ldp x2, x3, [sp, #S_X2] ldp x2, x3, [sp, #S_X2]

View File

@ -77,6 +77,66 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
return NULL; return NULL;
} }
/*
 * Work out which address the callsite at 'rec->ip' has to branch to so that
 * execution ends up at '*addr'.
 *
 * A 'BL' instruction only reaches targets within -128M/+128M of the callsite,
 * so a module that sits further away than that has to bounce through a PLT.
 *
 * @rec:  ftrace record; only its 'ip' (the callsite address) is read.
 * @mod:  module owning the callsite, or NULL to have it looked up from the
 *        callsite address when a PLT turns out to be necessary.
 * @addr: in: the desired branch target; out: left untouched when directly
 *        reachable, otherwise replaced by the address of the module's PLT
 *        entry for that target.
 *
 * Returns true when '*addr' can be branched to from the callsite (directly
 * or because it now holds a PLT address), false otherwise.
 */
static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
				      struct module *mod,
				      unsigned long *addr)
{
	unsigned long callsite = rec->ip;
	long distance = (long)*addr - (long)callsite;
	struct plt_entry *trampoline;

	/* Close enough for a plain 'BL': nothing to rewrite. */
	if (distance >= -SZ_128M && distance < SZ_128M)
		return true;

	/*
	 * Too far for a 'BL', so the only option left is a PLT. PLTs exist
	 * solely for modules, and solely on kernels built with module PLT
	 * support.
	 */
	if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
		return false;

	/*
	 * Callers only pass 'mod' at module load time. Being out of range
	 * implies the callsite lives in a module loaded far away from the
	 * kernel, so look the module up from the callsite address.
	 *
	 * NOTE: __module_text_address() must be called with preemption
	 * disabled, but we can rely on ftrace_lock to ensure that 'mod'
	 * retains its validity throughout the remainder of this code.
	 */
	if (!mod) {
		preempt_disable();
		mod = __module_text_address(callsite);
		preempt_enable();
	}

	if (WARN_ON(!mod))
		return false;

	trampoline = get_ftrace_plt(mod, *addr);
	if (trampoline) {
		/* Redirect the caller to the module's PLT entry. */
		*addr = (unsigned long)trampoline;
		return true;
	}

	pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
	return false;
}
/* /*
* Turn on the call to ftrace_caller() in instrumented function * Turn on the call to ftrace_caller() in instrumented function
*/ */
@ -84,40 +144,9 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{ {
unsigned long pc = rec->ip; unsigned long pc = rec->ip;
u32 old, new; u32 old, new;
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) { if (!ftrace_find_callable_addr(rec, NULL, &addr))
struct module *mod; return -EINVAL;
struct plt_entry *plt;
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
return -EINVAL;
/*
* On kernels that support module PLTs, the offset between the
* branch instruction and its target may legally exceed the
* range of an ordinary relative 'bl' opcode. In this case, we
* need to branch via a trampoline in the module.
*
* NOTE: __module_text_address() must be called with preemption
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
* retains its validity throughout the remainder of this code.
*/
preempt_disable();
mod = __module_text_address(pc);
preempt_enable();
if (WARN_ON(!mod))
return -EINVAL;
plt = get_ftrace_plt(mod, addr);
if (!plt) {
pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
return -EINVAL;
}
addr = (unsigned long)plt;
}
old = aarch64_insn_gen_nop(); old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
@ -132,6 +161,11 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long pc = rec->ip; unsigned long pc = rec->ip;
u32 old, new; u32 old, new;
if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
return -EINVAL;
if (!ftrace_find_callable_addr(rec, NULL, &addr))
return -EINVAL;
old = aarch64_insn_gen_branch_imm(pc, old_addr, old = aarch64_insn_gen_branch_imm(pc, old_addr,
AARCH64_INSN_BRANCH_LINK); AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
@ -181,54 +215,15 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr) unsigned long addr)
{ {
unsigned long pc = rec->ip; unsigned long pc = rec->ip;
bool validate = true;
u32 old = 0, new; u32 old = 0, new;
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) { if (!ftrace_find_callable_addr(rec, mod, &addr))
u32 replaced; return -EINVAL;
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
return -EINVAL;
/*
* 'mod' is only set at module load time, but if we end up
* dealing with an out-of-range condition, we can assume it
* is due to a module being loaded far away from the kernel.
*/
if (!mod) {
preempt_disable();
mod = __module_text_address(pc);
preempt_enable();
if (WARN_ON(!mod))
return -EINVAL;
}
/*
* The instruction we are about to patch may be a branch and
* link instruction that was redirected via a PLT entry. In
* this case, the normal validation will fail, but we can at
* least check that we are dealing with a branch and link
* instruction that points into the right module.
*/
if (aarch64_insn_read((void *)pc, &replaced))
return -EFAULT;
if (!aarch64_insn_is_bl(replaced) ||
!within_module(pc + aarch64_get_branch_offset(replaced),
mod))
return -EINVAL;
validate = false;
} else {
old = aarch64_insn_gen_branch_imm(pc, addr,
AARCH64_INSN_BRANCH_LINK);
}
old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_nop(); new = aarch64_insn_gen_nop();
return ftrace_modify_code(pc, old, new, validate); return ftrace_modify_code(pc, old, new, true);
} }
void arch_ftrace_update_code(int command) void arch_ftrace_update_code(int command)

View File

@ -303,14 +303,13 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
early_fixmap_init(); early_fixmap_init();
early_ioremap_init(); early_ioremap_init();
/*
* Initialise the static keys early as they may be enabled by the
* cpufeature code, early parameters, and DT setup.
*/
jump_label_init();
setup_machine_fdt(__fdt_pointer); setup_machine_fdt(__fdt_pointer);
/*
* Initialise the static keys early as they may be enabled by the
* cpufeature code and early parameters.
*/
jump_label_init();
parse_early_param(); parse_early_param();
/* /*

View File

@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid)
struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer; struct arch_timer_context *timer;
if (WARN(!vcpu, "No vcpu context!\n"))
return false;
if (vintid == vcpu_vtimer(vcpu)->irq.irq) if (vintid == vcpu_vtimer(vcpu)->irq.irq)
timer = vcpu_vtimer(vcpu); timer = vcpu_vtimer(vcpu);
else if (vintid == vcpu_ptimer(vcpu)->irq.irq) else if (vintid == vcpu_ptimer(vcpu)->irq.irq)

View File

@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret) if (ret)
goto out_free_stage2_pgd; goto out_free_stage2_pgd;
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
ret = -ENOMEM;
goto out_free_stage2_pgd; goto out_free_stage2_pgd;
}
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
kvm_vgic_early_init(kvm); kvm_vgic_early_init(kvm);
@ -2110,11 +2112,11 @@ static int finalize_hyp_mode(void)
return 0; return 0;
/* /*
* Exclude HYP BSS from kmemleak so that it doesn't get peeked * Exclude HYP sections from kmemleak so that they don't get peeked
* at, which would end badly once the section is inaccessible. * at, which would end badly once inaccessible.
* None of other sections should ever be introspected.
*/ */
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size);
return pkvm_drop_host_privileges(); return pkvm_drop_host_privileges();
} }
@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg)
return -EINVAL; return -EINVAL;
if (strcmp(arg, "protected") == 0) { if (strcmp(arg, "protected") == 0) {
kvm_mode = KVM_MODE_PROTECTED; if (!is_kernel_in_hyp_mode())
kvm_mode = KVM_MODE_PROTECTED;
else
pr_warn_once("Protected KVM not available with VHE\n");
return 0; return 0;
} }

View File

@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST; vcpu->arch.flags |= KVM_ARM64_FP_HOST;
vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* operations. Do this for ZA as well for now for simplicity. * operations. Do this for ZA as well for now for simplicity.
*/ */
if (system_supports_sme()) { if (system_supports_sme()) {
vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;

View File

@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
enum kvm_pgtable_prot prot) enum kvm_pgtable_prot prot)
{ {
hyp_assert_lock_held(&host_kvm.lock);
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
} }
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{ {
hyp_assert_lock_held(&host_kvm.lock);
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
addr, size, &host_s2_pool, owner_id); addr, size, &host_s2_pool, owner_id);
} }

View File

@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
case SYS_ID_AA64MMFR2_EL1: case SYS_ID_AA64MMFR2_EL1:
return get_pvm_id_aa64mmfr2(vcpu); return get_pvm_id_aa64mmfr2(vcpu);
default: default:
/* /* Unhandled ID register, RAZ */
* Should never happen because all cases are covered in return 0;
* pvm_sys_reg_descs[].
*/
WARN_ON(1);
break;
} }
return 0;
} }
static u64 read_id_reg(const struct kvm_vcpu *vcpu, static u64 read_id_reg(const struct kvm_vcpu *vcpu,
@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
/* Mark the specified system register as an AArch64 feature id register. */ /* Mark the specified system register as an AArch64 feature id register. */
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } #define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
/*
 * sys_reg_desc initialiser for an architecturally unallocated cpufeature ID
 * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
 * (1 <= crm < 8, 0 <= Op2 < 8).
 *
 * The entry is keyed by its raw encoding rather than by a SYS_* name, and
 * uses pvm_access_id_aarch64 as its access handler, i.e. the same handler
 * as the AARCH64() entries.
 */
#define ID_UNALLOCATED(crm, op2) { \
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
.access = pvm_access_id_aarch64, \
}
/* Mark the specified system register as Read-As-Zero/Write-Ignored */ /* Mark the specified system register as Read-As-Zero/Write-Ignored */
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi } #define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
AARCH32(SYS_MVFR0_EL1), AARCH32(SYS_MVFR0_EL1),
AARCH32(SYS_MVFR1_EL1), AARCH32(SYS_MVFR1_EL1),
AARCH32(SYS_MVFR2_EL1), AARCH32(SYS_MVFR2_EL1),
ID_UNALLOCATED(3,3),
AARCH32(SYS_ID_PFR2_EL1), AARCH32(SYS_ID_PFR2_EL1),
AARCH32(SYS_ID_DFR1_EL1), AARCH32(SYS_ID_DFR1_EL1),
AARCH32(SYS_ID_MMFR5_EL1), AARCH32(SYS_ID_MMFR5_EL1),
ID_UNALLOCATED(3,7),
/* AArch64 ID registers */ /* AArch64 ID registers */
/* CRm=4 */ /* CRm=4 */
AARCH64(SYS_ID_AA64PFR0_EL1), AARCH64(SYS_ID_AA64PFR0_EL1),
AARCH64(SYS_ID_AA64PFR1_EL1), AARCH64(SYS_ID_AA64PFR1_EL1),
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
AARCH64(SYS_ID_AA64ZFR0_EL1), AARCH64(SYS_ID_AA64ZFR0_EL1),
ID_UNALLOCATED(4,5),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
AARCH64(SYS_ID_AA64DFR0_EL1), AARCH64(SYS_ID_AA64DFR0_EL1),
AARCH64(SYS_ID_AA64DFR1_EL1), AARCH64(SYS_ID_AA64DFR1_EL1),
ID_UNALLOCATED(5,2),
ID_UNALLOCATED(5,3),
AARCH64(SYS_ID_AA64AFR0_EL1), AARCH64(SYS_ID_AA64AFR0_EL1),
AARCH64(SYS_ID_AA64AFR1_EL1), AARCH64(SYS_ID_AA64AFR1_EL1),
ID_UNALLOCATED(5,6),
ID_UNALLOCATED(5,7),
AARCH64(SYS_ID_AA64ISAR0_EL1), AARCH64(SYS_ID_AA64ISAR0_EL1),
AARCH64(SYS_ID_AA64ISAR1_EL1), AARCH64(SYS_ID_AA64ISAR1_EL1),
AARCH64(SYS_ID_AA64ISAR2_EL1),
ID_UNALLOCATED(6,3),
ID_UNALLOCATED(6,4),
ID_UNALLOCATED(6,5),
ID_UNALLOCATED(6,6),
ID_UNALLOCATED(6,7),
AARCH64(SYS_ID_AA64MMFR0_EL1), AARCH64(SYS_ID_AA64MMFR0_EL1),
AARCH64(SYS_ID_AA64MMFR1_EL1), AARCH64(SYS_ID_AA64MMFR1_EL1),
AARCH64(SYS_ID_AA64MMFR2_EL1), AARCH64(SYS_ID_AA64MMFR2_EL1),
ID_UNALLOCATED(7,3),
ID_UNALLOCATED(7,4),
ID_UNALLOCATED(7,5),
ID_UNALLOCATED(7,6),
ID_UNALLOCATED(7,7),
/* Scalable Vector Registers are restricted. */ /* Scalable Vector Registers are restricted. */

View File

@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
VGIC_ACCESS_32bit), VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
vgic_mmio_read_pending, vgic_mmio_write_spending, vgic_mmio_read_pending, vgic_mmio_write_spending,
NULL, vgic_uaccess_write_spending, 1, vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
VGIC_ACCESS_32bit), VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
vgic_mmio_read_pending, vgic_mmio_write_cpending, vgic_mmio_read_pending, vgic_mmio_write_cpending,
NULL, vgic_uaccess_write_cpending, 1, vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
VGIC_ACCESS_32bit), VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
vgic_mmio_read_active, vgic_mmio_write_sactive, vgic_mmio_read_active, vgic_mmio_write_sactive,

View File

@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
return 0; return 0;
} }
static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 value = 0;
int i;
/*
* pending state of interrupt is latched in pending_latch variable.
* Userspace will save and restore pending state and line_level
* separately.
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
* for handling of ISPENDR and ICPENDR.
*/
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
bool state = irq->pending_latch;
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
int err;
err = irq_get_irqchip_state(irq->host_irq,
IRQCHIP_STATE_PENDING,
&state);
WARN_ON(err);
}
if (state)
value |= (1U << i);
vgic_put_irq(vcpu->kvm, irq);
}
return value;
}
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len, gpa_t addr, unsigned int len,
unsigned long val) unsigned long val)
@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
VGIC_ACCESS_32bit), VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
vgic_mmio_read_pending, vgic_mmio_write_spending, vgic_mmio_read_pending, vgic_mmio_write_spending,
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
VGIC_ACCESS_32bit), VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
vgic_mmio_read_pending, vgic_mmio_write_cpending, vgic_mmio_read_pending, vgic_mmio_write_cpending,
@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
VGIC_ACCESS_32bit), VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_spending, vgic_mmio_read_pending, vgic_mmio_write_spending,
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit), VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_cpending, vgic_mmio_read_pending, vgic_mmio_write_cpending,

View File

@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
return 0; return 0;
} }
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, static unsigned long __read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len) gpa_t addr, unsigned int len,
bool is_user)
{ {
u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 value = 0; u32 value = 0;
@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
unsigned long flags; unsigned long flags;
bool val; bool val;
/*
* When used from userspace with a GICv3 model:
*
* Pending state of interrupt is latched in pending_latch
* variable. Userspace will save and restore pending state
* and line_level separately.
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
* for handling of ISPENDR and ICPENDR.
*/
raw_spin_lock_irqsave(&irq->irq_lock, flags); raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw && vgic_irq_is_sgi(irq->intid)) { if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
int err; int err;
@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
IRQCHIP_STATE_PENDING, IRQCHIP_STATE_PENDING,
&val); &val);
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
} else if (vgic_irq_is_mapped_level(irq)) { } else if (!is_user && vgic_irq_is_mapped_level(irq)) {
val = vgic_get_phys_line_level(irq); val = vgic_get_phys_line_level(irq);
} else { } else {
val = irq_is_pending(irq); switch (vcpu->kvm->arch.vgic.vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
if (is_user) {
val = irq->pending_latch;
break;
}
fallthrough;
default:
val = irq_is_pending(irq);
break;
}
} }
value |= ((u32)val << i); value |= ((u32)val << i);
@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value; return value;
} }
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
return __read_pending(vcpu, addr, len, false);
}
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
return __read_pending(vcpu, addr, len, true);
}
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{ {
return (vgic_irq_is_sgi(irq->intid) && return (vgic_irq_is_sgi(irq->intid) &&

View File

@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len); gpa_t addr, unsigned int len);
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len, gpa_t addr, unsigned int len,
unsigned long val); unsigned long val);

View File

@ -66,7 +66,7 @@ static void flush_context(void)
* the next context-switch, we broadcast TLB flush + I-cache * the next context-switch, we broadcast TLB flush + I-cache
* invalidation over the inner shareable domain on rollover. * invalidation over the inner shareable domain on rollover.
*/ */
kvm_call_hyp(__kvm_flush_vm_context); kvm_call_hyp(__kvm_flush_vm_context);
} }
static bool check_update_reserved_vmid(u64 vmid, u64 newvmid) static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)

View File

@ -218,8 +218,6 @@ SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
*/ */
SYM_FUNC_START(__pi___dma_map_area) SYM_FUNC_START(__pi___dma_map_area)
add x1, x0, x1 add x1, x0, x1
cmp w2, #DMA_FROM_DEVICE
b.eq __pi_dcache_inval_poc
b __pi_dcache_clean_poc b __pi_dcache_clean_poc
SYM_FUNC_END(__pi___dma_map_area) SYM_FUNC_END(__pi___dma_map_area)
SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)

View File

@ -12,10 +12,9 @@ static inline unsigned long exception_era(struct pt_regs *regs)
return regs->csr_era; return regs->csr_era;
} }
static inline int compute_return_era(struct pt_regs *regs) static inline void compute_return_era(struct pt_regs *regs)
{ {
regs->csr_era += 4; regs->csr_era += 4;
return 0;
} }
#endif /* _ASM_BRANCH_H */ #endif /* _ASM_BRANCH_H */

View File

@ -426,6 +426,11 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
#define kern_addr_valid(addr) (1) #define kern_addr_valid(addr) (1)
static inline unsigned long pmd_pfn(pmd_t pmd)
{
return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/ /* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/
@ -497,11 +502,6 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
return pmd; return pmd;
} }
static inline unsigned long pmd_pfn(pmd_t pmd)
{
return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
}
static inline struct page *pmd_page(pmd_t pmd) static inline struct page *pmd_page(pmd_t pmd)
{ {
if (pmd_trans_huge(pmd)) if (pmd_trans_huge(pmd))

View File

@ -263,7 +263,7 @@ void cpu_probe(void)
c->cputype = CPU_UNKNOWN; c->cputype = CPU_UNKNOWN;
c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0); c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0);
c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3; c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) & CPUCFG2_FPVERS) >> 3;
c->fpu_csr0 = FPU_CSR_RN; c->fpu_csr0 = FPU_CSR_RN;
c->fpu_mask = FPU_CSR_RSVD; c->fpu_mask = FPU_CSR_RSVD;

View File

@ -14,8 +14,6 @@
__REF __REF
SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE)
SYM_CODE_START(kernel_entry) # kernel entry point SYM_CODE_START(kernel_entry) # kernel entry point
/* Config direct window and set PG */ /* Config direct window and set PG */

View File

@ -475,8 +475,7 @@ asmlinkage void noinstr do_ri(struct pt_regs *regs)
die_if_kernel("Reserved instruction in kernel code", regs); die_if_kernel("Reserved instruction in kernel code", regs);
if (unlikely(compute_return_era(regs) < 0)) compute_return_era(regs);
goto out;
if (unlikely(get_user(opcode, era) < 0)) { if (unlikely(get_user(opcode, era) < 0)) {
status = SIGSEGV; status = SIGSEGV;

View File

@ -37,6 +37,7 @@ SECTIONS
HEAD_TEXT_SECTION HEAD_TEXT_SECTION
. = ALIGN(PECOFF_SEGMENT_ALIGN); . = ALIGN(PECOFF_SEGMENT_ALIGN);
_stext = .;
.text : { .text : {
TEXT_TEXT TEXT_TEXT
SCHED_TEXT SCHED_TEXT
@ -101,6 +102,7 @@ SECTIONS
STABS_DEBUG STABS_DEBUG
DWARF_DEBUG DWARF_DEBUG
ELF_DETAILS
.gptab.sdata : { .gptab.sdata : {
*(.gptab.data) *(.gptab.data)

View File

@ -281,15 +281,16 @@ void setup_tlb_handler(int cpu)
if (pcpu_handlers[cpu]) if (pcpu_handlers[cpu])
return; return;
page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz)); page = alloc_pages_node(cpu_to_node(cpu), GFP_ATOMIC, get_order(vec_sz));
if (!page) if (!page)
return; return;
addr = page_address(page); addr = page_address(page);
pcpu_handlers[cpu] = virt_to_phys(addr); pcpu_handlers[cpu] = (unsigned long)addr;
memcpy((void *)addr, (void *)eentry, vec_sz); memcpy((void *)addr, (void *)eentry, vec_sz);
local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz); local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz);
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY); csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY);
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY);
csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY);
} }
#endif #endif

View File

@ -111,8 +111,9 @@
clocks = <&cgu X1000_CLK_RTCLK>, clocks = <&cgu X1000_CLK_RTCLK>,
<&cgu X1000_CLK_EXCLK>, <&cgu X1000_CLK_EXCLK>,
<&cgu X1000_CLK_PCLK>; <&cgu X1000_CLK_PCLK>,
clock-names = "rtc", "ext", "pclk"; <&cgu X1000_CLK_TCU>;
clock-names = "rtc", "ext", "pclk", "tcu";
interrupt-controller; interrupt-controller;
#interrupt-cells = <1>; #interrupt-cells = <1>;

View File

@ -104,8 +104,9 @@
clocks = <&cgu X1830_CLK_RTCLK>, clocks = <&cgu X1830_CLK_RTCLK>,
<&cgu X1830_CLK_EXCLK>, <&cgu X1830_CLK_EXCLK>,
<&cgu X1830_CLK_PCLK>; <&cgu X1830_CLK_PCLK>,
clock-names = "rtc", "ext", "pclk"; <&cgu X1830_CLK_TCU>;
clock-names = "rtc", "ext", "pclk", "tcu";
interrupt-controller; interrupt-controller;
#interrupt-cells = <1>; #interrupt-cells = <1>;

View File

@ -44,6 +44,7 @@ static __init unsigned int ranchu_measure_hpt_freq(void)
__func__); __func__);
rtc_base = of_iomap(np, 0); rtc_base = of_iomap(np, 0);
of_node_put(np);
if (!rtc_base) if (!rtc_base)
panic("%s(): Failed to ioremap Goldfish RTC base!", __func__); panic("%s(): Failed to ioremap Goldfish RTC base!", __func__);

View File

@ -208,6 +208,12 @@ void __init ltq_soc_init(void)
of_address_to_resource(np_sysgpe, 0, &res_sys[2])) of_address_to_resource(np_sysgpe, 0, &res_sys[2]))
panic("Failed to get core resources"); panic("Failed to get core resources");
of_node_put(np_status);
of_node_put(np_ebu);
of_node_put(np_sys1);
of_node_put(np_syseth);
of_node_put(np_sysgpe);
if ((request_mem_region(res_status.start, resource_size(&res_status), if ((request_mem_region(res_status.start, resource_size(&res_status),
res_status.name) < 0) || res_status.name) < 0) ||
(request_mem_region(res_ebu.start, resource_size(&res_ebu), (request_mem_region(res_ebu.start, resource_size(&res_ebu),

View File

@ -408,6 +408,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
if (!ltq_eiu_membase) if (!ltq_eiu_membase)
panic("Failed to remap eiu memory"); panic("Failed to remap eiu memory");
} }
of_node_put(eiu_node);
return 0; return 0;
} }

View File

@ -441,6 +441,10 @@ void __init ltq_soc_init(void)
of_address_to_resource(np_ebu, 0, &res_ebu)) of_address_to_resource(np_ebu, 0, &res_ebu))
panic("Failed to get core resources"); panic("Failed to get core resources");
of_node_put(np_pmu);
of_node_put(np_cgu);
of_node_put(np_ebu);
if (!request_mem_region(res_pmu.start, resource_size(&res_pmu), if (!request_mem_region(res_pmu.start, resource_size(&res_pmu),
res_pmu.name) || res_pmu.name) ||
!request_mem_region(res_cgu.start, resource_size(&res_cgu), !request_mem_region(res_cgu.start, resource_size(&res_cgu),

View File

@ -214,6 +214,8 @@ static void update_gic_frequency_dt(void)
if (of_update_property(node, &gic_frequency_prop) < 0) if (of_update_property(node, &gic_frequency_prop) < 0)
pr_err("error updating gic frequency property\n"); pr_err("error updating gic frequency property\n");
of_node_put(node);
} }
#endif #endif

View File

@ -98,13 +98,18 @@ static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup)
np = of_find_compatible_node(NULL, NULL, lookup->compatible); np = of_find_compatible_node(NULL, NULL, lookup->compatible);
if (np) { if (np) {
lookup->name = (char *)np->name; lookup->name = (char *)np->name;
if (lookup->phys_addr) if (lookup->phys_addr) {
of_node_put(np);
continue; continue;
}
if (!of_address_to_resource(np, 0, &res)) if (!of_address_to_resource(np, 0, &res))
lookup->phys_addr = res.start; lookup->phys_addr = res.start;
of_node_put(np);
} }
} }
of_node_put(root);
return 0; return 0;
} }

View File

@ -32,6 +32,9 @@ static unsigned int pic32_xlate_core_timer_irq(void)
goto default_map; goto default_map;
irq = irq_of_parse_and_map(node, 0); irq = irq_of_parse_and_map(node, 0);
of_node_put(node);
if (!irq) if (!irq)
goto default_map; goto default_map;

View File

@ -40,6 +40,8 @@ __iomem void *plat_of_remap_node(const char *node)
if (of_address_to_resource(np, 0, &res)) if (of_address_to_resource(np, 0, &res))
panic("Failed to get resource for %s", node); panic("Failed to get resource for %s", node);
of_node_put(np);
if (!request_mem_region(res.start, if (!request_mem_region(res.start,
resource_size(&res), resource_size(&res),
res.name)) res.name))

View File

@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq)
printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2);
atomic_inc(&irq_err_count);
return -1; return -1;
} }

View File

@ -10,6 +10,7 @@ config PARISC
select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_FRAME_POINTERS
select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_PTE_SPECIAL
select ARCH_NO_SG_CHAIN select ARCH_NO_SG_CHAIN

Some files were not shown because too many files have changed in this diff Show More