dma-mapping updates for Linux 6.4

- fix a PageHighMem check in dma-coherent initialization (Doug Berger)
  - clean up the coherency default initialization (Jiaxun Yang)
  - add cacheline to user/kernel dma-debug space dump messages
    (Desnes Nunes, Geert Uytterhoeven)
  - swiotlb statistics improvements (Michael Kelley)
  - misc cleanups (Petr Tesarik)
 -----BEGIN PGP SIGNATURE-----
 
 iQI/BAABCgApFiEEgdbnc3r/njty3Iq9D55TZVIEUYMFAmRLYsoLHGhjaEBsc3Qu
 ZGUACgkQD55TZVIEUYP4+RAAwpIqI198CrPxodCuBdwetuxznwncdwFvU3W+NQLF
 cC5gDeUB2ZZevVh3moKITV7gXHrbTJF7jQs9jpWV0QEA5APzu0WDf3Y0m4sXPVpn
 E9jS3jGJyntZ9rIMzHFs/lguI37xzT1YRAHAYgoZ84b7K/9g94NgEE2HecfNKVqZ
 D6PN0UJcA4KQo+5UJ7MWiQxWM3QAwVfSKsP1mXv51tiRGo4UUzNW77Ej2nKRJjhK
 wDNiZ+08khfeS2BuF9J2ebAzpgma5EgweH2z7zmx8Ch5t4Cx6hVAQ4Z6axbZMGjP
 HxXPw5rIwZTnQYoaGU86BrxrFH2j2bb963kWoDzliH+4PQrJ/iIEpkF7vu5Y2oWr
 WtXdOo6CsdQh1rT1UWA87ZYDtkWgj3/ITv5xJrXf8VyD9WHHSPst616XHLzBLGzo
 Hc+lAPhnVm59XZhQbVgXZy37Eqa9qHEG6GIRUkwD13nttSSfLfizO0IlXlH+awQV
 2A+TjbAt2lneUaRzMPfxG/yFt3rPqbBfSWj3o2ClPPn9sKksKxj7IjNW0v81Ztq/
 H6UmYRuq+wlQJzlwiF8+6SzoBXObztrmtIa2ipiM5k+xePG1jsPGFLm98UMlPcxN
 5IMz78DQ/hE3K3fKRt6clImd98xq5R0H9iUQPor2I7C/67fpTjThDRdHDUina1tk
 Oxo=
 =vAit
 -----END PGP SIGNATURE-----

Merge tag 'dma-mapping-6.4-2023-04-28' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - fix a PageHighMem check in dma-coherent initialization (Doug Berger)

 - clean up the coherency default initialization (Jiaxun Yang)

 - add cacheline to user/kernel dma-debug space dump messages (Desnes
   Nunes, Geert Uytterhoeven)

 - swiotlb statistics improvements (Michael Kelley)

 - misc cleanups (Petr Tesarik)

* tag 'dma-mapping-6.4-2023-04-28' of git://git.infradead.org/users/hch/dma-mapping:
  swiotlb: Omit total_used and used_hiwater if !CONFIG_DEBUG_FS
  swiotlb: track and report io_tlb_used high water marks in debugfs
  swiotlb: fix debugfs reporting of reserved memory pools
  swiotlb: relocate PageHighMem test away from rmem_swiotlb_setup
  of: address: always use dma_default_coherent for default coherency
  dma-mapping: provide CONFIG_ARCH_DMA_DEFAULT_COHERENT
  dma-mapping: provide a fallback dma_default_coherent
  dma-debug: Use %pa to format phys_addr_t
  dma-debug: add cacheline to user/kernel space dump messages
  dma-debug: small dma_debug_entry's comment and variable name updates
  dma-direct: cleanup parameters to dma_direct_optimal_gfp_mask
This commit is contained in:
Linus Torvalds 2023-04-29 10:29:57 -07:00
commit b28e6315a0
11 changed files with 189 additions and 88 deletions

View File

@ -124,6 +124,7 @@ config PPC
#
select ARCH_32BIT_OFF_T if PPC32
select ARCH_DISABLE_KASAN_INLINE if PPC_RADIX_MMU
select ARCH_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_COPY_MC if PPC64
@ -287,7 +288,6 @@ config PPC
select NEED_PER_CPU_PAGE_FIRST_CHUNK if PPC64
select NEED_SG_DMA_LENGTH
select OF
select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
select OF_EARLY_FLATTREE
select OLD_SIGACTION if PPC32
select OLD_SIGSUSPEND

View File

@ -12,6 +12,7 @@ config 32BIT
config RISCV
def_bool y
select ARCH_DMA_DEFAULT_COHERENT
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
@ -127,7 +128,6 @@ config RISCV
select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES
select OF
select OF_DMA_DEFAULT_COHERENT
select OF_EARLY_FLATTREE
select OF_IRQ
select PCI_DOMAINS_GENERIC if PCI

View File

@ -102,8 +102,4 @@ config OF_OVERLAY
config OF_NUMA
bool
config OF_DMA_DEFAULT_COHERENT
# arches should select this if DMA is coherent by default for OF devices
bool
endif # OF

View File

@ -1037,7 +1037,7 @@ phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
bool of_dma_is_coherent(struct device_node *np)
{
struct device_node *node;
bool is_coherent = IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT);
bool is_coherent = dma_default_coherent;
node = of_node_get(np);

View File

@ -269,6 +269,8 @@ static inline bool dev_is_dma_coherent(struct device *dev)
return dev->dma_coherent;
}
#else
#define dma_default_coherent true
static inline bool dev_is_dma_coherent(struct device *dev)
{
return true;

View File

@ -87,6 +87,11 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
* @for_alloc: %true if the pool is used for memory allocation
* @nareas: The area number in the pool.
* @area_nslabs: The slot number in the area.
* @total_used: The total number of slots in the pool that are currently used
* across all areas. Used only for calculating used_hiwater in
* debugfs.
* @used_hiwater: The high water mark for total_used. Used only for reporting
* in debugfs.
*/
struct io_tlb_mem {
phys_addr_t start;
@ -102,6 +107,10 @@ struct io_tlb_mem {
unsigned int area_nslabs;
struct io_tlb_area *areas;
struct io_tlb_slot *slots;
#ifdef CONFIG_DEBUG_FS
atomic_long_t total_used;
atomic_long_t used_hiwater;
#endif
};
extern struct io_tlb_mem io_tlb_default_mem;

View File

@ -76,6 +76,13 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
#
# Select this option if the architecture assumes DMA devices are coherent
# by default.
#
config ARCH_DMA_DEFAULT_COHERENT
bool
config SWIOTLB
bool
select NEED_DMA_MAP_STATE

View File

@ -53,6 +53,7 @@ enum map_err_types {
* struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
* @list: node on pre-allocated free_entries list
* @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
* @dev_addr: dma address
* @size: length of the mapping
* @type: single, page, sg, coherent
* @direction: enum dma_data_direction
@ -395,37 +396,6 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry)
return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
}
/*
 * Dump mapping entries for debugging purposes
 *
 * Walks every bucket of dma_entry_hash and logs one dev_info() line per
 * tracked entry. If @dev is NULL every entry is logged; otherwise only the
 * entries whose ->dev equals @dev are.
 */
void debug_dma_dump_mappings(struct device *dev)
{
int idx;
for (idx = 0; idx < HASH_SIZE; idx++) {
struct hash_bucket *bucket = &dma_entry_hash[idx];
struct dma_debug_entry *entry;
unsigned long flags;
/* The bucket list is only walked with the bucket lock held. */
spin_lock_irqsave(&bucket->lock, flags);
list_for_each_entry(entry, &bucket->list, list) {
if (!dev || dev == entry->dev) {
dev_info(entry->dev,
"%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
type2name[entry->type], idx,
phys_addr(entry), entry->pfn,
entry->dev_addr, entry->size,
dir2name[entry->direction],
maperr2str[entry->map_err_type]);
}
}
spin_unlock_irqrestore(&bucket->lock, flags);
/* Called between buckets, after the bucket lock has been released. */
cond_resched();
}
}
/*
* For each mapping (initial cacheline in the case of
* dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
@ -546,6 +516,70 @@ static void active_cacheline_remove(struct dma_debug_entry *entry)
spin_unlock_irqrestore(&radix_lock, flags);
}
/*
 * Dump the tracked mapping entries to the kernel log for debugging purposes.
 *
 * Every bucket of dma_entry_hash is walked under its own lock. If @dev is
 * NULL all entries are printed; otherwise only the entries that belong to
 * @dev are. Each line also carries the entry's cacheline number (cln=).
 */
void debug_dma_dump_mappings(struct device *dev)
{
	int idx;

	for (idx = 0; idx < HASH_SIZE; idx++) {
		struct hash_bucket *bkt = &dma_entry_hash[idx];
		struct dma_debug_entry *ent;
		unsigned long irqflags;

		spin_lock_irqsave(&bkt->lock, irqflags);
		list_for_each_entry(ent, &bkt->list, list) {
			phys_addr_t cacheline;

			/* Skip entries of other devices when a filter is given. */
			if (dev && ent->dev != dev)
				continue;

			/* %pa wants a pointer to the phys_addr_t, not its value. */
			cacheline = to_cacheline_number(ent);
			dev_info(ent->dev,
				 "%s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n",
				 type2name[ent->type], idx,
				 phys_addr(ent), ent->pfn,
				 ent->dev_addr, ent->size,
				 &cacheline, dir2name[ent->direction],
				 maperr2str[ent->map_err_type]);
		}
		spin_unlock_irqrestore(&bkt->lock, irqflags);

		/* Bucket lock is dropped here, before moving to the next bucket. */
		cond_resched();
	}
}
/*
 * Dump the tracked mapping entries to user space through a seq_file.
 *
 * Emits one line per entry of dma_entry_hash, prefixed with the owning
 * device's driver string and device name, and including the entry's
 * cacheline number (cln=). Each bucket is walked under its own lock.
 * Always returns 0; @v is not used.
 */
static int dump_show(struct seq_file *seq, void *v)
{
	int idx;

	for (idx = 0; idx < HASH_SIZE; idx++) {
		struct hash_bucket *bkt = &dma_entry_hash[idx];
		struct dma_debug_entry *ent;
		unsigned long irqflags;

		spin_lock_irqsave(&bkt->lock, irqflags);
		list_for_each_entry(ent, &bkt->list, list) {
			/* %pa wants a pointer to the phys_addr_t, not its value. */
			phys_addr_t cacheline = to_cacheline_number(ent);

			seq_printf(seq,
				   "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n",
				   dev_driver_string(ent->dev),
				   dev_name(ent->dev),
				   type2name[ent->type], idx,
				   phys_addr(ent), ent->pfn,
				   ent->dev_addr, ent->size,
				   &cacheline, dir2name[ent->direction],
				   maperr2str[ent->map_err_type]);
		}
		spin_unlock_irqrestore(&bkt->lock, irqflags);
	}

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(dump);
/*
* Wrapper function for adding an entry to the hash.
* This function takes care of locking itself.
@ -764,33 +798,6 @@ static const struct file_operations filter_fops = {
.llseek = default_llseek,
};
/*
 * seq_file show callback: writes one line to @seq for every entry found in
 * dma_entry_hash, prefixed with the owning device's name and driver string.
 * Each bucket is walked under its own lock. Always returns 0; @v is not used.
 */
static int dump_show(struct seq_file *seq, void *v)
{
int idx;
for (idx = 0; idx < HASH_SIZE; idx++) {
struct hash_bucket *bucket = &dma_entry_hash[idx];
struct dma_debug_entry *entry;
unsigned long flags;
/* The bucket list is only walked with the bucket lock held. */
spin_lock_irqsave(&bucket->lock, flags);
list_for_each_entry(entry, &bucket->list, list) {
seq_printf(seq,
"%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx %s %s\n",
dev_name(entry->dev),
dev_driver_string(entry->dev),
type2name[entry->type], idx,
phys_addr(entry), entry->pfn,
entry->dev_addr, entry->size,
dir2name[entry->direction],
maperr2str[entry->map_err_type]);
}
spin_unlock_irqrestore(&bucket->lock, flags);
}
return 0;
}
DEFINE_SHOW_ATTRIBUTE(dump);
static int __init dma_debug_fs_init(void)
{
struct dentry *dentry = debugfs_create_dir("dma-api", NULL);
@ -1262,13 +1269,13 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
}
EXPORT_SYMBOL(debug_dma_mapping_error);
void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
void debug_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
size_t size, int direction)
{
struct dma_debug_entry ref = {
.type = dma_debug_single,
.dev = dev,
.dev_addr = addr,
.dev_addr = dma_addr,
.size = size,
.direction = direction,
};
@ -1403,13 +1410,13 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
}
void debug_dma_free_coherent(struct device *dev, size_t size,
void *virt, dma_addr_t addr)
void *virt, dma_addr_t dma_addr)
{
struct dma_debug_entry ref = {
.type = dma_debug_coherent,
.dev = dev,
.offset = offset_in_page(virt),
.dev_addr = addr,
.dev_addr = dma_addr,
.size = size,
.direction = DMA_BIDIRECTIONAL,
};

View File

@ -44,10 +44,11 @@ u64 dma_direct_get_required_mask(struct device *dev)
return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
u64 *phys_limit)
static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit)
{
u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
u64 dma_limit = min_not_zero(
dev->coherent_dma_mask,
dev->bus_dma_limit);
/*
* Optimistically try the zone that the physical address mask falls
@ -126,8 +127,7 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
if (is_swiotlb_for_alloc(dev))
return dma_direct_alloc_swiotlb(dev, size);
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_limit);
gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit);
page = dma_alloc_contiguous(dev, size, gfp);
if (page) {
if (!dma_coherent_ok(dev, page_to_phys(page), size) ||
@ -172,14 +172,13 @@ static void *dma_direct_alloc_from_pool(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp)
{
struct page *page;
u64 phys_mask;
u64 phys_limit;
void *ret;
if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)))
return NULL;
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_mask);
gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit);
page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok);
if (!page)
return NULL;

View File

@ -17,7 +17,11 @@
#include "debug.h"
#include "direct.h"
bool dma_default_coherent;
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
bool dma_default_coherent = IS_ENABLED(CONFIG_ARCH_DMA_DEFAULT_COHERENT);
#endif
/*
* Managed DMA API

View File

@ -565,6 +565,40 @@ static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
return index;
}
/*
* Track the total used slots with a global atomic value in order to have
* correct information to determine the high water mark. The mem_used()
* function gives imprecise results because there's no locking across
* multiple areas.
*/
#ifdef CONFIG_DEBUG_FS
/*
 * Account @nslots newly allocated slots in @mem->total_used and raise
 * @mem->used_hiwater if the new total exceeds it.
 */
static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
{
	unsigned long used_now, peak;

	used_now = atomic_long_add_return(nslots, &mem->total_used);
	peak = atomic_long_read(&mem->used_hiwater);

	/*
	 * Retry until either the recorded peak is already at least as large
	 * as our total, or we manage to publish our total as the new peak.
	 * A failed try_cmpxchg refreshes 'peak' with the current value, so
	 * the comparison is re-evaluated against what another CPU stored.
	 */
	while (used_now > peak) {
		if (atomic_long_try_cmpxchg(&mem->used_hiwater,
					    &peak, used_now))
			break;
	}
}
/*
 * Subtract @nslots released slots from @mem->total_used. The high water
 * mark (used_hiwater) is not touched here.
 */
static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
{
atomic_long_sub(nslots, &mem->total_used);
}
#else /* !CONFIG_DEBUG_FS */
/* No-op variant used when CONFIG_DEBUG_FS is not set: nothing is tracked. */
static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
{
}
/* No-op variant used when CONFIG_DEBUG_FS is not set: nothing is tracked. */
static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
{
}
#endif /* CONFIG_DEBUG_FS */
/*
* Find a suitable number of IO TLB entries size that will fit this request and
* allocate a buffer from that IO TLB pool.
@ -659,6 +693,8 @@ found:
area->index = wrap_area_index(mem, index + nslots);
area->used += nslots;
spin_unlock_irqrestore(&area->lock, flags);
inc_used_and_hiwater(mem, nslots);
return slot_index;
}
@ -791,6 +827,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
area->used -= nslots;
spin_unlock_irqrestore(&area->lock, flags);
dec_used(mem, nslots);
}
/*
@ -885,34 +923,73 @@ bool is_swiotlb_active(struct device *dev)
}
EXPORT_SYMBOL_GPL(is_swiotlb_active);
#ifdef CONFIG_DEBUG_FS
static int io_tlb_used_get(void *data, u64 *val)
{
*val = mem_used(&io_tlb_default_mem);
struct io_tlb_mem *mem = data;
*val = mem_used(mem);
return 0;
}
/*
 * debugfs read handler: report the pool's high water mark.
 * @data is the struct io_tlb_mem the file was created for; the current
 * used_hiwater value is stored in *@val. Always returns 0.
 */
static int io_tlb_hiwater_get(void *data, u64 *val)
{
	struct io_tlb_mem *pool = data;
	long peak = atomic_long_read(&pool->used_hiwater);

	*val = peak;
	return 0;
}
/*
 * debugfs write handler: reset the pool's high water mark.
 * @data is the struct io_tlb_mem the file was created for. The only value
 * accepted is 0; anything else is rejected with -EINVAL. Returns 0 on
 * success.
 */
static int io_tlb_hiwater_set(void *data, u64 val)
{
	struct io_tlb_mem *pool = data;

	/* A write can only clear the mark, never set an arbitrary one. */
	if (val)
		return -EINVAL;

	atomic_long_set(&pool->used_hiwater, 0);
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
io_tlb_hiwater_set, "%llu\n");
static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
{
atomic_long_set(&mem->total_used, 0);
atomic_long_set(&mem->used_hiwater, 0);
mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
if (!mem->nslabs)
return;
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
debugfs_create_file("io_tlb_used", 0400, mem->debugfs, NULL,
debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem,
&fops_io_tlb_used);
debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
&fops_io_tlb_hiwater);
}
static int __init __maybe_unused swiotlb_create_default_debugfs(void)
static int __init swiotlb_create_default_debugfs(void)
{
swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb");
return 0;
}
#ifdef CONFIG_DEBUG_FS
late_initcall(swiotlb_create_default_debugfs);
#endif
#else /* !CONFIG_DEBUG_FS */
/*
 * Empty variant used when CONFIG_DEBUG_FS is not set, so callers can invoke
 * it unconditionally; no debugfs entries are created.
 */
static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
{
}
#endif /* CONFIG_DEBUG_FS */
#ifdef CONFIG_DMA_RESTRICTED_POOL
@ -955,6 +1032,11 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
/* Set Per-device io tlb area to one */
unsigned int nareas = 1;
if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping.");
return -EINVAL;
}
/*
* Since multiple devices can share the same pool, the private data,
* io_tlb_mem struct, will be initialized by the first device attached
@ -1016,11 +1098,6 @@ static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL;
if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
pr_err("Restricted DMA pool must be accessible within the linear mapping.");
return -EINVAL;
}
rmem->ops = &rmem_swiotlb_ops;
pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);