From d1e0a4c368c28a4f929977bb850e51b95823fa94 Mon Sep 17 00:00:00 2001
From: Yaroslav Rosomakho
Date: Mon, 12 Aug 2019 11:26:03 +0200
Subject: [PATCH] Ported pcie-brcmstb bounce buffer implementation to ARM64.

This enables full 4G RAM usage on Raspberry Pi in 64-bit mode.

Signed-off-by: Yaroslav Rosomakho
---
Review notes (text between the "---" marker and the diffstat is not
applied):

The copy of this patch under review had lost its line breaks and all
text inside angle brackets (e-mail addresses and #include targets).
Line breaks are restored here so that every hunk matches its header
counts. The #include targets are reconstructed from the symbols the
code uses and from the surrounding context; confirm them against the
original commit before applying. The index hashes are those of the
original patch and do not describe the corrected blobs.

What the patch does:
 - exports thin arm64_dma_*() wrappers around the static
   __dma_alloc/__dma_free/__swiotlb_* helpers in
   arch/arm64/mm/dma-mapping.c;
 - adds pcie-brcmstb-bounce64.c, which carves page-granular bounce
   buffers out of one coherent pool and copies data with
   bcm2838_dma40_memcpy() for mappings that end above the threshold;
 - moves the of_dma_configure() call from brcm_set_dma_ops() into the
   bus notifier, where it runs only for devices that are not routed
   through the bounce ops.

Corrections made during review (no line counts changed):
 - dma-mapping.h: named the third parameter of arm64_dma_unmap_sg().
 - arch_setup_dma_ops(): the debug print passed the dev->dma_mask
   pointer to %llx at dev_err level without a newline; it now prints
   the mask value (0 when the pointer is NULL) via dev_dbg().
 - alloc_safe_buffer(): size_t is printed with %zu instead of %d.
 - dmabounce_mapping_error(): also reports the all-ones address that
   dmabounce_map_page() returns when no bounce buffer is available.
   NOTE(review): assumes the swiotlb check alone does not cover that
   value - confirm against the target kernel.
 - brcm_pcie_bounce_init(): fixed the "Could not allocated" typo, use
   %lu for unsigned long, and log successful init with dev_info().
 - brcm_pcie_bounce_uninit(): added the missing space between the two
   concatenated string literals ("attempting" "to unregister").
 - brcmstb_platform_notifier(): the of_dma_configure() failure path
   used a bare "return;" in a function returning int; it now returns
   ret like the neighbouring error path.

 arch/arm64/include/asm/dma-mapping.h          |  21 +
 arch/arm64/mm/dma-mapping.c                   |  51 ++
 drivers/pci/controller/Makefile               |   3 +
 drivers/pci/controller/pcie-brcmstb-bounce.h  |   2 +-
 .../pci/controller/pcie-brcmstb-bounce64.c    | 576 ++++++++++++++++++
 drivers/pci/controller/pcie-brcmstb.c         |  30 +-
 6 files changed, 659 insertions(+), 24 deletions(-)
 create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce64.c

diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index b7847eb8a7bb76..9195e524cb0823 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -24,6 +24,27 @@
 #include <xen/xen.h>
 #include <asm/xen/hypervisor.h>
 
+extern void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+			     gfp_t gfp, unsigned long attrs);
+extern void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
+			   dma_addr_t handle, unsigned long attrs);
+extern int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			  unsigned long attrs);
+extern int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+				 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+				 unsigned long attrs);
+extern int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
+			    enum dma_data_direction dir, unsigned long attrs);
+extern void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
+			       enum dma_data_direction dir, unsigned long attrs);
+extern void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
+				      enum dma_data_direction dir);
+extern void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
+					 enum dma_data_direction dir);
+
+
+
 extern const struct dma_map_ops dummy_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index d3a5bb16f0b231..bd75595bf37dce 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -138,6 +138,12 @@ static void *__dma_alloc(struct device *dev, size_t size,
 	return NULL;
 }
 
+void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		      gfp_t gfp, unsigned long attrs)
+{
+	return __dma_alloc(dev, size, handle, gfp, attrs);
+}
+
 static void __dma_free(struct device *dev, size_t size,
 		       void *vaddr, dma_addr_t dma_handle,
 		       unsigned long attrs)
@@ -154,6 +160,12 @@ static void __dma_free(struct device *dev, size_t size,
 	swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
 }
 
+void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		    dma_addr_t handle, unsigned long attrs)
+{
+	__dma_free(dev, size, cpu_addr, handle, attrs);
+}
+
 static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction dir,
@@ -197,6 +209,12 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 	return ret;
 }
 
+int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
+		     enum dma_data_direction dir, unsigned long attrs)
+{
+	return __swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
 static void __swiotlb_unmap_sg_attrs(struct device *dev,
 				     struct scatterlist *sgl, int nelems,
 				     enum dma_data_direction dir,
@@ -213,6 +231,12 @@ static void __swiotlb_unmap_sg_attrs(struct device *dev,
 	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
 }
 
+void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
+			enum dma_data_direction dir, unsigned long attrs)
+{
+	__swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
 static void __swiotlb_sync_single_for_cpu(struct device *dev,
 					  dma_addr_t dev_addr, size_t size,
 					  enum dma_data_direction dir)
@@ -245,6 +269,12 @@ static void __swiotlb_sync_sg_for_cpu(struct device *dev,
 	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
 }
 
+void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
+			       enum dma_data_direction dir)
+{
+	__swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
 static void __swiotlb_sync_sg_for_device(struct device *dev,
 					 struct scatterlist *sgl, int nelems,
 					 enum dma_data_direction dir)
@@ -259,6 +289,12 @@ static void __swiotlb_sync_sg_for_device(struct device *dev,
 				       sg->length, dir);
 }
 
+void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
+				  enum dma_data_direction dir)
+{
+	__swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+}
+
 static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
 			      unsigned long pfn, size_t size)
 {
@@ -294,6 +330,13 @@ static int __swiotlb_mmap(struct device *dev,
 	return __swiotlb_mmap_pfn(vma, pfn, size);
 }
 
+int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		   void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		   unsigned long attrs)
+{
+	return __swiotlb_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
 static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
 				      struct page *page, size_t size)
 {
@@ -314,6 +357,13 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
 	return __swiotlb_get_sgtable_page(sgt, page, size);
 }
 
+int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			  unsigned long attrs)
+{
+	return __swiotlb_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
+}
+
 static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
 	if (swiotlb)
@@ -888,6 +938,7 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
+	dev_dbg(dev, "DMA MASK %llx\n", dev->dma_mask ? *dev->dma_mask : 0ULL);
 	if (!dev->dma_ops)
 		dev->dma_ops = &arm64_swiotlb_dma_ops;
 
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index c40b7b982ea48a..71be2a0a7e68d7 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -32,6 +32,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o
 ifdef CONFIG_ARM
 obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
 endif
+ifdef CONFIG_ARM64
+obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
+endif
 
 obj-$(CONFIG_VMD) += vmd.o
 # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
diff --git a/drivers/pci/controller/pcie-brcmstb-bounce.h b/drivers/pci/controller/pcie-brcmstb-bounce.h
index 2fe20a14d03522..7caa0781329b55 100644
--- a/drivers/pci/controller/pcie-brcmstb-bounce.h
+++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
@@ -6,7 +6,7 @@
 #ifndef _PCIE_BRCMSTB_BOUNCE_H
 #define _PCIE_BRCMSTB_BOUNCE_H
 
-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
 
 int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
 			  dma_addr_t threshold);
diff --git a/drivers/pci/controller/pcie-brcmstb-bounce64.c b/drivers/pci/controller/pcie-brcmstb-bounce64.c
new file mode 100644
index 00000000000000..d9f1a46fc3315f
--- /dev/null
+++ b/drivers/pci/controller/pcie-brcmstb-bounce64.c
@@ -0,0 +1,576 @@
+/*
+ * This code started out as a version of arch/arm/common/dmabounce.c,
+ * modified to cope with highmem pages. Now it has been changed heavily -
+ * it now preallocates a large block (currently 4MB) and carves it up
+ * sequentially in ring fashion, and DMA is used to copy the data - to the
+ * point where very little of the original remains.
+ *
+ * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
+ *
+ * Original version by Brad Parker (brad@heeltoe.com)
+ * Re-written by Christopher Hoover
+ * Made generic by Deepak Saxena
+ *
+ * Copyright (C) 2002 Hewlett Packard Company.
+ * Copyright (C) 2004 MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
+#include <linux/swiotlb.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/bitmap.h>
+
+#include <asm/cacheflush.h>
+
+#define STATS
+
+#ifdef STATS
+#define DO_STATS(X) do { X ; } while (0)
+#else
+#define DO_STATS(X) do { } while (0)
+#endif
+
+/* ************************************************** */
+
+struct safe_buffer {
+	struct list_head node;
+
+	/* original request */
+	size_t size;
+	int direction;
+
+	struct dmabounce_pool *pool;
+	void *safe;
+	dma_addr_t unsafe_dma_addr;
+	dma_addr_t safe_dma_addr;
+};
+
+struct dmabounce_pool {
+	unsigned long pages;
+	void *virt_addr;
+	dma_addr_t dma_addr;
+	unsigned long *alloc_map;
+	unsigned long alloc_pos;
+	spinlock_t lock;
+	struct device *dev;
+	unsigned long num_pages;
+#ifdef STATS
+	size_t max_size;
+	unsigned long num_bufs;
+	unsigned long max_bufs;
+	unsigned long max_pages;
+#endif
+};
+
+struct dmabounce_device_info {
+	struct device *dev;
+	dma_addr_t threshold;
+	struct list_head safe_buffers;
+	struct dmabounce_pool pool;
+	rwlock_t lock;
+#ifdef STATS
+	unsigned long map_count;
+	unsigned long unmap_count;
+	unsigned long sync_dev_count;
+	unsigned long sync_cpu_count;
+	unsigned long fail_count;
+	int attr_res;
+#endif
+};
+
+static struct dmabounce_device_info *g_dmabounce_device_info;
+
+extern int bcm2838_dma40_memcpy_init(void);
+extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
+
+#ifdef STATS
+static ssize_t
+bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+	return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
+		       device_info->map_count,
+		       device_info->unmap_count,
+		       device_info->sync_dev_count,
+		       device_info->sync_cpu_count,
+		       device_info->fail_count,
+		       device_info->pool.max_size,
+		       device_info->pool.num_bufs,
+		       device_info->pool.max_bufs,
+		       device_info->pool.num_pages * PAGE_SIZE,
+		       device_info->pool.max_pages * PAGE_SIZE);
+}
+
+static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
+#endif
+
+static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
+			 unsigned long buffer_size)
+{
+	int ret = -ENOMEM;
+	pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
+	pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
+	if (!pool->alloc_map)
+		goto err_bitmap;
+	pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
+					     &pool->dma_addr, GFP_KERNEL);
+	if (!pool->virt_addr)
+		goto err_dmabuf;
+
+	pool->alloc_pos = 0;
+	spin_lock_init(&pool->lock);
+	pool->dev = dev;
+	pool->num_pages = 0;
+
+	DO_STATS(pool->max_size = 0);
+	DO_STATS(pool->num_bufs = 0);
+	DO_STATS(pool->max_bufs = 0);
+	DO_STATS(pool->max_pages = 0);
+
+	return 0;
+
+err_dmabuf:
+	bitmap_free(pool->alloc_map);
+err_bitmap:
+	return ret;
+}
+
+static void bounce_destroy(struct dmabounce_pool *pool)
+{
+	dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
+			  pool->dma_addr);
+
+	bitmap_free(pool->alloc_map);
+}
+
+static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
+			  dma_addr_t *dmaaddrp)
+{
+	unsigned long pages;
+	unsigned long flags;
+	unsigned long pos;
+
+	pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
+
+	DO_STATS(pool->max_size = max(size, pool->max_size));
+
+	spin_lock_irqsave(&pool->lock, flags);
+	pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
+					 pool->alloc_pos, pages, 0);
+	/* If not found, try from the start */
+	if (pos >= pool->pages && pool->alloc_pos)
+		pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
+						 0, pages, 0);
+
+	if (pos >= pool->pages) {
+		spin_unlock_irqrestore(&pool->lock, flags);
+		return NULL;
+	}
+
+	bitmap_set(pool->alloc_map, pos, pages);
+	pool->alloc_pos = (pos + pages) % pool->pages;
+	pool->num_pages += pages;
+
+	DO_STATS(pool->num_bufs++);
+	DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
+	DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
+
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	*dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
+
+	return pool->virt_addr + pos * PAGE_SIZE;
+}
+
+static void
+bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
+{
+	unsigned long pages;
+	unsigned long flags;
+	unsigned long pos;
+
+	pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
+	pos = (buf - pool->virt_addr)/PAGE_SIZE;
+
+	BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));
+
+	spin_lock_irqsave(&pool->lock, flags);
+	bitmap_clear(pool->alloc_map, pos, pages);
+	pool->num_pages -= pages;
+	if (pool->num_pages == 0)
+		pool->alloc_pos = 0;
+	DO_STATS(pool->num_bufs--);
+	spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+/* allocate a 'safe' buffer and keep track of it */
+static struct safe_buffer *
+alloc_safe_buffer(struct dmabounce_device_info *device_info,
+		  dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
+{
+	struct safe_buffer *buf;
+	struct dmabounce_pool *pool = &device_info->pool;
+	struct device *dev = device_info->dev;
+	unsigned long flags;
+
+	/*
+	 * Although one might expect this to be called in thread context,
+	 * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
+	 * was previously used to select the appropriate allocation mode,
+	 * but this is unsafe.
+	 */
+	buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
+	if (!buf) {
+		dev_warn(dev, "%s: kmalloc failed\n", __func__);
+		return NULL;
+	}
+
+	buf->unsafe_dma_addr = dma_addr;
+	buf->size = size;
+	buf->direction = dir;
+	buf->pool = pool;
+
+	buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
+
+	if (!buf->safe) {
+		dev_warn(dev,
+			 "%s: could not alloc dma memory (size=%zu)\n",
+			 __func__, size);
+		kfree(buf);
+		return NULL;
+	}
+
+	write_lock_irqsave(&device_info->lock, flags);
+	list_add(&buf->node, &device_info->safe_buffers);
+	write_unlock_irqrestore(&device_info->lock, flags);
+
+	return buf;
+}
+
+/* determine if a buffer is from our "safe" pool */
+static struct safe_buffer *
+find_safe_buffer(struct dmabounce_device_info *device_info,
+		 dma_addr_t safe_dma_addr)
+{
+	struct safe_buffer *b, *rb = NULL;
+	unsigned long flags;
+
+	read_lock_irqsave(&device_info->lock, flags);
+
+	list_for_each_entry(b, &device_info->safe_buffers, node)
+		if (b->safe_dma_addr <= safe_dma_addr &&
+		    b->safe_dma_addr + b->size > safe_dma_addr) {
+			rb = b;
+			break;
+		}
+
+	read_unlock_irqrestore(&device_info->lock, flags);
+	return rb;
+}
+
+static void
+free_safe_buffer(struct dmabounce_device_info *device_info,
+		 struct safe_buffer *buf)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&device_info->lock, flags);
+	list_del(&buf->node);
+	write_unlock_irqrestore(&device_info->lock, flags);
+
+	bounce_free(buf->pool, buf->safe, buf->size);
+
+	kfree(buf);
+}
+
+/* ************************************************** */
+
+static struct safe_buffer *
+find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
+{
+	if (!dev || !g_dmabounce_device_info)
+		return NULL;
+	if (dma_mapping_error(dev, dma_addr)) {
+		dev_err(dev, "Trying to %s invalid mapping\n", where);
+		return NULL;
+	}
+	return find_safe_buffer(g_dmabounce_device_info, dma_addr);
+}
+
+static dma_addr_t
+map_single(struct device *dev, struct safe_buffer *buf, size_t size,
+	   enum dma_data_direction dir, unsigned long attrs)
+{
+	BUG_ON(buf->size != size);
+	BUG_ON(buf->direction != dir);
+
+	dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
+		(u64)buf->safe_dma_addr);
+
+	if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
+				     size);
+
+	return buf->safe_dma_addr;
+}
+
+static dma_addr_t
+unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
+	     enum dma_data_direction dir, unsigned long attrs)
+{
+	BUG_ON(buf->size != size);
+	BUG_ON(buf->direction != dir);
+
+	if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+		dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
+			(u64)buf->unsafe_dma_addr);
+
+		bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
+				     size);
+	}
+	return buf->unsafe_dma_addr;
+}
+
+/* ************************************************** */
+
+/*
+ * see if a buffer address is in an 'unsafe' range. if it is
+ * allocate a 'safe' buffer and copy the unsafe buffer into it.
+ * substitute the safe buffer for the unsafe one.
+ * (basically move the buffer from an unsafe area to a safe one)
+ */
+static dma_addr_t
+dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
+		   size_t size, enum dma_data_direction dir,
+		   unsigned long attrs)
+{
+	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+	dma_addr_t dma_addr;
+
+	dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
+
+	swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
+	if (!is_device_dma_coherent(dev))
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+
+	if (device_info && (dma_addr + size) > device_info->threshold) {
+		struct safe_buffer *buf;
+
+		buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
+		if (!buf) {
+			DO_STATS(device_info->fail_count++);
+			return (~(dma_addr_t)0x0);
+		}
+
+		DO_STATS(device_info->map_count++);
+
+		dma_addr = map_single(dev, buf, size, dir, attrs);
+	}
+	return dma_addr;
+}
+
+/*
+ * see if a mapped address was really a "safe" buffer and if so, copy
+ * the data from the safe buffer back to the unsafe buffer and free up
+ * the safe buffer. (basically return things back to the way they
+ * should be)
+ */
+static void
+dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
+		     enum dma_data_direction dir, unsigned long attrs)
+{
+	struct safe_buffer *buf;
+
+	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+	if (buf) {
+		DO_STATS(g_dmabounce_device_info->unmap_count++);
+		dma_addr = unmap_single(dev, buf, size, dir, attrs);
+		free_safe_buffer(g_dmabounce_device_info, buf);
+	}
+
+	if (!is_device_dma_coherent(dev))
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+	swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
+}
+
+/*
+ * A version of dmabounce_map_page that assumes the mapping has already
+ * been created - intended for streaming operation.
+ */
+static void
+dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
+			  enum dma_data_direction dir)
+{
+	struct safe_buffer *buf;
+
+	swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
+	if (!is_device_dma_coherent(dev))
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+
+	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+	if (buf) {
+		DO_STATS(g_dmabounce_device_info->sync_dev_count++);
+		map_single(dev, buf, size, dir, 0);
+	}
+}
+
+/*
+ * A version of dmabounce_unmap_page that doesn't destroy the mapping -
+ * intended for streaming operation.
+ */
+static void
+dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
+		       size_t size, enum dma_data_direction dir)
+{
+	struct safe_buffer *buf;
+
+	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+	if (buf) {
+		DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
+		dma_addr = unmap_single(dev, buf, size, dir, 0);
+	}
+
+	if (!is_device_dma_coherent(dev))
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
+	swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
+}
+
+static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
+{
+	if (g_dmabounce_device_info)
+		return 0;
+
+	return swiotlb_dma_supported(dev, dma_mask);
+}
+
+static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return dma_addr == ~(dma_addr_t)0x0 || swiotlb_dma_mapping_error(dev, dma_addr);
+}
+
+static const struct dma_map_ops dmabounce_ops = {
+	.alloc			= arm64_dma_alloc,
+	.free			= arm64_dma_free,
+	.mmap			= arm64_dma_mmap,
+	.get_sgtable		= arm64_dma_get_sgtable,
+	.map_page		= dmabounce_map_page,
+	.unmap_page		= dmabounce_unmap_page,
+	.sync_single_for_cpu	= dmabounce_sync_for_cpu,
+	.sync_single_for_device	= dmabounce_sync_for_device,
+	.map_sg			= arm64_dma_map_sg,
+	.unmap_sg		= arm64_dma_unmap_sg,
+	.sync_sg_for_cpu	= arm64_dma_sync_sg_for_cpu,
+	.sync_sg_for_device	= arm64_dma_sync_sg_for_device,
+	.dma_supported		= dmabounce_dma_supported,
+	.mapping_error		= dmabounce_mapping_error,
+};
+
+int brcm_pcie_bounce_init(struct device *dev,
+			  unsigned long buffer_size,
+			  dma_addr_t threshold)
+{
+	struct dmabounce_device_info *device_info;
+	int ret;
+
+	/* Only support a single client */
+	if (g_dmabounce_device_info)
+		return -EBUSY;
+
+	ret = bcm2838_dma40_memcpy_init();
+	if (ret)
+		return ret;
+
+	device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
+	if (!device_info) {
+		dev_err(dev,
+			"Could not allocate dmabounce_device_info\n");
+		return -ENOMEM;
+	}
+
+	ret = bounce_create(&device_info->pool, dev, buffer_size);
+	if (ret) {
+		dev_err(dev,
+			"dmabounce: could not allocate %lu byte DMA pool\n",
+			buffer_size);
+		goto err_bounce;
+	}
+
+	device_info->dev = dev;
+	device_info->threshold = threshold;
+	INIT_LIST_HEAD(&device_info->safe_buffers);
+	rwlock_init(&device_info->lock);
+
+	DO_STATS(device_info->map_count = 0);
+	DO_STATS(device_info->unmap_count = 0);
+	DO_STATS(device_info->sync_dev_count = 0);
+	DO_STATS(device_info->sync_cpu_count = 0);
+	DO_STATS(device_info->fail_count = 0);
+	DO_STATS(device_info->attr_res =
+		 device_create_file(dev, &dev_attr_dmabounce_stats));
+
+	g_dmabounce_device_info = device_info;
+
+	dev_info(dev, "dmabounce: initialised - %lu kB, threshold %pad\n",
+		 buffer_size / 1024, &threshold);
+
+	return 0;
+
+ err_bounce:
+	kfree(device_info);
+	return ret;
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_init);
+
+void brcm_pcie_bounce_uninit(struct device *dev)
+{
+	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+
+	g_dmabounce_device_info = NULL;
+
+	if (!device_info) {
+		dev_warn(dev,
+			 "Never registered with dmabounce but attempting "
+			 "to unregister!\n");
+		return;
+	}
+
+	if (!list_empty(&device_info->safe_buffers)) {
+		dev_err(dev,
+			"Removing from dmabounce with pending buffers!\n");
+		BUG();
+	}
+
+	bounce_destroy(&device_info->pool);
+
+	DO_STATS(if (device_info->attr_res == 0)
+			 device_remove_file(dev, &dev_attr_dmabounce_stats));
+
+	kfree(device_info);
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
+
+int brcm_pcie_bounce_register_dev(struct device *dev)
+{
+	set_dma_ops(dev, &dmabounce_ops);
+
+	return 0;
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
+
+MODULE_AUTHOR("Phil Elwell ");
+MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index f99ab05ab4778a..f279ed67e4e16c 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -617,28 +617,6 @@ static const struct dma_map_ops brcm_dma_ops = {
 
 static void brcm_set_dma_ops(struct device *dev)
 {
-	int ret;
-
-	if (IS_ENABLED(CONFIG_ARM64)) {
-		/*
-		 * We are going to invoke get_dma_ops(). That
-		 * function, at this point in time, invokes
-		 * get_arch_dma_ops(), and for ARM64 that function
-		 * returns a pointer to dummy_dma_ops. So then we'd
-		 * like to call arch_setup_dma_ops(), but that isn't
-		 * exported. Instead, we call of_dma_configure(),
-		 * which is exported, and this calls
-		 * arch_setup_dma_ops(). Once we do this the call to
-		 * get_dma_ops() will work properly because
-		 * dev->dma_ops will be set.
-		 */
-		ret = of_dma_configure(dev, dev->of_node, true);
-		if (ret) {
-			dev_err(dev, "of_dma_configure() failed: %d\n", ret);
-			return;
-		}
-	}
-
 	arch_dma_ops = get_dma_ops(dev);
 	if (!arch_dma_ops) {
 		dev_err(dev, "failed to get arch_dma_ops\n");
@@ -657,12 +635,12 @@ static int brcmstb_platform_notifier(struct notifier_block *nb,
 	extern unsigned long max_pfn;
 	struct device *dev = __dev;
 	const char *rc_name = "0000:00:00.0";
+	int ret;
 
 	switch (event) {
 	case BUS_NOTIFY_ADD_DEVICE:
 		if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
 		    strcmp(dev->kobj.name, rc_name)) {
-			int ret;
 
 			ret = brcm_pcie_bounce_register_dev(dev);
 			if (ret) {
@@ -671,6 +649,12 @@ static int brcmstb_platform_notifier(struct notifier_block *nb,
 					ret);
 				return ret;
 			}
+		} else if (IS_ENABLED(CONFIG_ARM64)) {
+			ret = of_dma_configure(dev, dev->of_node, true);
+			if (ret) {
+				dev_err(dev, "of_dma_configure() failed: %d\n", ret);
+				return ret;
+			}
 		}
 		brcm_set_dma_ops(dev);
 		return NOTIFY_OK;