/*
 * This code started out as a version of arch/arm/common/dmabounce.c,
 * modified to cope with highmem pages. Now it has been changed heavily -
 * it now preallocates a large block (currently 4MB) and carves it up
 * sequentially in ring fashion, and DMA is used to copy the data - to the
 * point where very little of the original remains.
 *
 * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
 *
 * Original version by Brad Parker (brad@heeltoe.com)
 * Re-written by Christopher Hoover <ch@murgatroid.com>
 * Made generic by Deepak Saxena <dsaxena@plexity.net>
 *
 * Copyright (C) 2002 Hewlett Packard Company.
 * Copyright (C) 2004 MontaVista Software, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/page-flags.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/dma-direct.h>
#include <linux/dmapool.h>
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

#define STATS

#ifdef STATS
#define DO_STATS(X) do { X ; } while (0)
#else
#define DO_STATS(X) do { } while (0)
#endif

/* ************************************************** */

struct safe_buffer {
	struct list_head node;

	/* original request */
	size_t		size;
	int		direction;

	struct dmabounce_pool *pool;
	void		*safe;
	dma_addr_t	unsafe_dma_addr;
	dma_addr_t	safe_dma_addr;
};

struct dmabounce_pool {
	unsigned long	pages;
	void		*virt_addr;
	dma_addr_t	dma_addr;
	unsigned long	*alloc_map;
	unsigned long	alloc_pos;
	spinlock_t	lock;
	struct device	*dev;
	unsigned long	num_pages;
#ifdef STATS
	size_t		max_size;
	unsigned long	num_bufs;
	unsigned long	max_bufs;
	unsigned long	max_pages;
#endif
};

struct dmabounce_device_info {
	struct device		*dev;
	dma_addr_t		threshold;
	struct list_head	safe_buffers;
	struct dmabounce_pool	pool;
	rwlock_t		lock;
#ifdef STATS
	unsigned long		map_count;
	unsigned long		unmap_count;
	unsigned long		sync_dev_count;
	unsigned long		sync_cpu_count;
	unsigned long		fail_count;
	int			attr_res;
#endif
};

static struct dmabounce_device_info *g_dmabounce_device_info;

extern int bcm2838_dma40_memcpy_init(void);
extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);

#ifdef STATS
static ssize_t
bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct dmabounce_device_info *device_info = g_dmabounce_device_info;

	return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
		       device_info->map_count,
		       device_info->unmap_count,
		       device_info->sync_dev_count,
		       device_info->sync_cpu_count,
		       device_info->fail_count,
		       device_info->pool.max_size,
		       device_info->pool.num_bufs,
		       device_info->pool.max_bufs,
		       device_info->pool.num_pages * PAGE_SIZE,
		       device_info->pool.max_pages * PAGE_SIZE);
}

static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
#endif

static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
			 unsigned long buffer_size)
{
	int ret = -ENOMEM;

	pool->pages = (buffer_size + PAGE_SIZE - 1) / PAGE_SIZE;
	pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
	if (!pool->alloc_map)
		goto err_bitmap;
	pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
					     &pool->dma_addr, GFP_KERNEL);
	if (!pool->virt_addr)
		goto err_dmabuf;

	pool->alloc_pos = 0;
	spin_lock_init(&pool->lock);
	pool->dev = dev;
	pool->num_pages = 0;

	DO_STATS(pool->max_size = 0);
	DO_STATS(pool->num_bufs = 0);
	DO_STATS(pool->max_bufs = 0);
	DO_STATS(pool->max_pages = 0);

	return 0;

err_dmabuf:
	bitmap_free(pool->alloc_map);
err_bitmap:
	return ret;
}

static void bounce_destroy(struct dmabounce_pool *pool)
{
	dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
			  pool->dma_addr);

	bitmap_free(pool->alloc_map);
}

static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
			  dma_addr_t *dmaaddrp)
{
	unsigned long pages;
	unsigned long flags;
	unsigned long pos;

	pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;

	DO_STATS(pool->max_size = max(size, pool->max_size));

	spin_lock_irqsave(&pool->lock, flags);
	pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
					 pool->alloc_pos, pages, 0);
	/* If not found, try from the start */
	if (pos >= pool->pages && pool->alloc_pos)
		pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
						 0, pages, 0);

	if (pos >= pool->pages) {
		spin_unlock_irqrestore(&pool->lock, flags);
		return NULL;
	}

	bitmap_set(pool->alloc_map, pos, pages);
	pool->alloc_pos = (pos + pages) % pool->pages;
	pool->num_pages += pages;

	DO_STATS(pool->num_bufs++);
	DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
	DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));

	spin_unlock_irqrestore(&pool->lock, flags);

	*dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;

	return pool->virt_addr + pos * PAGE_SIZE;
}

static void bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
{
	unsigned long pages;
	unsigned long flags;
	unsigned long pos;

	pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	pos = (buf - pool->virt_addr) / PAGE_SIZE;

	BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));

	spin_lock_irqsave(&pool->lock, flags);
	bitmap_clear(pool->alloc_map, pos, pages);
	pool->num_pages -= pages;
	if (pool->num_pages == 0)
		pool->alloc_pos = 0;

	DO_STATS(pool->num_bufs--);

	spin_unlock_irqrestore(&pool->lock, flags);
}

/* allocate a 'safe' buffer and keep track of it */
static struct safe_buffer *
alloc_safe_buffer(struct dmabounce_device_info *device_info,
		  dma_addr_t dma_addr, size_t size,
		  enum dma_data_direction dir)
{
	struct safe_buffer *buf;
	struct dmabounce_pool *pool = &device_info->pool;
	struct device *dev = device_info->dev;
	unsigned long flags;

	/*
	 * Although one might expect this to be called in thread context,
	 * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
	 * was previously used to select the appropriate allocation mode,
	 * but this is unsafe.
	 */
	buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
	if (!buf) {
		dev_warn(dev, "%s: kmalloc failed\n", __func__);
		return NULL;
	}

	buf->unsafe_dma_addr = dma_addr;
	buf->size = size;
	buf->direction = dir;
	buf->pool = pool;

	buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);

	if (!buf->safe) {
		dev_warn(dev, "%s: could not alloc dma memory (size=%zu)\n",
			 __func__, size);
		kfree(buf);
		return NULL;
	}

	write_lock_irqsave(&device_info->lock, flags);
	list_add(&buf->node, &device_info->safe_buffers);
	write_unlock_irqrestore(&device_info->lock, flags);

	return buf;
}

/* determine if a buffer is from our "safe" pool */
static struct safe_buffer *
find_safe_buffer(struct dmabounce_device_info *device_info,
		 dma_addr_t safe_dma_addr)
{
	struct safe_buffer *b, *rb = NULL;
	unsigned long flags;

	read_lock_irqsave(&device_info->lock, flags);

	list_for_each_entry(b, &device_info->safe_buffers, node)
		if (b->safe_dma_addr <= safe_dma_addr &&
		    b->safe_dma_addr + b->size > safe_dma_addr) {
			rb = b;
			break;
		}

	read_unlock_irqrestore(&device_info->lock, flags);
	return rb;
}

static void free_safe_buffer(struct dmabounce_device_info *device_info,
			     struct safe_buffer *buf)
{
	unsigned long flags;

	write_lock_irqsave(&device_info->lock, flags);
	list_del(&buf->node);
	write_unlock_irqrestore(&device_info->lock, flags);

	bounce_free(buf->pool, buf->safe, buf->size);

	kfree(buf);
}

/* ************************************************** */

static struct safe_buffer *
find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr,
		     const char *where)
{
	if (!dev || !g_dmabounce_device_info)
		return NULL;
	if (dma_mapping_error(dev, dma_addr)) {
		dev_err(dev, "Trying to %s invalid mapping\n", where);
		return NULL;
	}
	return find_safe_buffer(g_dmabounce_device_info, dma_addr);
}

static dma_addr_t map_single(struct device *dev, struct safe_buffer *buf,
			     size_t size, enum dma_data_direction dir,
			     unsigned long attrs)
{
	BUG_ON(buf->size != size);
	BUG_ON(buf->direction != dir);

	dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
		(u64)buf->safe_dma_addr);

	if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
				     size);

	return buf->safe_dma_addr;
}

static dma_addr_t unmap_single(struct device *dev, struct safe_buffer *buf,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	BUG_ON(buf->size != size);
	BUG_ON(buf->direction != dir);

	if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
		dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
			(u64)buf->unsafe_dma_addr);

		bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
				     size);
	}
	return buf->unsafe_dma_addr;
}

/* ************************************************** */

/*
 * see if a buffer address is in an 'unsafe' range. if it is
 * allocate a 'safe' buffer and copy the unsafe buffer into it.
 * substitute the safe buffer for the unsafe one.
 * (basically move the buffer from an unsafe area to a safe one)
 */
static dma_addr_t
dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
		   size_t size, enum dma_data_direction dir,
		   unsigned long attrs)
{
	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
	dma_addr_t dma_addr;

	dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;

	swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)),
			       size, dir);

	if (device_info && (dma_addr + size) > device_info->threshold) {
		struct safe_buffer *buf;

		buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
		if (!buf) {
			DO_STATS(device_info->fail_count++);
			return (~(dma_addr_t)0x0);
		}

		DO_STATS(device_info->map_count++);

		dma_addr = map_single(dev, buf, size, dir, attrs);
	}

	return dma_addr;
}

/*
 * see if a mapped address was really a "safe" buffer and if so, copy
 * the data from the safe buffer back to the unsafe buffer and free up
 * the safe buffer. (basically return things back to the way they
 * should be)
 */
static void
dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
		     enum dma_data_direction dir, unsigned long attrs)
{
	struct safe_buffer *buf;

	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
	if (buf) {
		DO_STATS(g_dmabounce_device_info->unmap_count++);
		dma_addr = unmap_single(dev, buf, size, dir, attrs);
		free_safe_buffer(g_dmabounce_device_info, buf);
	}

	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)),
				 size, dir);
	swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
}

/*
 * A version of dmabounce_map_page that assumes the mapping has already
 * been created - intended for streaming operation.
 */
static void
dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
			  enum dma_data_direction dir)
{
	struct safe_buffer *buf;

	swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)),
			       size, dir);

	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
	if (buf) {
		DO_STATS(g_dmabounce_device_info->sync_dev_count++);
		map_single(dev, buf, size, dir, 0);
	}
}

/*
 * A version of dmabounce_unmap_page that doesn't destroy the mapping -
 * intended for streaming operation.
 */
static void
dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr, size_t size,
		       enum dma_data_direction dir)
{
	struct safe_buffer *buf;

	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
	if (buf) {
		DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
		dma_addr = unmap_single(dev, buf, size, dir, 0);
	}

	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)),
				 size, dir);
	swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
}

static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
{
	if (g_dmabounce_device_info)
		return 0;

	return swiotlb_dma_supported(dev, dma_mask);
}

static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return swiotlb_dma_mapping_error(dev, dma_addr);
}

static const struct dma_map_ops dmabounce_ops = {
	.alloc			= arm64_dma_alloc,
	.free			= arm64_dma_free,
	.mmap			= arm64_dma_mmap,
	.get_sgtable		= arm64_dma_get_sgtable,
	.map_page		= dmabounce_map_page,
	.unmap_page		= dmabounce_unmap_page,
	.sync_single_for_cpu	= dmabounce_sync_for_cpu,
	.sync_single_for_device	= dmabounce_sync_for_device,
	.map_sg			= arm64_dma_map_sg,
	.unmap_sg		= arm64_dma_unmap_sg,
	.sync_sg_for_cpu	= arm64_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= arm64_dma_sync_sg_for_device,
	.dma_supported		= dmabounce_dma_supported,
	.mapping_error		= dmabounce_mapping_error,
};

int brcm_pcie_bounce_init(struct device *dev,
			  unsigned long buffer_size,
			  dma_addr_t threshold)
{
	struct dmabounce_device_info *device_info;
	int ret;

	/* Only support a single client */
	if (g_dmabounce_device_info)
		return -EBUSY;

	ret = bcm2838_dma40_memcpy_init();
	if (ret)
		return ret;

	device_info = kmalloc(sizeof(struct dmabounce_device_info),
			      GFP_ATOMIC);
	if (!device_info) {
		dev_err(dev, "Could not allocate dmabounce_device_info\n");
		return -ENOMEM;
	}

	ret = bounce_create(&device_info->pool, dev, buffer_size);
	if (ret) {
		dev_err(dev,
			"dmabounce: could not allocate %lu byte DMA pool\n",
			buffer_size);
		goto err_bounce;
	}

	device_info->dev = dev;
	device_info->threshold = threshold;
	INIT_LIST_HEAD(&device_info->safe_buffers);
	rwlock_init(&device_info->lock);

	DO_STATS(device_info->map_count = 0);
	DO_STATS(device_info->unmap_count = 0);
	DO_STATS(device_info->sync_dev_count = 0);
	DO_STATS(device_info->sync_cpu_count = 0);
	DO_STATS(device_info->fail_count = 0);
	DO_STATS(device_info->attr_res =
		 device_create_file(dev, &dev_attr_dmabounce_stats));

	g_dmabounce_device_info = device_info;

	dev_info(dev, "dmabounce: initialised - %lu kB, threshold %pad\n",
		 buffer_size / 1024, &threshold);

	return 0;

err_bounce:
	kfree(device_info);
	return ret;
}
EXPORT_SYMBOL(brcm_pcie_bounce_init);

void brcm_pcie_bounce_uninit(struct device *dev)
{
	struct dmabounce_device_info *device_info = g_dmabounce_device_info;

	g_dmabounce_device_info = NULL;

	if (!device_info) {
		dev_warn(dev,
			 "Never registered with dmabounce but attempting to unregister!\n");
		return;
	}

	if (!list_empty(&device_info->safe_buffers)) {
		dev_err(dev, "Removing from dmabounce with pending buffers!\n");
		BUG();
	}

	bounce_destroy(&device_info->pool);

	DO_STATS(if (device_info->attr_res == 0)
		 device_remove_file(dev, &dev_attr_dmabounce_stats));

	kfree(device_info);
}
EXPORT_SYMBOL(brcm_pcie_bounce_uninit);

int brcm_pcie_bounce_register_dev(struct device *dev)
{
	set_dma_ops(dev, &dmabounce_ops);

	return 0;
}
EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);

MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
MODULE_DESCRIPTION("Dedicated DMA bounce support for pcie-brcmstb");
MODULE_LICENSE("GPL");
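
/*
 * A minimal usage sketch (not part of this module): how a client driver such
 * as pcie-brcmstb might wire up the three exported helpers above. The probe
 * and remove function names, the platform_device, and the 4 MB pool size /
 * 0xc0000000 threshold are illustrative assumptions only - the real caller
 * chooses its own buffer size and DMA threshold.
 *
 *	static int client_probe(struct platform_device *pdev)
 *	{
 *		struct device *dma_dev = &pdev->dev;	// hypothetical DMA master
 *		int ret;
 *
 *		// Create the bounce pool and the dmabounce_stats sysfs file.
 *		ret = brcm_pcie_bounce_init(dma_dev, SZ_4M,
 *					    (dma_addr_t)0xc0000000);
 *		if (ret)
 *			return ret;
 *
 *		// Route this device's streaming DMA through dmabounce_ops.
 *		return brcm_pcie_bounce_register_dev(dma_dev);
 *	}
 *
 *	static int client_remove(struct platform_device *pdev)
 *	{
 *		// Tear down the pool; all safe buffers must already be
 *		// unmapped, otherwise brcm_pcie_bounce_uninit() will BUG().
 *		brcm_pcie_bounce_uninit(&pdev->dev);
 *		return 0;
 *	}
 */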