Commit f492bc04 authored by Federico Vaga, committed by Federico Vaga

dma: improve dma transfer with kernel 3.6

Thanks to this improvement we gain 10us on an
"Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz".

(There is still room for optimization for zio-buf-vmalloc.)
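
For reference, a minimal sketch of how a driver is expected to drive the
per-block API touched by this patch. It is illustrative only: the
"struct my_hw_desc" layout, "my_fill_desc" and the surrounding driver
code are hypothetical and not part of this patch.

    /* hypothetical hardware page descriptor, one per scatterlist entry */
    struct my_hw_desc {
        dma_addr_t dma_addr;   /* bus address of the chunk */
        uint32_t length;       /* length of the chunk in bytes */
        uint32_t dev_off;      /* destination offset in device memory */
    };

    /* invoked by zio_dma_map_sg() once per scatterlist entry */
    static int my_fill_desc(struct zio_dma_sg *zsg)
    {
        struct my_hw_desc *desc = zsg->page_desc;

        desc->dma_addr = sg_dma_address(zsg->sg);
        desc->length = sg_dma_len(zsg->sg);
        desc->dev_off = zsg->dev_mem_off;
        return 0;
    }

    /* typical call sequence in the driver's acquisition path */
    zdma = zio_dma_alloc_sg(chan, hwdev, blocks, n_blocks, GFP_ATOMIC);
    if (IS_ERR(zdma))
        return PTR_ERR(zdma);
    err = zio_dma_map_sg(zdma, sizeof(struct my_hw_desc), my_fill_desc);
    if (err) {
        zio_dma_free_sg(zdma);
        return err;
    }
    /* ... program and start the hardware, wait for completion ... */
    zio_dma_unmap_sg(zdma);
    zio_dma_free_sg(zdma);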
Signed-off-by: Federico Vaga <federico.vaga@cern.ch>
parent f07570f7
@@ -26,10 +26,24 @@
*/
static int __zio_dma_alloc_pool(struct zio_dma_sgt *zdma, size_t page_desc_size)
{
unsigned int tot_nents = 0;
int i;
/* Prepare the transfers pool area */
zdma->page_desc_size = page_desc_size;
zdma->page_desc_pool_size = zdma->page_desc_size * zdma->sgt.nents;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0)
for (i = 0; i < zdma->n_blocks; ++i)
tot_nents += zdma->sg_blocks[i].sgt.nents;
#else
tot_nents = zdma->sgt.nents;
#endif
if (unlikely(!tot_nents)) {
dev_warn(zdma->hwdev, "No DMA page descriptor to allocate\n");
return -ENOMEM;
}
zdma->page_desc_pool_size = zdma->page_desc_size * tot_nents;
zdma->page_desc_pool = kzalloc(zdma->page_desc_pool_size,
GFP_ATOMIC | GFP_DMA);
if (!zdma->page_desc_pool) {
@@ -48,7 +62,7 @@ static int __zio_dma_alloc_pool(struct zio_dma_sgt *zdma, size_t page_desc_size)
}
pr_debug("%s:%d DMA transfer pool allocated for max %d transfers\n",
__func__, __LINE__, zdma->sgt.nents);
__func__, __LINE__, tot_nents);
return 0;
}
@@ -70,6 +84,334 @@ static void __zio_dma_free_pool(struct zio_dma_sgt *zdma)
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0)
/**
* It converts a zio block into an array of pages
* @param[in] block zio block to convert
* @param[out] pages array to fill with the pages backing the block data
* @param[in] max_n_pages maximum number of entries available in @pages
* @return the actual number of pages used
*/
static unsigned int zio_block_to_pages(struct zio_block *block,
struct page **pages,
unsigned int max_n_pages)
{
unsigned int bytesleft = block->datalen, mapbytes;
unsigned int n_pages = 0;
void *bufp = block->data;
if (is_vmalloc_addr(bufp)) {
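/* vmalloc'ed buffers are only virtually contiguous: look up each page with vmalloc_to_page() */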
while (bytesleft && n_pages < max_n_pages) {
pages[n_pages] = vmalloc_to_page(bufp);
n_pages++;
if (bytesleft < (PAGE_SIZE - offset_in_page(bufp)))
mapbytes = bytesleft;
else
mapbytes = PAGE_SIZE - offset_in_page(bufp);
bufp += mapbytes;
bytesleft -= mapbytes;
}
} else {
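/* lowmem (kmalloc'ed) buffers: virt_to_page() on the linear mapping is enough */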
while (bytesleft && n_pages < max_n_pages) {
pages[n_pages] = virt_to_page(bufp);
n_pages++;
if (bytesleft < (PAGE_SIZE - offset_in_page(bufp)))
mapbytes = bytesleft;
else
mapbytes = PAGE_SIZE - offset_in_page(bufp);
bufp += mapbytes;
bytesleft -= mapbytes;
}
}
pr_debug("%s:%d found %d pages\n", __func__, __LINE__, n_pages);
return n_pages;
}
/**
* Allocate resources for the DMA transfer of a single block
* @param[in] sgb block which needs a DMA transfer
* @param[in] gfp allocation flags
* @return 0 on success, otherwise a negative error code
*/
static int __zio_dma_alloc_sg_single(struct zio_blocks_sg *sgb, gfp_t gfp)
{
unsigned int max_n_pages;
int err;
/* Get pages from the zio block */
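/* worst case: in-page offset plus data length, rounded up to whole pages (~PAGE_MASK is PAGE_SIZE - 1) */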
max_n_pages = (((unsigned long)sgb->block->data & ~PAGE_MASK) +
sgb->block->datalen + ~PAGE_MASK) >> PAGE_SHIFT;
if (unlikely(!max_n_pages)) {
pr_warn("%s: No page to allocate for DMA transfer\n", __func__);
return -ENOMEM;
}
sgb->pages = kmalloc(max_n_pages * sizeof(struct page *), gfp);
if (!sgb->pages)
return -ENOMEM;
/* get the array of pages for a given block */
sgb->n_pages = zio_block_to_pages(sgb->block, sgb->pages,
max_n_pages);
/* Allocate scatterlist table and optimize dma transfers */
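/* sg_alloc_table_from_pages() merges physically contiguous pages into single entries, so fewer descriptors need to be mapped and filled */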
err = sg_alloc_table_from_pages(&sgb->sgt,
sgb->pages, sgb->n_pages,
offset_in_page(sgb->block->data),
sgb->block->datalen,
gfp);
if (err) {
kfree(sgb->pages);
return err;
}
pr_debug("%s:%d allocated scatter list for max %d pages\n",
__func__, __LINE__, sgb->n_pages);
return 0;
}
/**
* Release resources for a single block
* @param[in] sgb block to release
*/
static void __zio_dma_free_sg_single(struct zio_blocks_sg *sgb)
{
sg_free_table(&sgb->sgt);
kfree(sgb->pages);
}
/**
* It maps the scatterlist of a single block and asks the driver to fill
* the hardware page descriptors
* @param[in] zdma zio DMA descriptor
* @param[in] sgb block to map
* @param[in] i_blk index of the block within the zio DMA descriptor
* @param[in] fill_desc callback for the driver in order to fill each transfer
* descriptor.
* @return 0 on success, otherwise an error code
*/
static int __zio_dma_map_sg_single(struct zio_dma_sgt *zdma,
struct zio_blocks_sg *sgb, unsigned int i_blk,
int (*fill_desc)(struct zio_dma_sg *zsg))
{
uint32_t dev_mem_off = 0, sglen;
struct scatterlist *sg;
struct zio_dma_sg zsg;
void *item_ptr;
int err, i;
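/* each block is transferred starting from its own device memory offset */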
dev_mem_off = sgb->dev_mem_off;
/* Map DMA buffers */
sglen = dma_map_sg(zdma->hwdev, sgb->sgt.sgl, sgb->sgt.nents,
DMA_FROM_DEVICE);
if (!sglen) {
dev_err(zdma->hwdev, "cannot map dma SG memory\n");
return -ENOMEM;
}
zsg.zsgt = zdma;
for_each_sg(sgb->sgt.sgl, sg, sgb->sgt.nents, i) {
dev_dbg(zdma->hwdev, "%s: addr 0x%x, len %d, dev_off 0x%x\n",
__func__, sg_dma_address(sg), sg_dma_len(sg),
dev_mem_off);
dev_dbg(zdma->hwdev, "%d 0x%x\n", i, dev_mem_off);
/* Configure hardware pages */
zsg.sg = sg;
zsg.dev_mem_off = dev_mem_off;
zsg.page_desc = zdma->page_desc_next;
zsg.block_idx = i_blk;
zsg.page_idx = i;
/* Point to the next free DMA slot for page descriptors */
zdma->page_desc_next += zdma->page_desc_size;
zdma->page_desc_pool_dma_next += zdma->page_desc_size;
/* Ask driver to fill page descriptor */
err = fill_desc(&zsg);
if (err) {
dev_err(zdma->hwdev, "Cannot fill descriptor %d\n", i);
goto out;
}
dev_mem_off += sg_dma_len(sg);
}
pr_debug("%s:%d mapped %d DMA transfers\n",
__func__, __LINE__, i);
return 0;
/* errors */
out:
dma_unmap_sg(zdma->hwdev, sgb->sgt.sgl, sgb->sgt.nents,
DMA_FROM_DEVICE);
return err;
}
/**
* It unmaps the scatterlist of a single block
* @param[in] zdma zio DMA descriptor
* @param[in] sgb block to unmap
*/
static void __zio_dma_unmap_sg_single(struct zio_dma_sgt *zdma,
struct zio_blocks_sg *sgb)
{
dma_unmap_sg(zdma->hwdev, sgb->sgt.sgl, sgb->sgt.nents,
DMA_FROM_DEVICE);
}
/**
* @param[in] chan zio channel associated to this scatterlist
* @param[in] hwdev low-level device responsible for the DMA
* @param[in] blocks array of zio_block to transfer
* @param[in] n_blocks number of blocks to transfer
* @param[in] gfp gfp flags for memory allocation
*
* The function allocates and initializes a scatterlist ready for a DMA
* transfer
* @return a zio DMA descriptor on success, otherwise an ERR_PTR() value
*/
struct zio_dma_sgt *zio_dma_alloc_sg(struct zio_channel *chan,
struct device *hwdev,
struct zio_block **blocks, /* FIXME to array */
unsigned int n_blocks, gfp_t gfp)
{
struct zio_dma_sgt *zdma;
unsigned int i;
int err;
if (unlikely(!chan || !hwdev || !blocks || !n_blocks))
return ERR_PTR(-EINVAL);
/*
* Allocate a new zio_dma_sgt structure that will contain all the necessary
* information for the DMA transfer
*/
zdma = kzalloc(sizeof(struct zio_dma_sgt), gfp);
if (!zdma)
return ERR_PTR(-ENOMEM);
zdma->chan = chan;
/* Allocate a new list of blocks with sg information */
zdma->sg_blocks = kzalloc(sizeof(struct zio_blocks_sg) * n_blocks, gfp);
if (!zdma->sg_blocks) {
err = -ENOMEM;
goto out;
}
/* fill the zio_dma_sgt structure for each sg_block */
zdma->hwdev = hwdev;
zdma->n_blocks = n_blocks;
for (i = 0; i < n_blocks; ++i) {
zdma->sg_blocks[i].block = blocks[i];
err = __zio_dma_alloc_sg_single(&zdma->sg_blocks[i], gfp);
if (err)
goto out_alloc;
}
pr_debug("%s:%d allocated scatter lists for %d blocks\n",
__func__, __LINE__, n_blocks);
return zdma;
/* errors */
out_alloc:
while (i--)
__zio_dma_free_sg_single(&zdma->sg_blocks[i]);
kfree(zdma->sg_blocks);
out:
kfree(zdma);
return ERR_PTR(err);
}
EXPORT_SYMBOL(zio_dma_alloc_sg);
/**
* It releases the resources allocated by zio_dma_alloc_sg()
* @param[in] zdma zio DMA transfer descriptor
*/
void zio_dma_free_sg(struct zio_dma_sgt *zdma)
{
int i;
/* release all sgt tables and array of pages */
for (i = 0; i < zdma->n_blocks; ++i)
__zio_dma_free_sg_single(&zdma->sg_blocks[i]);
kfree(zdma->sg_blocks);
kfree(zdma);
}
EXPORT_SYMBOL(zio_dma_free_sg);
/**
* It maps the sg table of each block
* @param[in] zdma zio DMA descriptor from zio_dma_alloc_sg()
* @param[in] page_desc_size the size (in bytes) of the DMA transfer descriptor
* of the specific hardware
* @param[in] fill_desc callback for the driver in order to fill each transfer
* descriptor.
* @return 0 on success, otherwise an error code
*/
int zio_dma_map_sg(struct zio_dma_sgt *zdma, size_t page_desc_size,
int (*fill_desc)(struct zio_dma_sg *zsg))
{
unsigned int i;
int err;
if (unlikely(!zdma || !fill_desc || !page_desc_size))
return -EINVAL;
err = __zio_dma_alloc_pool(zdma, page_desc_size);
if (err)
return err;
/* Configure a DMA transfer for each block */
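/* page_desc_next and page_desc_pool_dma_next walk the descriptor pool as the blocks are mapped */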
zdma->page_desc_next = zdma->page_desc_pool;
zdma->page_desc_pool_dma_next = zdma->page_desc_pool_dma;
for (i = 0; i < zdma->n_blocks; ++i) {
err = __zio_dma_map_sg_single(zdma, &zdma->sg_blocks[i],
i, fill_desc);
if (err)
goto out;
}
pr_debug("%s:%d mapped %d blocks\n", __func__, __LINE__, zdma->n_blocks);
return 0;
/* errors */
out:
while (i--)
__zio_dma_unmap_sg_single(zdma, &zdma->sg_blocks[i]);
__zio_dma_free_pool(zdma);
return err;
}
EXPORT_SYMBOL(zio_dma_map_sg);
/**
* It unmaps the sg table of each block
* @param[in] zdma zio DMA descriptor from zio_dma_alloc_sg()
*/
void zio_dma_unmap_sg(struct zio_dma_sgt *zdma)
{
struct zio_blocks_sg *sgb;
int i;
/* release all the mapped areas */
for (i = 0; i < zdma->n_blocks; ++i) {
sgb = &zdma->sg_blocks[i];
dma_unmap_sg(zdma->hwdev, sgb->sgt.sgl, sgb->sgt.nents,
DMA_FROM_DEVICE);
}
__zio_dma_free_pool(zdma);
}
EXPORT_SYMBOL(zio_dma_unmap_sg);
#else /* LINUX KERNEL < 3.6.0 */
static int zio_calculate_nents(struct zio_blocks_sg *sg_blocks,
unsigned int n_blocks)
{
@@ -338,7 +680,7 @@ void zio_dma_unmap_sg(struct zio_dma_sgt *zdma)
}
EXPORT_SYMBOL(zio_dma_unmap_sg);
#endif
/**
* Notify zio about a DMA error so it can clean up ZIO structures,
* free all blocks programmed for the DMA and the active_block
......
@@ -19,9 +19,14 @@
* @dev_mem_off: device memory offset from which to retrieve data for this block
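* @pages: array of pages backing the block data
* @n_pages: number of valid entries in @pages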
*/
struct zio_blocks_sg {
struct sg_table sgt;
struct zio_block *block;
unsigned int first_nent;
unsigned long dev_mem_off;
unsigned int offset;
struct page **pages;
unsigned int n_pages;
};
/**
@@ -42,6 +47,7 @@ struct zio_dma_sgt {
struct sg_table sgt;
size_t page_desc_size;
size_t page_desc_pool_size;
void *page_desc_pool; /**< kmalloc */
void *page_desc_next; /**< next free page */
dma_addr_t page_desc_pool_dma; /**< dma address */
......