ZYNQ学习之路——DMA PS(PL330)基础

本文转载自:亦梦云烟微信公众号

1. 什么是DMA
DMA是直接内存访问(Direct Memory Access),DMA引擎可以将数据从一个地方传输到另一个地方,在传输过程中不经过CPU的控制。最简单的DMA用法是将数据从内存的一个区域搬运到另一个区域。DMA也可以将外设的数据(如ADC)搬运到内存中,或者将内存数据搬运到外设中(如DAC)。

Zynq-7000系列器件PS端的DMA控制器采用ARM的IP核DMA-330(PL-330)实现。

开发环境

  • Windows 10 64位
  • Vivado 2018.2
  • XC7Z010-1-CLG400
  • 1.1 结构特点
    DMA控制器具有以下的特点:

    1. 8个独立的通道,4个可用于PL—PS间数据管理,每个通道有1024Byte的MFIFO;
    2. 使用CPU_2x 时钟搬运数据,CPU_2x = (CPU frq/6)*2;
    3. 执行自定义内存区域内的DMA指令运行DMA;
    4. APB寄存器访问接口支持安全和非安全模式;
    5. 每个通道内置4字Cache;
    6. 可以访问SoC的以下映射物理地址:

    DDR、OCM、PL、Linear QSPI Read、SMC和M_AXI_GP设备,访问设备的互联结构如图1所示。

    图1 DMA PS结构示意图

    1.2 Zynq 访问互联结构图
    从图1可以看出DMA控制器可以访问连接到Central Interconnect上的所有设备,并提供了四个通道的外设管理接口可用于控制PL的数据搬运。

    Zynq系列器件中DMA控制器采用ARM PL-330 IP核的r1p1版本,结构框图如图2所示。

    图2 ZYNQ DMA控制器结构示意图

    如图2所示,DMA控制器由指令执行引擎、AXI Master数据接口、APB寄存器访问接口、可以连接到PL的外设请求接口、数据缓冲FIFO以及控制及状态产生单元组成。

    从图2可以看到,DMA PL330的设计思想是:DMA控制器通过DMA指令执行引擎执行自己的指令,并将执行状态通过APB总线和中断等形式反馈给CPU,达到数据搬运不占用CPU的目的。

    DMA控制器共有八个通道,其中四个通道负责互联到Central Interconnect的存储单元上的数据搬运;另外四个通道带有外设请求接口,可用于PL AXI互联接口的数据访问管理。

    每个DMA通道都执行自己的指令,拥有自己的独立线程,通道间互不影响。指令执行引擎有自己独立的Cache线。

    2. 实例测试
    首先构建AXI DMA例程使用的硬件环境,如图3所示,ZYNQ通过GP0端口读取Block RAM数据。

    图3 Block RAM硬件结构

    2.1 测试硬件完整性
    首先使用SDK测试硬件的完整性,编写如下代码测试BRAM读写情况。

    #include <stdio.h>
    #include "platform.h"
    #include "xil_printf.h"
    #include "xtime_l.h"
    #include "xparameters.h"
     
    /* Forward declaration of the block RAM self-test defined below. */
    void TC_BRAM();
    /* Base address TC_BRAM writes through (AXI BRAM controller 0). */
    #define RAM_W	XPAR_AXI_BRAM_CTRL_0_S_AXI_BASEADDR
    /*
     * Base address TC_BRAM reads back through (AXI BRAM controller 1).
     * The test expects data written via RAM_W to be visible here.
     */
    #define RAM_R	XPAR_AXI_BRAM_CTRL_1_S_AXI_BASEADDR
     
    /*
     * Entry point: bring the platform up, run the block RAM self-test once and
     * tear the platform down again. Always returns 0.
     */
    int main()
    {
        init_platform();
        TC_BRAM();
        cleanup_platform();

        return 0;
    }
     
    /*
     * Block RAM self-test.
     *
     * Writes the values 0..4095 as 32-bit words through RAM_W, reads them back
     * through RAM_R and compares each word. Prints "Test Failed" at the first
     * mismatch and "Test pass" only when every word matched, followed by the
     * time spent in the read-back loop in microseconds.
     */
    void TC_BRAM()
    {
        /* volatile: memory-mapped accesses must not be merged or dropped. */
        volatile int *wr = (volatile int *)RAM_W;
        volatile int *rd = (volatile int *)RAM_R;
        XTime tb, te;
        double dt = 0.0;
        int ok = 1;

        printf("test for block RAM\n");
        XTime_SetTime(0);

        for (int i = 0; i < 4*1024; i++)
        {
            wr[i] = i;
        }

        XTime_GetTime(&tb);
        for (int i = 0; i < 4*1024; i++)
        {
            if (rd[i] != i)
            {
                printf("Test Failed\n");
                ok = 0;
                break;
            }
        }
        XTime_GetTime(&te);

        /* Report success only if the comparison loop ran to completion. */
        if (ok)
        {
            printf("Test pass\n");
        }

        /* Convert in floating point so fractions of a microsecond survive. */
        dt = (double)(te - tb) * 1000000.0 / COUNTS_PER_SECOND;
        printf("%fus\n", dt);
        printf("test for block RAM end!\n");
    }

    在串口终端中如果没有输出"Test Failed"则说明硬件设计无误。

    2.2 测试内存读取速度
    在使用DMA之前,首先在不使用DMA的情况下测试内存读取的速度。本例程首先写入0~4095,然后全部读取出来。

    指针循环访问:

    /*
     * Measures how long the CPU needs to read 4096 32-bit words from RAM_R
     * with a plain pointer loop, after first filling the memory through RAM_W
     * with the values 0..4095. Prints the read time in microseconds.
     */
    void TC_PointerSpeed()
    {
        /*
         * static: keeps the 16 KB buffer off the stack.
         * NOTE(review): check the configured stack size in the linker script.
         * volatile: forces every load and store of the measured loop to be
         * performed even though the buffer is never read afterwards.
         */
        static volatile int a[4*1024];
        volatile int *wr = (volatile int *)RAM_W;
        volatile int *rd = (volatile int *)RAM_R;
        XTime tb, te;
        double dt = 0.0;

        XTime_SetTime(0);
        for (int i = 0; i < 4*1024; i++)
        {
            wr[i] = i;
        }

        XTime_GetTime(&tb);
        for (int i = 0; i < 4*1024; i++)
        {
            a[i] = rd[i];
        }
        XTime_GetTime(&te);

        /* Convert in floating point so fractions of a microsecond survive. */
        dt = (double)(te - tb) * 1000000.0 / COUNTS_PER_SECOND;
        printf("%fus\n", dt);
    }

    memcpy:

    /*
     * Measures how long memcpy() needs to copy 4096 32-bit words from RAM_R
     * into a local buffer, after first filling the memory through RAM_W with
     * the values 0..4095. Prints the copy time in microseconds.
     */
    void TC_MemcpySpeed()
    {
        /*
         * static: keeps the 16 KB buffer off the stack.
         * NOTE(review): check the configured stack size in the linker script.
         */
        static int a[4*1024];
        volatile int *wr = (volatile int *)RAM_W;
        XTime tb, te;
        double dt = 0.0;

        XTime_SetTime(0);
        for (int i = 0; i < 4*1024; i++)
        {
            wr[i] = i;
        }

        XTime_GetTime(&tb);
        memcpy(a, (void*)RAM_R, sizeof(a));
        XTime_GetTime(&te);

        /* Convert in floating point so fractions of a microsecond survive. */
        dt = (double)(te - tb) * 1000000.0 / COUNTS_PER_SECOND;
        printf("%fus\n", dt);
    }

    速度如下表所示。

    可以看出使用CPU进行的内存复制效率非常低。

    3. DMAPS应用
    3.1 编程模型

    本文不考虑外设请求接口,DMA控制器编程分为以下几个部分:

    1. DMA控制器初始化;
    2. 组织DMA引擎执行代码;
    3. 启动或停止DMA传输;
    4. 异常处理。

    官方例程在Vivado安装路径下:

    Vivado2018.2\SDK\2018.2\data\embeddedsw\XilinxProcessorIPLib\drivers\dmaps_v2_3\examples

    #include
    #include "platform.h"
    #include "xil_printf.h"
    #include "sleep.h"
    #include "xparameters.h"
    #include "xil_types.h"
    #include "xil_assert.h"
    #include "xil_io.h"
    #include "xil_exception.h"
    #include "xil_cache.h"
    #include "xil_printf.h"
    #include "xscugic.h"
    #include "xdmaps.h"

    /************************** Constant Definitions *****************************/
    /*
    * The following constants map to the XPAR parameters created in the
    * xparameters.h file. They are defined here such that a user can easily
    * change all the needed parameters in one place.
    */
    #define DMA_DEVICE_ID XPAR_XDMAPS_1_DEVICE_ID
    #define INTC_DEVICE_ID XPAR_SCUGIC_SINGLE_DEVICE_ID

    #define DMA_DONE_INTR_0 XPAR_XDMAPS_0_DONE_INTR_0
    #define DMA_DONE_INTR_1 XPAR_XDMAPS_0_DONE_INTR_1
    #define DMA_DONE_INTR_2 XPAR_XDMAPS_0_DONE_INTR_2
    #define DMA_DONE_INTR_3 XPAR_XDMAPS_0_DONE_INTR_3
    #define DMA_DONE_INTR_4 XPAR_XDMAPS_0_DONE_INTR_4
    #define DMA_DONE_INTR_5 XPAR_XDMAPS_0_DONE_INTR_5
    #define DMA_DONE_INTR_6 XPAR_XDMAPS_0_DONE_INTR_6
    #define DMA_DONE_INTR_7 XPAR_XDMAPS_0_DONE_INTR_7
    #define DMA_FAULT_INTR XPAR_XDMAPS_0_FAULT_INTR

    #define TEST_ROUNDS 1 /* Number of loops that the Dma transfers run.*/
    #define DMA_LENGTH 1024 /* Length of the Dma Transfers */
    #define TIMEOUT_LIMIT 0x2000 /* Loop count for timeout */

    /************************** Function Prototypes ******************************/

    /* Runs the interrupt-driven DMA test on the controller selected by DeviceId. */
    int XDmaPs_Example_W_Intr(XScuGic *GicPtr, u16 DeviceId);
    /* Initializes the interrupt controller and connects the DMA fault/done ISRs. */
    int SetupInterruptSystem(XScuGic *GicPtr, XDmaPs *DmaPtr);
    /* Done callback: compares source and destination and records the result per channel. */
    void DmaDoneHandler(unsigned int Channel, XDmaPs_Cmd *DmaCmd,
    void *CallbackRef);

    /************************** Variable Definitions *****************************/
    /*
    * Source and destination buffers of the DMA transfer, DMA_LENGTH ints each.
    * A 32-byte alignment is requested either with the IAR pragma (when
    * __ICCARM__ is defined) or with the GCC-style aligned attribute.
    * NOTE(review): 32 bytes presumably matches the data cache line size --
    * confirm for the target CPU.
    */
    #ifdef __ICCARM__
    #pragma data_alignment=32
    static int Src[DMA_LENGTH];
    static int Dst[DMA_LENGTH];
    #pragma data_alignment=4
    #else
    static int Src[DMA_LENGTH] __attribute__ ((aligned (32)));
    static int Dst[DMA_LENGTH] __attribute__ ((aligned (32)));
    #endif

    /* Driver instance of the DMA controller used by the example. */
    XDmaPs DmaInstance;
    /* The interrupt controller instance is only defined when TESTAPP_GEN is not set. */
    #ifndef TESTAPP_GEN
    XScuGic GicInstance;
    #endif

    #ifndef TESTAPP_GEN
    /*
     * Entry point of the DMA example: runs XDmaPs_Example_W_Intr() once and
     * reports the outcome on the console.
     * Returns XST_SUCCESS when the example passed, XST_FAILURE otherwise.
     */
    int main(void)
    {
        if (XDmaPs_Example_W_Intr(&GicInstance, DMA_DEVICE_ID) == XST_SUCCESS) {
            xil_printf("Successfully ran XDMaPs_Example_W_Intr\r\n");
            return XST_SUCCESS;
        }

        xil_printf("Error: XDMaPs_Example_W_Intr failed\r\n");
        return XST_FAILURE;
    }
    #endif

    /*****************************************************************************/
    /**
    *
    * Interrupt-driven DMA test. For every test round and every DMA channel the
    * Src buffer is filled with a known pattern, Dst is cleared, one
    * memory-to-memory transfer is started and the function busy-waits until
    * DmaDoneHandler() has written a result for the channel or TIMEOUT_LIMIT
    * iterations have elapsed.
    *
    * @param GicPtr is the interrupt controller instance handed to
    *        SetupInterruptSystem().
    * @param DeviceId is the Device ID of the DMA controller.
    *
    * @return XST_SUCCESS when every transfer completed and verified,
    *         otherwise XST_FAILURE.
    *
    * @note A failing lookup, initialization, interrupt setup or XDmaPs_Start()
    *       returns XST_FAILURE immediately; a timeout or a negative result
    *       from the done handler only marks the test as failed and the
    *       remaining channels are still exercised.
    *
    ****************************************************************************/
    int XDmaPs_Example_W_Intr(XScuGic *GicPtr, u16 DeviceId)
    {
        /* One completion flag per channel, written by DmaDoneHandler(). */
        volatile int Done[XDMAPS_CHANNELS_PER_DEV];
        XDmaPs *Dma = &DmaInstance;
        XDmaPs_Config *Cfg;
        XDmaPs_Cmd Cmd;
        unsigned int Chan = 0;
        int Result;
        int Round;
        int Spin;
        int i;

        /* Describe one incrementing Src -> Dst copy of the whole buffer. */
        memset(&Cmd, 0, sizeof(XDmaPs_Cmd));
        Cmd.ChanCtrl.SrcBurstSize = 4;
        Cmd.ChanCtrl.SrcBurstLen = 4;
        Cmd.ChanCtrl.SrcInc = 1;
        Cmd.ChanCtrl.DstBurstSize = 4;
        Cmd.ChanCtrl.DstBurstLen = 4;
        Cmd.ChanCtrl.DstInc = 1;
        Cmd.BD.SrcAddr = (u32) Src;
        Cmd.BD.DstAddr = (u32) Dst;
        Cmd.BD.Length = DMA_LENGTH * sizeof(int);

        /* Bring up the DMA driver. */
        Cfg = XDmaPs_LookupConfig(DeviceId);
        if (Cfg == NULL) {
            return XST_FAILURE;
        }
        if (XDmaPs_CfgInitialize(Dma, Cfg, Cfg->BaseAddress) != XST_SUCCESS) {
            return XST_FAILURE;
        }

        /* Route the DMA interrupts before any transfer is started. */
        if (SetupInterruptSystem(GicPtr, Dma) != XST_SUCCESS) {
            return XST_FAILURE;
        }

        Result = XST_SUCCESS;

        for (Round = 0; Round < TEST_ROUNDS; Round++) {
            xil_printf("Test round %d\r\n", Round);

            for (Chan = 0; Chan < XDMAPS_CHANNELS_PER_DEV; Chan++) {
                /* Known pattern in the source, zeros in the destination. */
                for (i = 0; i < DMA_LENGTH; i++) {
                    Src[i] = DMA_LENGTH - i;
                    Dst[i] = 0;
                }

                Done[Chan] = 0;

                /* The handler receives the flag array as its callback data. */
                XDmaPs_SetDoneHandler(Dma, Chan, DmaDoneHandler, (void *)Done);

                if (XDmaPs_Start(Dma, Chan, &Cmd, 0) != XST_SUCCESS) {
                    return XST_FAILURE;
                }

                /* Busy-wait for the handler's verdict, bounded by TIMEOUT_LIMIT. */
                for (Spin = 0; !Done[Chan] && Spin < TIMEOUT_LIMIT; Spin++) {
                    /* nothing to do but wait */
                }

                if (Spin >= TIMEOUT_LIMIT) {
                    Result = XST_FAILURE;
                }

                /* A negative flag means the handler found a data mismatch. */
                if (Done[Chan] < 0) {
                    Result = XST_FAILURE;
                }
            }
        }

        return Result;
    }

    /******************************************************************************/
    /**
    *
    * Sets up interrupt delivery for the DMA controller. Unless TESTAPP_GEN is
    * defined, the interrupt controller driver is initialized first and its
    * handler is registered as the processor IRQ exception handler. After that
    * the DMA fault ISR and the eight per-channel done ISRs are connected and
    * enabled, and exceptions are enabled on the processor.
    *
    * @param GicPtr is the GIC instance pointer.
    * @param DmaPtr is the DMA instance pointer; it is handed to every ISR as
    *        callback reference.
    *
    * @return XST_SUCCESS when everything was connected, otherwise XST_FAILURE.
    *
    * @note None.
    *
    ****************************************************************************/
    int SetupInterruptSystem(XScuGic *GicPtr, XDmaPs *DmaPtr)
    {
        /* Done interrupt ID of each channel paired with the ISR servicing it. */
        const struct {
            u32 Id;
            Xil_InterruptHandler Isr;
        } DoneTable[] = {
            { DMA_DONE_INTR_0, (Xil_InterruptHandler)XDmaPs_DoneISR_0 },
            { DMA_DONE_INTR_1, (Xil_InterruptHandler)XDmaPs_DoneISR_1 },
            { DMA_DONE_INTR_2, (Xil_InterruptHandler)XDmaPs_DoneISR_2 },
            { DMA_DONE_INTR_3, (Xil_InterruptHandler)XDmaPs_DoneISR_3 },
            { DMA_DONE_INTR_4, (Xil_InterruptHandler)XDmaPs_DoneISR_4 },
            { DMA_DONE_INTR_5, (Xil_InterruptHandler)XDmaPs_DoneISR_5 },
            { DMA_DONE_INTR_6, (Xil_InterruptHandler)XDmaPs_DoneISR_6 },
            { DMA_DONE_INTR_7, (Xil_InterruptHandler)XDmaPs_DoneISR_7 },
        };
        const unsigned int NumDone = sizeof(DoneTable) / sizeof(DoneTable[0]);
        unsigned int n;
        int Status;
    #ifndef TESTAPP_GEN
        XScuGic_Config *GicConfig;

        Xil_ExceptionInit();

        /* Initialize the interrupt controller driver. */
        GicConfig = XScuGic_LookupConfig(INTC_DEVICE_ID);
        if (GicConfig == NULL) {
            return XST_FAILURE;
        }

        Status = XScuGic_CfgInitialize(GicPtr, GicConfig,
                                       GicConfig->CpuBaseAddress);
        if (Status != XST_SUCCESS) {
            return XST_FAILURE;
        }

        /* Let the GIC driver's handler service the processor's IRQ exception. */
        Xil_ExceptionRegisterHandler(XIL_EXCEPTION_ID_IRQ_INT,
                                     (Xil_ExceptionHandler)XScuGic_InterruptHandler,
                                     GicPtr);
    #endif

        /* Fault ISR first; give up immediately if it cannot be connected. */
        Status = XScuGic_Connect(GicPtr, DMA_FAULT_INTR,
                                 (Xil_InterruptHandler)XDmaPs_FaultISR,
                                 (void *)DmaPtr);
        if (Status != XST_SUCCESS) {
            return XST_FAILURE;
        }

        /*
         * Connect the done ISR of every channel. All connections are attempted
         * and their status codes are OR-ed together, so a single failure is
         * reported after the whole table has been walked.
         */
        for (n = 0; n < NumDone; n++) {
            Status |= XScuGic_Connect(GicPtr, DoneTable[n].Id,
                                      DoneTable[n].Isr, (void *)DmaPtr);
        }
        if (Status != XST_SUCCESS) {
            return XST_FAILURE;
        }

        /* Enable the done interrupts in channel order, then the fault interrupt. */
        for (n = 0; n < NumDone; n++) {
            XScuGic_Enable(GicPtr, DoneTable[n].Id);
        }
        XScuGic_Enable(GicPtr, DMA_FAULT_INTR);

        Xil_ExceptionEnable();
        return XST_SUCCESS;
    }

    /*****************************************************************************/
    /**
    *
    * Done callback of a DMA channel. Compares all DMA_LENGTH words of the
    * command's source and destination buffers with each other and with the
    * pattern DMA_LENGTH - index, then stores the verdict for the channel.
    *
    * @param Channel is the Channel number; it indexes the result array.
    * @param DmaCmd is the Dma Command whose buffer addresses are checked.
    * @param CallbackRef is the callback reference data, used as an array of
    *        per-channel result flags.
    *
    * @return None. The flag of the channel is set to 1 when the data matched
    *         and to -XST_FAILURE otherwise.
    *
    * @note None.
    *
    ******************************************************************************/
    void DmaDoneHandler(unsigned int Channel, XDmaPs_Cmd *DmaCmd, void *CallbackRef)
    {
        volatile int *Flags = (volatile int *)CallbackRef;
        int *From = (int *)DmaCmd->BD.SrcAddr;
        int *To = (int *)DmaCmd->BD.DstAddr;
        int Verdict = 1;
        int i;

        /* The whole buffer is scanned; any mismatch marks the transfer as failed. */
        for (i = 0; i < DMA_LENGTH; i++) {
            if ((From[i] != To[i]) || (To[i] != DMA_LENGTH - i)) {
                Verdict = -XST_FAILURE;
            }
        }

        Flags[Channel] = Verdict;
    }

    3.2 修改DMA PS
    修改DMA配置,使其将PL中的数据传输到内存中。

    修改DMA的源地址:
    DmaCmd.BD.SrcAddr = (u32) RAM_R;

    测量DMA传输16KB数据,时间约为180us,速度远远高于memcpy。

    4. Linux DMA驱动
    4.1 编程方法
    配置DMA
    /*
     * Prepare a memory-to-memory DMA transfer from a fixed bus address.
     *
     * @s:    bus address of the transfer source; stored in dma_src.
     * @size: number of bytes of coherent destination memory to allocate.
     *
     * Allocates the destination buffer (dst / dma_dst), requests a channel with
     * the DMA_MEMCPY capability and stores its device and the descriptor flags
     * in the file-scope variables. On failure an error is logged and the
     * function returns early, leaving chan and dev unset.
     */
    void dma_init(u32 s, int size)
    {
        dma_cap_mask_t mask;

        dma_src = s;
        /* Print the bus address that was just stored. */
        printk(KERN_INFO "dma_src = %pad\n", &dma_src);

        /*
         * NOTE(review): a NULL device is passed here; newer kernels expect a
         * valid struct device -- confirm for the target kernel version.
         */
        dst = dma_alloc_coherent(NULL, size, &dma_dst, GFP_KERNEL);
        if (!dst) {
            printk(KERN_ERR "Failed to allocate DMA destination buffer\n");
            return;
        }
        printk(KERN_INFO "dst = %p, dma_dst = %pad\n", dst, &dma_dst);

        dma_cap_zero(mask);
        dma_cap_set(DMA_MEMCPY, mask);//direction:memory to memory
        chan = dma_request_channel(mask, NULL, NULL); //request a dma channel
        if (!chan) {
            printk(KERN_ERR "Failed to request a DMA channel\n");
            return;
        }
        printk(KERN_INFO "dma channel id = %d\n", chan->chan_id);

        flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
        dev = chan->device;
    }

    释放DMA
    /*
     * Release what dma_init() acquired: the coherent destination buffer and the
     * DMA channel. Resources that were never obtained are skipped, and the
     * globals are cleared so a second call is harmless.
     */
    void dma_del(void)
    {
        if (dst) {
            /* dma_free_coherent() takes the bus address by value, not by pointer. */
            dma_free_coherent(NULL, MM_SIZE, dst, dma_dst);
            dst = NULL;
        }

        if (chan) {
            dma_release_channel(chan);
            chan = NULL;
        }
    }

    向DMA引擎发起一个传输请求
    /*
     * Submit one asynchronous DMA memcpy and start it.
     *
     * @dst:  bus address of the destination.
     * @src:  bus address of the source.
     * @size: number of bytes to copy.
     *
     * The start time is stored in tb; dma_callback_func() is installed as the
     * completion callback. If the channel is missing, or the descriptor cannot
     * be prepared or submitted, an error is logged and nothing is started.
     */
    void dma_read(u32 dst,u32 src,int size)
    {
        if (!chan || !dev) {
            printk(KERN_INFO "DMA channel is not initialised\n");
            return;
        }

        /* record the start time */
        do_gettimeofday(&tb);

        /* build a descriptor holding destination, source and length */
        tx = dev->device_prep_dma_memcpy(chan, dst, src, size, flags);
        if (!tx) {
            printk(KERN_INFO "Failed to prepare DMA memcpy\n");
            return; /* tx must not be dereferenced below */
        }

        tx->callback = dma_callback_func;//set call back function
        tx->callback_param = NULL;

        cookie = tx->tx_submit(tx); //submit the desc
        if (dma_submit_error(cookie)) {
            printk(KERN_INFO "Failed to do DMA tx_submit\n");
            return; /* nothing was queued, so there is nothing to issue */
        }

        dma_async_issue_pending(chan);//begin dma transfer
    }

    4.2 实例代码
    先通过ioremap映射得到的地址src向Block RAM中写入一些字符,然后使用DMA将Block RAM中的数据(MM_SIZE = 1440*10字节,约14KB)传输到分配的内存dst中。传输完成后调用dma_callback_func函数,在该函数中比较传输的数据和发送的数据是否相同,并测量DMA消耗的时间。
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include
    #include

    #include
    #include

    /* Name under which the misc device is registered (MMAP_misc.name). */
    #define DEVICE_NAME "dma_driver"
    /*
     * Physical address that char_device_init() maps with ioremap() and passes
     * to dma_init() as the DMA source.
     * NOTE(review): presumably the Block RAM base address -- confirm against
     * the hardware address map.
     */
    #define ImageReadAddress0 0x40000000
    /* Kernel virtual address returned by ioremap() for ImageReadAddress0. */
    volatile unsigned int *CaptureReadAddr0;

    /* Start (tb) and end (te) timestamps of the operation being measured. */
    struct timeval tb, te;

    /* Number of bytes mapped, filled, transferred and compared: 14400. */
    #define MM_SIZE (1440*10)

    void dma_callback_func(void *dma_async_param);
    void dma_read(u32 dma_dst,u32 dma_src,int size);
    void dma_init(u32 s, int size);
    void dma_del(void);

    /* DMA channel obtained in dma_init(). */
    struct dma_chan *chan;
    /* bus addresses of the source and destination, as handed to the DMA engine */
    dma_addr_t dma_src;
    dma_addr_t dma_dst;
    /* kernel virtual addresses of the same two buffers, used by the CPU */
    char *src = NULL;
    char *dst = NULL ;
    /* DMA device that owns chan (set in dma_init()). */
    struct dma_device *dev;
    /* Descriptor, flags and cookie of the most recently submitted transfer. */
    struct dma_async_tx_descriptor *tx = NULL;
    enum dma_ctrl_flags flags;
    dma_cookie_t cookie;

    /*
     * Print the timestamps in tb and te and the time between them as
     * "seconds, microseconds", borrowing from the seconds when the microsecond
     * difference would otherwise be negative.
     */
    static void dma_print_elapsed(void)
    {
        long sec = te.tv_sec - tb.tv_sec;
        long usec = te.tv_usec - tb.tv_usec;

        if (usec < 0) {
            sec--;
            usec += 1000000;
        }

        printk("T:%ld, %ld\n", tb.tv_sec, tb.tv_usec);
        printk("T2:%ld, %ld\n", te.tv_sec, te.tv_usec);
        printk(KERN_ALERT "time use:%ld, %ld\n", sec, usec);
    }

    /*
     * Completion callback, called when the DMA transfer has finished.
     *
     * @dma_async_param: callback parameter set by dma_read(); unused.
     *
     * Reports the DMA duration (tb was taken in dma_read()), then times a CPU
     * memcpy() of the same MM_SIZE bytes for comparison, and finally prints
     * "PASS" or "Failed" depending on whether the data delivered by the DMA
     * matched the 'a'..'z' pattern written by char_device_init().
     */
    void dma_callback_func(void *dma_async_param)
    {
        int i=0;
        int dma_ok = 1;

        do_gettimeofday(&te);

        /*
         * Check the DMA result now: the memcpy() benchmark below overwrites
         * dst, so a later comparison would only validate the CPU copy.
         */
        for (i = 0; i < MM_SIZE; i++) {
            if (*(dst + i) != (char)('a' + i%26)) {
                dma_ok = 0;
                break;
            }
        }

        printk("DMA\n");
        dma_print_elapsed();

        /*
         * NOTE(review): src points at ioremap()ed memory; memcpy_fromio() is
         * the usual accessor. Plain memcpy() is kept so the measurement stays
         * comparable with the figures quoted in the article.
         */
        printk("memcpy\n");
        do_gettimeofday(&tb);
        memcpy(dst ,src, MM_SIZE);
        do_gettimeofday(&te);
        dma_print_elapsed();

        printk("DMA transfer finished!\n\r");
        if (!dma_ok) {
            printk("Failed\n");
            return;
        }
        printk("PASS\n");
    }

    /*
     * Submit one asynchronous DMA memcpy and start it.
     *
     * @dst:  bus address of the destination.
     * @src:  bus address of the source.
     * @size: number of bytes to copy.
     *
     * The start time is stored in tb; dma_callback_func() is installed as the
     * completion callback. If the channel is missing, or the descriptor cannot
     * be prepared or submitted, an error is logged and nothing is started.
     */
    void dma_read(u32 dst,u32 src,int size)
    {
        if (!chan || !dev) {
            printk(KERN_INFO "DMA channel is not initialised\n");
            return;
        }

        /* record the start time */
        do_gettimeofday(&tb);

        /* build a descriptor holding destination, source and length */
        tx = dev->device_prep_dma_memcpy(chan, dst, src, size, flags);
        if (!tx) {
            printk(KERN_INFO "Failed to prepare DMA memcpy\n");
            return; /* tx must not be dereferenced below */
        }

        tx->callback = dma_callback_func;//set call back function
        tx->callback_param = NULL;

        cookie = tx->tx_submit(tx); //submit the desc
        if (dma_submit_error(cookie)) {
            printk(KERN_INFO "Failed to do DMA tx_submit\n");
            return; /* nothing was queued, so there is nothing to issue */
        }

        dma_async_issue_pending(chan);//begin dma transfer
    }

    /*
     * Prepare a memory-to-memory DMA transfer from a fixed bus address.
     *
     * @s:    bus address of the transfer source; stored in dma_src.
     * @size: number of bytes of coherent destination memory to allocate.
     *
     * Allocates the destination buffer (dst / dma_dst), requests a channel with
     * the DMA_MEMCPY capability and stores its device and the descriptor flags
     * in the file-scope variables. On failure an error is logged and the
     * function returns early, leaving chan and dev unset.
     */
    void dma_init(u32 s, int size)
    {
        dma_cap_mask_t mask;

        dma_src = s;
        /* Print the bus address that was just stored. */
        printk(KERN_INFO "dma_src = %pad\n", &dma_src);

        /*
         * NOTE(review): a NULL device is passed here; newer kernels expect a
         * valid struct device -- confirm for the target kernel version.
         */
        dst = dma_alloc_coherent(NULL, size, &dma_dst, GFP_KERNEL);
        if (!dst) {
            printk(KERN_ERR "Failed to allocate DMA destination buffer\n");
            return;
        }
        printk(KERN_INFO "dst = %p, dma_dst = %pad\n", dst, &dma_dst);

        /*
         * dma_read() prepares its descriptor with device_prep_dma_memcpy(), so
         * the channel is requested with the memcpy capability.
         */
        dma_cap_zero(mask);
        dma_cap_set(DMA_MEMCPY, mask);//direction:memory to memory
        chan = dma_request_channel(mask, NULL, NULL); //request a dma channel
        if (!chan) {
            printk(KERN_ERR "Failed to request a DMA channel\n");
            return;
        }
        printk(KERN_INFO "dma channel id = %d\n", chan->chan_id);

        flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
        dev = chan->device;
    }

    /*
     * Release what dma_init() acquired: the coherent destination buffer and the
     * DMA channel. Resources that were never obtained are skipped, and the
     * globals are cleared so a second call is harmless.
     */
    void dma_del(void)
    {
        if (dst) {
            /* dma_free_coherent() takes the bus address by value, not by pointer. */
            dma_free_coherent(NULL, MM_SIZE, dst, dma_dst);
            dst = NULL;
        }

        if (chan) {
            dma_release_channel(chan);
            chan = NULL;
        }
    }

    /* open() handler: no per-open state is kept, so it simply succeeds. */
    static int device_open(struct inode *inode, struct file *file)
    {
        return 0;
    }

    /* release() handler: logs that the device was closed and succeeds. */
    static int device_close(struct inode *indoe, struct file *file)
    {
        printk("device close\n");

        return 0;
    }

    /*
     * read() handler: starts one DMA transfer of MM_SIZE bytes from dma_src to
     * dma_dst. Nothing is copied to the user buffer, so the call always
     * returns 0.
     */
    static ssize_t device_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
    {
        /* The outcome is reported in the kernel log by dma_callback_func(). */
        dma_read(dma_dst, dma_src, MM_SIZE);

        return 0;
    }

    /* File operations of the misc device: open, release and read are handled. */
    static struct file_operations device_fops =
    {
    .owner = THIS_MODULE,
    .open = device_open,
    .release = device_close,
    .read = device_read, /* each read() starts one DMA transfer */
    };

    /* Misc device descriptor registered in char_device_init(). */
    static struct miscdevice MMAP_misc =
    {
    .minor = MISC_DYNAMIC_MINOR, /* let the misc core choose the minor number */
    .name = DEVICE_NAME,
    .fops = &device_fops,
    };

    /*
     * Module init: map the source memory, fill it with a repeating 'a'..'z'
     * pattern, set up the DMA channel and finally register the misc device.
     *
     * The device is registered last so that user space cannot issue a read()
     * before the mapping and the DMA channel exist.
     *
     * Returns 0 on success, -ENOMEM when the mapping fails, or the error code
     * of misc_register().
     */
    static int __init char_device_init( void )
    {
        int ret;
        int i;

        printk("init module\n");

        CaptureReadAddr0 = (volatile unsigned int*)ioremap(ImageReadAddress0, MM_SIZE);
        if (!CaptureReadAddr0) {
            printk("Error:ioremap failed!\n");
            return -ENOMEM;
        }

        /*
         * Write the reference pattern that dma_callback_func() checks.
         * NOTE(review): this writes to ioremap()ed memory through a plain
         * pointer, as the original did; iowrite8()/writeb() are the usual
         * accessors.
         */
        src = (char*)CaptureReadAddr0;
        for (i = 0; i < MM_SIZE; i++) {
            *(src + i) = (char)('a' + i%26);
        }

        printk("init module\n");
        dma_init(ImageReadAddress0, MM_SIZE);

        ret = misc_register(&MMAP_misc);
        if (ret) {
            printk("Error:misc_register failed!\n");
            /* Undo the steps above and report the failure to the loader. */
            dma_del();
            iounmap(CaptureReadAddr0);
            return ret;
        }

        return 0;
    }

    /*
     * Module exit: unregister the misc device, drop the I/O mapping and release
     * the DMA resources, in that order.
     */
    static void __exit char_device_exit( void )
    {
        printk(KERN_ALERT"module exit\n");

        /* Remove the device node first. */
        misc_deregister(&MMAP_misc);

        iounmap(CaptureReadAddr0);
        dma_del();
    }

    MODULE_LICENSE("GPL");
    MODULE_AUTHOR("DMA_test");

    module_init(char_device_init);// module load entry point
    module_exit(char_device_exit);// module unload entry point

    使用DMA搬运和memcpy搬运PL中的数据速度对比如下:
    Z-turn# ./test
    Test for dma
    DMA
    T:34, 358179
    T2:34, 358290
    time use:0, 111
    memcpy
    T:34, 364372
    T2:34, 364796
    time use:0, 424
    DMA transfer finished!
    PASS

    DMA搬运消耗了111us,而memcpy需要使用424us,可见DMA速度远高于CPU对数据的搬运。

    最新文章

    最新文章