Zynq学习笔记——HLS FAST corner导出keypoints(一)

PC平台:WINDOWS 10 64位
Xilinx设计开发套件:Xilinx_vivado_sdk_2015.4
开发板:ZedBoard

之前参照XAPP1167文档,使用HLS Video函数库里的FASTX跑了一下例子。当时的例子是直接把keypoint以mask方式画在了原始视频图像上,应用层并没有获取到keypoint的坐标信息,所以无法开展下一步的图像处理,比如获取keypoint的特征点信息进行图像匹配等。其实HLS FASTX提供了两个函数:一个返回keypoint的mask图像,另一个返回keypoint数组。所以如果需要获取keypoint的坐标信息,必须使用第二个函数。

可以在Vivado HLS安装目录下找到FASTX的源代码,有两个地方,Xilinx\Vivado_HLS\2015.4\include\hls\hls_video_fast.h和Xilinx\Vivado_HLS\2015.4\common\technology\autopilot\hls\hls_video_fast.h
//generate array
template
void FAST_t_opr(
Mat &_src,
Point_ (&_keypoints)[N],
HLS_TNAME(SRC_T) _threshold,
bool _nonmax_supression,
int (&flag)[PSize][2]
)
{
typedef typename pixel_op_type::T INPUT_T;
LineBuffer k_buf;
LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> > core_buf;
Window<3,3,ap_int<16> > core_win;
Window win;
Scalar s;
int rows= _src.rows;
int cols= _src.cols;
assert(rows <= ROWS);
assert(cols <= COLS);
int kernel_half=KERNEL_SIZE/2;
ap_uint<2> flag_val[PSize+PSize/2+1];
int flag_d[PSize+PSize/2+1];
#pragma HLS ARRAY_PARTITION variable=flag_val dim=0
#pragma HLS ARRAY_PARTITION variable=flag_d dim=0
int index=0;
int offset=KERNEL_SIZE/2;

if(_nonmax_supression)
{
offset=offset+1;
}
loop_height: for(HLS_SIZE_T i=0;i loop_width: for(HLS_SIZE_T j=0;j #pragma HLS LOOP_FLATTEN off
#pragma HLS PIPELINE II=1
if(i for(int r= 0;r for(int c=0;c win.val[r][c]=win.val[r][c+1];//column left shift
}
}
win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];
for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {
win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];
k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];
}
//-------
_src>>s;
win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];
k_buf.val[KERNEL_SIZE-2][j]=s.val[0];
}
//------core
for(int r= 0;r<3;r++)
{
for(int c=0;c<3-1;c++)
{
core_win.val[r][c]=core_win.val[r][c+1];//column left shift
}
}
core_win.val[0][3-1]=core_buf.val[0][j];
for(int buf_row= 1;buf_row< 3-1;buf_row++)
{
core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];
core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];
}
int core=0;
//output
if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)
{
//process
bool iscorner=fast_judge(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);
if(iscorner&&!_nonmax_supression)
{
if(index {
_keypoints[index].x=j-offset;
_keypoints[index].y=i-offset;
index++;
}
}
}
if(i>=rows||j>=cols)
{
core=0;
}
if(_nonmax_supression)
{
core_win.val[3-1][3-1]=core;
core_buf.val[3-2][j]=core;
if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)
{
bool iscorner=fast_nonmax(core_win);
if(iscorner)
{
if(index {
_keypoints[index].x=j-offset;
_keypoints[index].y=i-offset;
index++;
}
}
}
}

}
}
}
template
void FASTX(
Mat &_src,
Point_ (&_keypoints)[N],
HLS_TNAME(SRC_T) _threshold,
bool _nomax_supression
)
{
#pragma HLS INLINE
int flag[16][2]={{3,0},{4,0},{5,1},{6,2},{6,3},{6,4},{5,5},{4,6},
{3,6},{2,6},{1,5},{0,4},{0,3},{0,2},{1,1},{2,0}};
FAST_t_opr<16,7>(_src,_keypoints,_threshold,_nomax_supression,flag);
}

为了方便在PS端应用层获取到keypoint信息,我对FASTX函数进行了小小的修改:取消了Point数据类型,直接使用int数组。数组的第1个元素记录keypoint的总个数,后面依次是每个keypoint的坐标信息,每个int的高16位保存X坐标,低16位保存Y坐标。修改后的代码如下,为了不影响原来的函数,可以直接增加以下代码:
//generate array
template
void FAST_t_opr(
Mat &_src,
int *_keypoints,
int _len,
HLS_TNAME(SRC_T) _threshold,
bool _nonmax_supression,
int (&flag)[PSize][2]
)
{
typedef typename pixel_op_type::T INPUT_T;
LineBuffer k_buf;
LineBuffer<2,COLS+KERNEL_SIZE,ap_int<16> > core_buf;
Window<3,3,ap_int<16> > core_win;
Window win;
Scalar s;
int rows= _src.rows;
int cols= _src.cols;
assert(rows <= ROWS);
assert(cols <= COLS);
int kernel_half=KERNEL_SIZE/2;
ap_uint<2> flag_val[PSize+PSize/2+1];
int flag_d[PSize+PSize/2+1];
#pragma HLS ARRAY_PARTITION variable=flag_val dim=0
#pragma HLS ARRAY_PARTITION variable=flag_d dim=0

int index = 1;
int offset=KERNEL_SIZE/2;
int location =0;

if(_nonmax_supression)
{
offset=offset+1;
}
loop_height: for(HLS_SIZE_T i=0;i loop_width: for(HLS_SIZE_T j=0;j #pragma HLS LOOP_FLATTEN off
#pragma HLS PIPELINE II=1
if(i for(int r= 0;r for(int c=0;c win.val[r][c]=win.val[r][c+1];//column left shift
}
}
win.val[0][KERNEL_SIZE-1]=k_buf.val[0][j];
for(int buf_row= 1;buf_row< KERNEL_SIZE-1;buf_row++) {
win.val[buf_row][KERNEL_SIZE-1]=k_buf.val[buf_row][j];
k_buf.val[buf_row-1][j]=k_buf.val[buf_row][j];
}
//-------
_src>>s;
win.val[KERNEL_SIZE-1][KERNEL_SIZE-1]=s.val[0];
k_buf.val[KERNEL_SIZE-2][j]=s.val[0];
}
//------core
for(int r= 0;r<3;r++)
{
for(int c=0;c<3-1;c++)
{
core_win.val[r][c]=core_win.val[r][c+1];//column left shift
}
}
core_win.val[0][3-1]=core_buf.val[0][j];
for(int buf_row= 1;buf_row< 3-1;buf_row++)
{
core_win.val[buf_row][3-1]=core_buf.val[buf_row][j];
core_buf.val[buf_row-1][j]=core_buf.val[buf_row][j];
}
int core=0;
//output
if(i>=KERNEL_SIZE-1&&j>=KERNEL_SIZE-1)
{
//process
bool iscorner=fast_judge(win,(INPUT_T)_threshold,flag_val,flag_d,flag,core,_nonmax_supression);
if(iscorner&&!_nonmax_supression)
{
if(index<_len)
{
location = j-offset;
location <<= 16;
location |= i-offset;
_keypoints[index] = location;
index++;
}
}
}
if(i>=rows||j>=cols)
{
core=0;
}
if(_nonmax_supression)
{
core_win.val[3-1][3-1]=core;
core_buf.val[3-2][j]=core;
if(i>=KERNEL_SIZE&&j>=KERNEL_SIZE&&core_win.val[1][1]!=0)
{
bool iscorner=fast_nonmax(core_win);
if(iscorner)
{
if(index<_len)
{
location = j-offset;
location <<= 16;
location |= i-offset;
_keypoints[index] = location;
index++;
}
}
}
}

}
}

_keypoints[0] = (index-1); // keypoints total count
}
template
void FASTX(
Mat &_src,
int *_keypoints,
int _len,
HLS_TNAME(SRC_T) _threshold,
bool _nomax_supression
)
{
#pragma HLS INLINE
int flag[16][2]={{3,0},{4,0},{5,1},{6,2},{6,3},{6,4},{5,5},{4,6},
{3,6},{2,6},{1,5},{0,4},{0,3},{0,2},{1,1},{2,0}};
FAST_t_opr<16,7>(_src,_keypoints,_len,_threshold,_nomax_supression,flag);
}

修改完FAST函数后,把原来的例子进行相应的修改
// Top-level HLS function: passes the input video through unchanged on OUTPUT_STREAM
// and, in parallel, runs FASTX (with non-max suppression enabled) on a grayscale copy,
// writing the result into `keypoints`.
//
//   INPUT_STREAM / OUTPUT_STREAM  AXI4-Stream video in / out
//   rows, cols                    frame size
//   threhold                      FAST threshold (identifier spelling kept: it is part
//                                 of the generated register/driver names)
//   keypoints                     [0] = keypoint count, [1..] = (x << 16) | y
//
// All scalar arguments, the keypoints array and the block-level control are mapped to
// one AXI4-Lite slave bundle, BUS_CTRL.
//
// NOTE(review): the template arguments of hls::Mat and hls::CvtColor were stripped from
// the published listing. MAX_HEIGHT / MAX_WIDTH / HLS_8UC3 / HLS_8UC1 / HLS_BGR2GRAY
// below follow the Xilinx fast-corner example this code is derived from; confirm them
// against your project header. The unused `_dst` matrix of the original listing has
// been removed.
void hls_fast_corner(AXI_STREAM& INPUT_STREAM, AXI_STREAM& OUTPUT_STREAM, int rows, int cols, int threhold, int keypoints[MAX_KEYPOINTS])
{
#pragma HLS INTERFACE axis port=INPUT_STREAM
#pragma HLS INTERFACE axis port=OUTPUT_STREAM

#pragma HLS INTERFACE s_axilite port=rows bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=cols bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=threhold bundle=BUS_CTRL
#pragma HLS INTERFACE s_axilite port=keypoints bundle=BUS_CTRL

#pragma HLS INTERFACE s_axilite port=return bundle=BUS_CTRL

    hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> _src(rows,cols);
#pragma HLS dataflow
    hls::AXIvideo2Mat(INPUT_STREAM, _src);
    hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> src0(rows,cols);   // feeds the detector
    hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC3> src1(rows,cols);   // pass-through copy
    hls::Mat<MAX_HEIGHT,MAX_WIDTH,HLS_8UC1> gray(rows,cols);

    hls::Duplicate(_src,src0,src1);

    hls::CvtColor<HLS_BGR2GRAY>(src0,gray);

    hls::FASTX(gray,keypoints, MAX_KEYPOINTS, threhold,true);

    hls::Mat2AXIvideo(src1, OUTPUT_STREAM);
}

没有了paintmask,不能在图像上直接看到keypoint了,该如何验证呢?可以在testbench里使用cvCircle把keypoint画上去,代码如下:
int main (int argc, char** argv) {

IplImage* src = cvLoadImage(INPUT_IMAGE);
IplImage* dst = cvCreateImage(cvGetSize(src), src->depth, src->nChannels);

AXI_STREAM src_axi, dst_axi;
IplImage2AXIvideo(src, src_axi);

int threhold = 60;
int keypoints[MAX_KEYPOINTS];
hls_fast_corner(src_axi, dst_axi, src->height, src->width, threhold, keypoints);

AXIvideo2IplImage(dst_axi, dst);

int count = keypoints[0];
printf("keypoints count:%d\n", count);
for(int i=1;i {
int x = keypoints[i] >> 16;
int y = (keypoints[i] & 0xFFFF);

cvCircle(dst,cvPoint(x,y),2,CV_RGB(0,0,255),2);
}

cvSaveImage(OUTPUT_IMAGE, dst);

return 0;
}

C 仿真的效果

导出IP后,打开driver目录下的xhls_fast_corner.h,可以看到比之前多了下面这些函数,通过其中的XHls_fast_corner_Read_keypoints_Words(或XHls_fast_corner_Read_keypoints_Bytes)函数就可以获取到keypoint信息了:
// Driver functions that appear in xhls_fast_corner.h once the `keypoints` array is
// exposed on the s_axilite interface. Only the prototypes are shown here.

// Queries about the `keypoints` region (address range, size, element width, depth).
// NOTE(review): exact units/meaning of each return value are defined by the generated
// driver source - confirm there.
u32 XHls_fast_corner_Get_keypoints_BaseAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_HighAddress(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_TotalBytes(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_BitWidth(XHls_fast_corner *InstancePtr);
u32 XHls_fast_corner_Get_keypoints_Depth(XHls_fast_corner *InstancePtr);
// Word-wise (int) access to the region; the Read variant is the one used to fetch the
// keypoint count and packed coordinates on the PS side.
// NOTE(review): presumably `offset` and `length` are counted in words - confirm.
u32 XHls_fast_corner_Write_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);
u32 XHls_fast_corner_Read_keypoints_Words(XHls_fast_corner *InstancePtr, int offset, int *data, int length);
// Byte-wise (char) access to the same region.
// NOTE(review): presumably `offset` and `length` are counted in bytes - confirm.
u32 XHls_fast_corner_Write_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);
u32 XHls_fast_corner_Read_keypoints_Bytes(XHls_fast_corner *InstancePtr, int offset, char *data, int length);

来源:luotong86的专栏
*本文已由作者授权转发,如需转载请联系作者本人获得授权