Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
I
Image processing core library
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
image/svg+xml
Discourse
Discourse
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Projects
Image processing core library
Commits
059e050a
Commit
059e050a
authored
Feb 11, 2014
by
Francisco Barranco
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Uploading attention core v0.1
parent
e86681b9
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
5163 additions
and
2 deletions
+5163
-2
GaborPrimitives.hcc
attention/attention_v0.1/GaborPrimitives.hcc
+1543
-0
GaborPrimitives.hch
attention/attention_v0.1/GaborPrimitives.hch
+114
-0
README
attention/attention_v0.1/README
+0
-0
channels.hcc
attention/attention_v0.1/channels.hcc
+166
-0
channels.hch
attention/attention_v0.1/channels.hch
+53
-0
cores.hcc
attention/attention_v0.1/cores.hcc
+115
-0
cores.hch
attention/attention_v0.1/cores.hch
+20
-0
generic.hcc
attention/attention_v0.1/generic.hcc
+39
-0
generic.hch
attention/attention_v0.1/generic.hch
+33
-0
lklib.hcc
attention/attention_v0.1/lklib.hcc
+592
-0
lklib.hch
attention/attention_v0.1/lklib.hch
+37
-0
main.hcc
attention/attention_v0.1/main.hcc
+326
-0
opticflow.hcc
attention/attention_v0.1/opticflow.hcc
+2032
-0
opticflow.hch
attention/attention_v0.1/opticflow.hch
+67
-0
parameters.hch
attention/attention_v0.1/parameters.hch
+20
-0
cores.hcc
optical_flow/gradient_based_method/flow_v0.1/cores.hcc
+1
-1
cores.hch
optical_flow/gradient_based_method/flow_v0.1/cores.hch
+1
-1
lklib.hcc
optical_flow/gradient_based_method/flow_v0.1/lklib.hcc
+4
-0
No files found.
attention/attention_v0.1/GaborPrimitives.hcc
0 → 100644
View file @
059e050a
//********************************************************************
//
// Programmed by Javier Díaz, DRIVSCO project
// Granada, October 2009, version 1.0
//
// Note: Francisco Barranco made some changes to this file. The functions he
// added have been documented. The rest has been used as is.
//********************************************************************
#include "GaborPrimitives.hch"
/*
// Gabor filter kernel coefficients for 14 bits (normalization to 2^14)
// **************************************************************************
macro expr NORMY= 16384; //exp2(KERN_BITS);
macro expr NORMX= NORMY/2; // one bit more precision
// FILTERS AND NORM COMPUTED BY KERNEL COEFFICIENTS SUM = 1 --> this allows division by a power of 2
macro expr Gab1={ 243 , 601 , 1214 , 1989 , 2671 , 2948 }; // sym
macro expr Gab2={ -12 , 590 , -23 , -2012 , -23 , 2960 }; // sym
macro expr Gab3={ -254 , 0 , 1214 , 0 , -2682 , 0 }; // antisym
macro expr Gab4={ 127 , -220 , -1237 , -1260 , 1133 , 2914 }; // sym
macro expr Gab5={ 162 , 590 , 231 , -1584 , -2393 , 0 }; // antisym
macro expr Gab6={ 127 , 520 , -439 , -1966 , 289 , 2938 }; // sym
macro expr Gab7={ -208 , 289 , 1145 , -474 , -2659 , 0 }; // antisym
macro expr Gab8={ -254 , -451 , -289 , 717 , 2197 , 2948 }; // sym
macro expr Gab9={ -23 , -405 , -1179 , -1862 , -1526 , 0 }; // antisym */
// Gabor filter kernel coefficients for 12 bits (normalization to 2^12)
// **************************************************************************
// Each GabN array lists taps 0..5 of an 11-tap kernel. GenKernel_Gabor pairs
// tap k with tap 10-k (added for symmetric kernels, subtracted for
// antisymmetric ones), so element 5 is the centre tap.
// NORMY / NORMX are the divisors passed to the Y and X kernel stages. Both are
// 2048 (4096/2), i.e. one bit less than the 2^12 coefficient scale.
macro expr NORMY= 4096/2; //exp2(KERN_BITS); // one bit more precision
macro expr NORMX= 4096/2;
macro expr Gab1={ 61, 150, 304, 497, 668, 736}; // sym; centre is 736 (not 737) so the mirrored 11-tap sum is exactly 4096
macro expr Gab2={ -6, 147, -6, -502, -3, 740}; // sym; -502 (not -503) so the mirrored 11-tap sum (DC component) is 0
macro expr Gab3={ -64, 0, 304, 0, -671, 0}; // antisym
macro expr Gab4={ 32, -55, -309, -315, 283, 728}; // sym (mirrored sum is 0)
macro expr Gab5={ 40, 147, 58, -396, -598, 0}; // antisym
macro expr Gab6={ 32, 130, -110, -491, 72, 734}; // sym (mirrored sum is 0)
macro expr Gab7={ -52, 72, 286, -119, -665, 0}; // antisym
macro expr Gab8={ -64, -113, -72, 179, 549, 737}; // sym
macro expr Gab9={ -6, -101, -295, -465, -382, 0}; // antisym
// Recursive vector addition with a balanced tree (signed operands).
// Adds the elements Array[begin] .. Array[Index] (both inclusive). Every
// element is passed through adjs(element, Extend) before being added, so all
// operands of the tree are Extend bits wide. The index range is halved at each
// level; select() ends the recursion once a single element is left.
// ************************************************************************************
macro expr SumMacro(Array, begin, Index,Extend) =
    let macro expr SignedTreeSum(Vec, Hi, Lo, Bits) =
        let macro expr Split = Lo + (Hi-Lo)/2; in
        select (Hi == Lo,
                adjs(Vec[Hi],Bits),
                SignedTreeSum(Vec, Hi, Split + 1,Bits) + SignedTreeSum(Vec, Split, Lo,Bits));
    in
    SignedTreeSum(Array, Index, begin,Extend);
// Recursive vector addition with a balanced tree (unsigned operands).
// Adds the elements Array[begin] .. Array[Index] (both inclusive). Every
// element is passed through adju(element, Extend) before being added, so all
// operands of the tree are Extend bits wide. The index range is halved at each
// level; select() ends the recursion once a single element is left.
//************************************************************************************
macro expr UnSumMacro2(Array, begin, Index,Extend) =
    let macro expr UnsignedTreeSum(Vec, Hi, Lo, Bits) =
        let macro expr Split = Lo + (Hi-Lo)/2; in
        select (Hi == Lo,
                adju(Vec[Hi],Bits),
                UnsignedTreeSum(Vec, Hi, Split + 1,Bits) + UnsignedTreeSum(Vec, Split, Lo,Bits));
    in
    UnsignedTreeSum(Array, Index, begin,Extend);
// Generic convolution kernel multiplication
// *******************************************************************************
// Evaluates one 11-tap symmetric or antisymmetric kernel on a tap buffer.
//
//   buffer   : array of signed samples; elements 0..10 are read
//   Out      : destination of the normalised result
//   mask     : six coefficients (taps 0..5, element 5 is the centre tap)
//   norm     : divisor applied to the accumulated sum
//   Symmetry : compile-time selector (ifselect). 1 -> symmetric kernel, tap k
//              is added to tap 10-k; any other value -> antisymmetric kernel,
//              tap k is subtracted from tap 10-k and the centre product is 0.
//
// Everything runs inside one par block, so each register below is loaded from
// values stored on earlier clock cycles: products -> accumulated sum ->
// normalised value -> Retiming-deep delay chain -> Out.
// No rounding is applied before the division; the earlier +/- norm/2 rounding
// variant was disabled by the original author (norm/2 must never overflow the
// data if it is ever re-enabled).
macro proc GenKernel_Gabor(buffer,Out,mask,norm, Symmetry)
{
    macro expr Retiming    = 7;                              // original note: "8 no / 9 no"
    macro expr PipeLatency = 3+Retiming-1;                   // informational only, not referenced below
    macro expr PairWidth   = width(buffer[0])+1;             // one extra bit for the tap-pair sum/difference
    macro expr DataWidth   = (width(buffer[0])+KERN_BITS);   // Norm needs KERN_BITS + 1 (sign) + 8 (from 256 gray levels)
    macro expr Tap(k)      = adjs(buffer[k],PairWidth);      // tap k extended to PairWidth bits

    const signed KERN_BITS Coeff[6]=mask;
    signed DataWidth Product[6];
    signed (DataWidth) Accum;
    signed (width(Out)) Delay[Retiming];

    par
    {
        ifselect (Symmetry==1)
        {
            // Symmetric kernel: mirrored taps share one multiplier.
            par
            {
                xilinxmult(Product[0], (Tap(0) + Tap(10)), Coeff[0]);
                xilinxmult(Product[1], (Tap(1) + Tap(9)), Coeff[1]);
                xilinxmult(Product[2], (Tap(2) + Tap(8)), Coeff[2]);
                xilinxmult(Product[3], (Tap(3) + Tap(7)), Coeff[3]);
                xilinxmult(Product[4], (Tap(4) + Tap(6)), Coeff[4]);
                xilinxmult(Product[5], (Tap(5) + 0 ), Coeff[5]);
            }
        }
        else
        {
            // Antisymmetric kernel: mirrored taps are subtracted, centre contributes nothing.
            par
            {
                xilinxmult(Product[0], (Tap(10) - Tap(0)), Coeff[0]);
                xilinxmult(Product[1], (Tap(9) - Tap(1)), Coeff[1]);
                xilinxmult(Product[2], (Tap(8) - Tap(2)), Coeff[2]);
                xilinxmult(Product[3], (Tap(7) - Tap(3)), Coeff[3]);
                xilinxmult(Product[4], (Tap(6) - Tap(4)), Coeff[4]);
                Product[5]= 0;
            }
        }

        // Adder tree over the six products.
        Accum= SumMacro(Product, 0, 5, DataWidth);

        // Normalise and keep the low width(Out) bits.
        Delay[0]= ((Accum)/norm)<-(width(Out));

        // Retiming registers.
        par(stage=1;stage<(Retiming);stage++)
        {
            Delay[stage]=Delay[stage-1];
        }

        Out= Delay[Retiming-1];
    } // End main par
}
// ************************************************************************************
// GENERIC X-Y SEPARABLE CONVOLUTION --> TESTED!!!
// ************************************************************************************
// Applies an X (row) kernel followed by a Y (column) kernel to a sample stream.
//   Input        : incoming sample
//   Output       : destination of the Y kernel result
//   X_FIR, Y_FIR : coefficient arrays forwarded to GenKernel_Gabor
//   NTaps        : length of the X and Y tap arrays
//   NTapsMinus1  : number of line buffers; also the DataArrayY slot fed by the X kernel
//   ColumnLength : the column counter wraps to 0 once it reaches ColumnLength-1
//   normx, normy : divisors for the X and Y kernels
//   Sx, Sy       : symmetry selectors for the X and Y kernels
// NOTE(review): GenKernel_Gabor reads taps 0..10, so NTaps is presumably 11 and
// NTapsMinus1 presumably NTaps-1 - confirm at the call sites.
macro proc GenericConvolution(Input, Output, X_FIR, Y_FIR, NTaps, NTapsMinus1, ColumnLength,normx, normy, Sx,Sy)
{
macro expr PipeLatency= 2 + 1 +1 + 4*2; // 2 from main, 1 input, 1 output, 4*2 kernels (not referenced below)
macro expr Retiming=1; // Retiming value = Retiming-1
// Declare MPRAM and access macros
// NTapsMinus1 block RAMs, each MAX_RES_X/SCALE words deep, with one
// read-only (rom) and one write-only (wom) port.
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[NTapsMinus1] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
// Column counters. Unlike in GaborY they are neither static nor given an
// initial value here.
unsigned (log2ceil((MAX_RES_X/SCALE))) col, colbis;
// aux[] is only referenced by the commented-out code at the end of the macro.
signed (width(Input)) DataArrayX[NTaps], DataArrayY[NTaps], aux[Retiming] ;
// Macro Begin
// ----------------------------------------------------
par
{
// Read data into array every cycle
DataArrayX[NTaps-1]=Input;
/// Shift X data through array
par (i = 0; i != (NTaps-1); i++) // NOTE WIDTH(i)=LOG2CEIL(NTaps)
{
DataArrayX[i] = DataArrayX[i+1];
}
// X kernel; its result becomes the newest entry of the Y tap array.
GenKernel_Gabor(DataArrayX,DataArrayY[NTapsMinus1],X_FIR,normx,Sx);
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns
col= col>=(ColumnLength-1) ? 0 : col+1;
// Inside the par, colbis receives the value col held before this update,
// so the write address trails the read address by one step.
colbis= col;
// Read data into array every cycle
par(r1=0;r1!=NTapsMinus1;r1++)
{
// Fill data through array
DataArrayY[r1] = readRAM(r1<-(log2ceil(NTapsMinus1)),col);
}
// Shift array and write data into block RAMs every cycle
par(r2=0;r2!=NTapsMinus1;r2++)
{
// Line buffer r2 stores the entry of tap r2+1.
writeRAM(r2<-(log2ceil(NTapsMinus1)),colbis,DataArrayY[r2+1]);
}
// Y kernel on the column taps.
GenKernel_Gabor(DataArrayY,Output,Y_FIR,normy,Sy) ;
/* Y_FIR(DataArrayY,aux[0]) ;
par(i=1;i<(Retiming);i++)
{
aux[i]=aux[i-1];
}
Output= aux[Retiming-1];*/
} // End Global par
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab1, divisor NORMY, symmetric form. Result is written through Out.
// Note the naming offset: wrapper GabN uses kernel Gab(N+1). `index` is not used in the body.
void GenKernel_Gabor_Gab0[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab1,NORMY, 1);
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab2, divisor NORMY, symmetric form. Result is written through Out.
// `index` is not used in the body.
void GenKernel_Gabor_Gab1[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab2,NORMY, 1);
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab3, divisor NORMY, antisymmetric form (-1). Result is written through Out.
// `index` is not used in the body.
void GenKernel_Gabor_Gab2[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab3,NORMY, -1);
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab4, divisor NORMY, symmetric form. Result is written through Out.
// `index` is not used in the body.
void GenKernel_Gabor_Gab3[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab4,NORMY, 1);
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab5, divisor NORMY, antisymmetric form (-1). Result is written through Out.
// `index` is not used in the body.
void GenKernel_Gabor_Gab4[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab5,NORMY, -1);
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab6, divisor NORMY, symmetric form. Result is written through Out.
// `index` is not used in the body.
void GenKernel_Gabor_Gab5[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab6,NORMY, 1);
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab7, divisor NORMY, antisymmetric form (-1). Result is written through Out.
// `index` is not used in the body.
void GenKernel_Gabor_Gab6[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab7,NORMY, -1);
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab8, divisor NORMY, symmetric form. Result is written through Out.
// `index` is not used in the body.
void GenKernel_Gabor_Gab7[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab8,NORMY, 1);
}
// Function wrapper (declared as an array of 2 functions) around GenKernel_Gabor:
// kernel Gab9, divisor NORMY, antisymmetric form (-1). Result is written through Out.
// `index` is not used in the body.
void GenKernel_Gabor_Gab8[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab9,NORMY, -1);
}
// Y CONVOLUTION FILTERS
// ************************************************************************************
// Column (Y) stage of the Gabor bank: keeps NTapsMinus1 buffered lines in block
// RAM and applies all nine kernels Gab1..Gab9 to the same column of taps.
//   Input        : incoming sample; becomes the newest Y tap
//   FNY          : array of 9 outputs; FNY[k] receives the result of kernel Gab(k+1)
//   NTaps        : length of the Y tap array
//   NTapsMinus1  : number of line buffers; also the DataArrayY slot fed by Input
//   ColumnLength : the column counter wraps to 0 once it reaches ColumnLength-1
// NOTE(review): the loops below run to NTaps-1 while the RAM array has
// NTapsMinus1 entries, so the two are presumably equal - confirm at the call site.
macro proc GaborY(Input, FNY, NTaps, NTapsMinus1, ColumnLength)
{
macro expr PipeLatency= 2 + 1 +1 + 4*2; // 2 from main, 1 input, 1 output, 4*2 kernels (not referenced below)
macro expr Retiming=1; // Retiming value = Retiming-1 (not referenced below)
// Declare MPRAM and access macros
// One block RAM per buffered line, MAX_RES_X/SCALE words deep, with one
// read-only (rom) and one write-only (wom) port.
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[NTapsMinus1] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
// Read and write column counters with hand-tuned start values.
// NOTE(review): the -2-1-4 / -2-1-8 offsets look like pipeline-latency
// compensation - confirm against the surrounding pipeline.
static unsigned (log2ceil((MAX_RES_X/SCALE))) col=((MAX_RES_X/SCALE)+1 -2-1-4), colbis=((MAX_RES_X/SCALE) -2-1-8);
signed (width(Input)) DataArrayY[NTaps];//, DataArray1[NTaps],DataArray2[NTaps],aux[Retiming] ;
signal signed GHaux; // not referenced below
// Macro Begin
// ----------------------------------------------------
par
{
// Read data into array every cycle
DataArrayY[NTapsMinus1]=Input;
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns
par
{
col= col>=(ColumnLength-1) ? 0 : col+1;
// colbis receives the value col held before this update.
colbis= col;
}
// Read data into array every cycle
par(r1=0;r1!=(NTaps-1);r1++) // NOTE WIDTH(i)=LOG2CEIL(NTaps)
{
// Fill data through array
DataArrayY[r1] = readRAM(r1<-(log2ceil(NTapsMinus1)),col);
}
// Shift array and write data into block RAMs every cycle
par(r2=0;r2!=(NTaps-1);r2++)
{
// Line buffer r2 stores the entry of tap r2+1.
writeRAM(r2<-(log2ceil(NTapsMinus1)),colbis,DataArrayY[r2+1]);
}
// All nine kernels share the same tap array; 1 = symmetric, -1 = antisymmetric.
GenKernel_Gabor(DataArrayY,(FNY[0]),Gab1,NORMY, 1);
GenKernel_Gabor(DataArrayY,(FNY[1]),Gab2,NORMY, 1);
GenKernel_Gabor(DataArrayY,(FNY[2]),Gab3,NORMY, -1);
GenKernel_Gabor(DataArrayY,(FNY[3]),Gab4,NORMY, 1);
GenKernel_Gabor(DataArrayY,(FNY[4]),Gab5,NORMY, -1);
GenKernel_Gabor(DataArrayY,(FNY[5]),Gab6,NORMY, 1);
GenKernel_Gabor(DataArrayY,(FNY[6]),Gab7,NORMY, -1);
GenKernel_Gabor(DataArrayY,(FNY[7]),Gab8,NORMY, 1);
GenKernel_Gabor(DataArrayY,(FNY[8]),Gab9,NORMY, -1);
} // End Global par
}
// GABOR FILTERS BASE SET
// ************************************************************************************
// NO SHARING
// ************************************************************************************
// Row (X) FIR stage with a private tap delay line.
//   Input  : incoming sample, stored as the newest tap (index NTaps-1)
//   Output : destination of the kernel result
//   X_FIR  : coefficient array forwarded to GenKernel_Gabor
//   NTaps  : length of the delay line
//   norm   : divisor forwarded to GenKernel_Gabor
//   sym    : symmetry selector forwarded to GenKernel_Gabor
macro proc GenericConvolutionX(Input, Output, X_FIR, NTaps,norm, sym)
{
    signed (width(Input)) DelayLine[NTaps];

    par
    {
        // Older taps move one position towards index 0.
        par (tap = 0; tap != (NTaps-1); tap++) // NOTE WIDTH(tap)=LOG2CEIL(NTaps)
        {
            DelayLine[tap] = DelayLine[tap+1];
        }

        // Newest sample enters at the top of the delay line.
        DelayLine[NTaps-1]=Input;

        // Kernel evaluation on the current taps.
        GenKernel_Gabor(DelayLine,Output,X_FIR,norm,sym) ;
    }
}
// Separable Gabor base set without resource sharing: one Y stage (GaborY)
// producing nine column responses, followed by sixteen X stages.
//   DataIn  : incoming sample
//   FNYNX   : array of 16 outputs, laid out as listed in the table below
//   Columns : column length forwarded to GaborY
macro proc GaborBase(DataIn, FNYNX,Columns)
{
macro expr NTAPS=11;
signed CONV_BITS FNY[9]; // Y-stage results; FNY[k] comes from kernel Gab(k+1)
/*
FNYNX[0]= F1Y2X
FNYNX[1]= F1Y3X
FNYNX[2]= F2Y1X
FNYNX[3]= F3Y1X
FNYNX[4]= F4YF4X
FNYNX[5]= F5YF5X
FNYNX[6]= F5YF4X
FNYNX[7]= F4YF5X
FNYNX[8]= F8YF6X
FNYNX[9]= F9YF7X
FNYNX[10]=F9YF6X
FNYNX[11]=F8YF7X
FNYNX[12]=F6YF8X
FNYNX[13]=F7YF9X
FNYNX[14]=F7YF8X
FNYNX[15]=F6YF9X
*/
par // REMOVE _INDEX!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
{
//Y-convolution
GaborY(DataIn, FNY, NTAPS, (NTAPS-1), Columns);
//X-convolutions
// Last argument: 1 = symmetric kernel, -1 = antisymmetric kernel.
GenericConvolutionX(FNY[0], FNYNX[0] , Gab2, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[0], FNYNX[1] , Gab3, NTAPS, (NORMX), -1);
GenericConvolutionX(FNY[1], FNYNX[2] , Gab1, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[2], FNYNX[3] , Gab1, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[3], FNYNX[4] , Gab4, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[4], FNYNX[5] , Gab5, NTAPS, (NORMX), -1);
GenericConvolutionX(FNY[4], FNYNX[6] , Gab4, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[3], FNYNX[7] , Gab5, NTAPS, (NORMX), -1);
GenericConvolutionX(FNY[7], FNYNX[8] , Gab6, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[8], FNYNX[9] , Gab7, NTAPS, (NORMX), -1);
GenericConvolutionX(FNY[8], FNYNX[10], Gab6, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[7], FNYNX[11], Gab7, NTAPS, (NORMX), -1);
GenericConvolutionX(FNY[5], FNYNX[12], Gab8, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[6], FNYNX[13], Gab9, NTAPS, (NORMX), -1);
GenericConvolutionX(FNY[6], FNYNX[14], Gab8, NTAPS, (NORMX), 1);
GenericConvolutionX(FNY[5], FNYNX[15], Gab9, NTAPS, (NORMX), -1);
}
}
// SHARING
// ************************************************************************************
// Row (X) FIR stage for shared use: keeps one tap delay line per sharer and
// works on the line selected by `index`.
//   Input           : incoming sample, stored as the newest tap of line `index`
//   Output          : destination of the kernel result
//   X_FIR           : coefficient array forwarded to GenKernel_Gabor
//   NTaps           : length of each delay line
//   norm, sym       : divisor and symmetry selector forwarded to GenKernel_Gabor
//   sharerProcesses : number of delay lines allocated
//   index           : selects which delay line this invocation updates and filters
macro proc GenericConvolutionX_index(Input, Output, X_FIR, NTaps,norm, sym, sharerProcesses, index)
{
static signed (width(Input)) DataArrayX[sharerProcesses][NTaps];
// Macro Begin
// ----------------------------------------------------
//assert (1 == 24, 0, "Application requires %d",log2ceil(8));
par
{
// Read data into array every cycle
DataArrayX[index][NTaps-1]=Input;
// Shift X data through array
par (i = 0; i != (NTaps-1); i++) // NOTE WIDTH(i)=LOG2CEIL(NTaps)
{
DataArrayX[index][i] = DataArrayX[index][i+1];
}
// Only the selected delay line is filtered.
GenKernel_Gabor(DataArrayX[index],Output,X_FIR,norm,sym);
//Output=DataArrayX[0];
}
}
// Shared X-convolution bank for the stereo (disparity) path: runs the sixteen
// X kernels of the Gabor base set in parallel on the Y responses.
//   FNY   : nine Y-stage responses (indices 0..8 are read)
//   FNYNX : sixteen outputs, laid out as listed in the table below
//   index : selects the per-sharer delay line inside each X stage
// Each stage allocates MAX_PROC_DISPARITY delay lines.
void functionGenericConvolutionX_index_stereo(signed CONV_BITS *FNY, signed CONV_BITS *FNYNX, unsigned DISPARITY_INDEX_BITS index)
{
macro expr NTAPS=11;
/*
FNYNX[0]= F1Y2X
FNYNX[1]= F1Y3X
FNYNX[2]= F2Y1X
FNYNX[3]= F3Y1X
FNYNX[4]= F4YF4X
FNYNX[5]= F5YF5X
FNYNX[6]= F5YF4X
FNYNX[7]= F4YF5X
FNYNX[8]= F8YF6X
FNYNX[9]= F9YF7X
FNYNX[10]=F9YF6X
FNYNX[11]=F8YF7X
FNYNX[12]=F6YF8X
FNYNX[13]=F7YF9X
FNYNX[14]=F7YF8X
FNYNX[15]=F6YF9X
*/
par
{
// Sixth argument: 1 = symmetric kernel, -1 = antisymmetric kernel.
GenericConvolutionX_index(FNY[0], FNYNX[0] , Gab2, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[0], FNYNX[1] , Gab3, NTAPS, (NORMX), -1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[1], FNYNX[2] , Gab1, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[2], FNYNX[3] , Gab1, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[3], FNYNX[4] , Gab4, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[4], FNYNX[5] , Gab5, NTAPS, (NORMX), -1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[4], FNYNX[6] , Gab4, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[3], FNYNX[7] , Gab5, NTAPS, (NORMX), -1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[7], FNYNX[8] , Gab6, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[8], FNYNX[9] , Gab7, NTAPS, (NORMX), -1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[8], FNYNX[10], Gab6, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[7], FNYNX[11], Gab7, NTAPS, (NORMX), -1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[5], FNYNX[12], Gab8, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[6], FNYNX[13], Gab9, NTAPS, (NORMX), -1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[6], FNYNX[14], Gab8, NTAPS, (NORMX), 1, MAX_PROC_DISPARITY, index);
GenericConvolutionX_index(FNY[5], FNYNX[15], Gab9, NTAPS, (NORMX), -1, MAX_PROC_DISPARITY, index);
}
}
// Shared X-convolution bank for the optical-flow path: runs the sixteen
// X kernels of the Gabor base set in parallel on the Y responses.
//   FNY   : nine Y-stage responses (indices 0..8 are read)
//   FNYNX : sixteen outputs, laid out as listed in the table below
//   index : selects the per-sharer delay line inside each X stage
// Each stage allocates MAX_PROC_FLOW delay lines.
void functionGenericConvolutionX_index_flow(signed CONV_BITS *FNY, signed CONV_BITS *FNYNX, unsigned FLOW_INDEX_BITS index)
{
macro expr NTAPS=11;
/*
FNYNX[0]= F1Y2X
FNYNX[1]= F1Y3X
FNYNX[2]= F2Y1X
FNYNX[3]= F3Y1X
FNYNX[4]= F4YF4X
FNYNX[5]= F5YF5X
FNYNX[6]= F5YF4X
FNYNX[7]= F4YF5X
FNYNX[8]= F8YF6X
FNYNX[9]= F9YF7X
FNYNX[10]=F9YF6X
FNYNX[11]=F8YF7X
FNYNX[12]=F6YF8X
FNYNX[13]=F7YF9X
FNYNX[14]=F7YF8X
FNYNX[15]=F6YF9X
*/
par
{
// Sixth argument: 1 = symmetric kernel, -1 = antisymmetric kernel.
GenericConvolutionX_index(FNY[0], FNYNX[0] , Gab2, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[0], FNYNX[1] , Gab3, NTAPS, (NORMX), -1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[1], FNYNX[2] , Gab1, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[2], FNYNX[3] , Gab1, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[3], FNYNX[4] , Gab4, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[4], FNYNX[5] , Gab5, NTAPS, (NORMX), -1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[4], FNYNX[6] , Gab4, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[3], FNYNX[7] , Gab5, NTAPS, (NORMX), -1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[7], FNYNX[8] , Gab6, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[8], FNYNX[9] , Gab7, NTAPS, (NORMX), -1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[8], FNYNX[10], Gab6, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[7], FNYNX[11], Gab7, NTAPS, (NORMX), -1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[5], FNYNX[12], Gab8, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[6], FNYNX[13], Gab9, NTAPS, (NORMX), -1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[6], FNYNX[14], Gab8, NTAPS, (NORMX), 1, MAX_PROC_FLOW, index);
GenericConvolutionX_index(FNY[5], FNYNX[15], Gab9, NTAPS, (NORMX), -1, MAX_PROC_FLOW, index);
}
}
// BUILDING GABOR FILTERS
// ************************************************************************************
// Combines the sixteen separable responses into eight even/odd filter pairs.
//   FNYNX : sixteen separable responses (layout in the table below)
//   fe    : eight even outputs
//   fo    : eight odd outputs
// Pairs 0 and 4 are copied straight through; the others are sums or
// differences of two responses.
// NOTE(review): those sums/differences are assigned without widening, so they
// can presumably wrap if both operands are near full scale - confirm the ranges.
macro proc BuildGabor(FNYNX,fe,fo)
{
/*
Filter pairs in terms of the separable responses:
1) even F1Y2X, odd F1Y3X
5) even F2Y1X, odd F3Y1X
3) even = F4YF4X - F5YF5X; odd = F5YF4X + F4YF5X;
7) even = F4YF4X + F5YF5X; odd = F5YF4X - F4YF5X;
2) even = F8YF6X - F9YF7X; odd = F9YF6X + F8YF7X;
8) even = F8YF6X + F9YF7X; odd = F9YF6X - F8YF7X;
4) even = F6YF8X - F7YF9X; odd = F7YF8X + F6YF9X;
6) even = F6YF8X + F7YF9X; odd = F7YF8X - F6YF9X;
Layout of FNYNX:
FNYNX[0]= F1Y2X
FNYNX[1]= F1Y3X
FNYNX[2]= F2Y1X
FNYNX[3]= F3Y1X
FNYNX[4]= F4YF4X
FNYNX[5]= F5YF5X
FNYNX[6]= F5YF4X
FNYNX[7]= F4YF5X
FNYNX[8]= F8YF6X
FNYNX[9]= F9YF7X
FNYNX[10]=F9YF6X
FNYNX[11]=F8YF7X
FNYNX[12]=F6YF8X
FNYNX[13]=F7YF9X
FNYNX[14]=F7YF8X
FNYNX[15]=F6YF9X
Same pairs written with FNYNX indices:
1) even 0, odd 1
5) even 2, odd 3
3) even = 4 - 5; odd = 6 + 7;
7) even = 4 + 5; odd = 6 - 7;
2) even = 8 - 9; odd = 10+ 11;
8) even = 8 + 9; odd = 10- 11;
4) even = 12 - 13; odd = 14+ 15;
6) even = 12+ 13; odd = 14- 15;
Reordered to the fe[]/fo[] output index used by the code below:
0) even 0, odd 1
1) even = 8 - 9; odd = 10+ 11;
2) even = 4 - 5; odd = 6 + 7;
3) even = 12 - 13; odd = 14+ 15;
4) even 2, odd 3
5) even = 12+ 13; odd = 14- 15;
6) even = 4 + 5; odd = 6 - 7;
7) even = 8 + 9; odd = 10- 11;
*/
par
{
fe[0]=FNYNX[0];
fo[0]=FNYNX[1];
fe[1]=FNYNX[8] - FNYNX[9];
fo[1]=FNYNX[10] + FNYNX[11];
fe[2]=FNYNX[4] - FNYNX[5];
fo[2]=FNYNX[6] + FNYNX[7];
fe[3]=FNYNX[12] - FNYNX[13];
fo[3]=FNYNX[14] + FNYNX[15];
fe[4]=FNYNX[2];
fo[4]=FNYNX[3];
fe[5]=FNYNX[12] + FNYNX[13];
fo[5]=FNYNX[14] - FNYNX[15];
fe[6]=FNYNX[4] + FNYNX[5];
fo[6]=FNYNX[6] - FNYNX[7];
fe[7]=FNYNX[8] + FNYNX[9];
fo[7]=FNYNX[10] - FNYNX[11];
}
}
// Function wrapper (declared as an array of 2 functions) around the BuildGabor
// macro: combines the sixteen responses in FNYNX into the even (fe) and odd
// (fo) filter outputs.
void BuildGabor_function[2](signed CONV_BITS *FNYNX, signed F_BITS *fe, signed F_BITS *fo)
{
BuildGabor(FNYNX,fe,fo);
}
// CORES FOR ATAN FUNCTION COMPUTATION AND SQRT
// ************************************************************************************
// Wrapper around the external atan2 core named by ATAN2CORENAME_P (phase path).
//   y, x     : operands; x is negated before it is connected to the core's x_in port
//   ena      : drives the core's clock-enable port (ce)
//   angle    : receives the negated core output shifted left by one bit
//   data_rdy : receives the core's rdy flag
// With DEBUG defined no core is instantiated: angle is the concatenation
// 0@y@x and data_rdy is not assigned.
macro proc CoreATAN2CORDICPHI(y, x, ena, angle,data_rdy)
{
macro expr CoreWidthIn = ATAN2COREWIDTHIN_P;
macro expr CoreWidthOut = ATAN2COREWIDTHOUT_P;
macro expr CoreLatency = ATAN2CORELATENCY_P; // not referenced below
signal aux;
#ifdef DEBUG
angle=0@y@x;
#else
/*
Example declaration of the wrapped component. The port widths actually used
are CoreWidthIn / CoreWidthOut, not the literal widths shown here.
component atan2cordic24
port (
x_in: IN std_logic_VECTOR(23 downto 0);
y_in: IN std_logic_VECTOR(23 downto 0);
phase_out: OUT std_logic_VECTOR(9 downto 0);
rdy: OUT std_logic;
clk: IN std_logic;
ce: IN std_logic);
end component;
*/
interface ATAN2CORENAME_P(signed CoreWidthOut phase_out, unsigned 1 rdy) atan2(signed CoreWidthIn x_in=-x,
signed CoreWidthIn y_in=y, unsigned 1 clk=__clock, unsigned 1 ce=ena) with {busformat="B<I>"};
par
{
//assert (width(aux)==3, 0, "Width of x is not 3 (it is %d)", width(aux));
// Left shift to utilize the unused bit of the 2QN core output format (range +/- 1, not above)
aux=(-(atan2.phase_out<<1));
data_rdy=atan2.rdy;
angle=aux;
}
#endif
}
// Core replication for ChipScope debugging
// Wrapper around the external atan2 core named by ATAN2CORENAME (orientation path).
//   y, x     : operands; y is negated before it is connected to the core's y_in port
//   ena      : drives the core's clock-enable port (ce)
//   angle    : receives the wrapped, rescaled angle (see below)
//   data_rdy : receives the core's rdy flag
// The core output is shifted right by one bit; negative values get a constant
// offset added, then the value is shifted left by two bits and the low
// (ATAN2COREWIDTHOUT-PHASE_ORI_BITS) bits are dropped.
// With DEBUG defined no core is instantiated: angle is the concatenation y@x
// and data_rdy is not assigned.
macro proc CoreATAN2CORDICORI(y, x, ena, angle,data_rdy)
{
macro expr CoreWidthIn = ATAN2COREWIDTHIN;
macro expr CoreWidthOut = ATAN2COREWIDTHOUT;
macro expr CoreLatency = ATAN2CORELATENCY; // not referenced below
signal aux;
#ifdef DEBUG
angle=y@x;
#else
/*
Example declaration of the wrapped component. The port widths actually used
are CoreWidthIn / CoreWidthOut, not the literal widths shown here.
component atan2cordic24
port (
x_in: IN std_logic_VECTOR(23 downto 0);
y_in: IN std_logic_VECTOR(23 downto 0);
phase_out: OUT std_logic_VECTOR(9 downto 0);
rdy: OUT std_logic;
clk: IN std_logic;
ce: IN std_logic);
end component;
*/
interface ATAN2CORENAME(signed CoreWidthOut phase_out, unsigned 1 rdy) atan2(signed CoreWidthIn x_in=x,
signed CoreWidthIn y_in=-y, unsigned 1 clk=__clock, unsigned 1 ce=ena) with {busformat="B<I>"};
par
{
aux=(atan2.phase_out)>>1;
data_rdy=atan2.rdy;
// NOTE(review): the offset literal is cast to signed 24, so this branch
// presumably requires ATAN2COREWIDTHOUT to be 24 - confirm.
if (aux<0) // Left shift 2 bits to utilize the unused bit of the 2QN core output format + the sign bit (range 0,1, not above, nor negative)
angle=((aux + ((signed 24)0x200000))<<2)\\(ATAN2COREWIDTHOUT-PHASE_ORI_BITS); // adding 0.5x2 (2Q9 format) to wrap orientation to [0,pi)
else
angle=(aux<<2)\\(ATAN2COREWIDTHOUT-PHASE_ORI_BITS);
} // Shift left is allowed because in fact, angle values are positive
#endif
}
// SQRT Core
//---------------------------------------
// Wrapper around the external square-root core named by SQRTCORENAME.
//   input    : unsigned operand connected to the core's x_in port
//   ena      : drives the core's clock-enable port (ce)
//   output   : receives the low SQRTCOREWIDTHOUT bits of the core result
//   data_rdy : receives the core's rdy flag
// The core's x_out port is declared one bit wider (SQRTCOREWIDTHOUT+1) than
// what is kept; the top bit is discarded by the <- operator.
// With DEBUG defined no core is instantiated: output is a copy of input and
// data_rdy is not assigned.
macro proc CoreSQRT(input, ena, output, data_rdy)
{
macro expr SqrtWidthIn = SQRTCOREWIDTHIN;
macro expr SqrtWidthOut = SQRTCOREWIDTHOUT+1;
macro expr SqrtLatency = SQRTCORELATENCY; // not referenced below
#ifndef DEBUG
/*
component sqrtcordic20
port (
x_in: IN std_logic_VECTOR(19 downto 0);
x_out: OUT std_logic_VECTOR(10 downto 0); --> theoretically 11 bits, but this is a very rare case, 10 bits is enough
rdy: OUT std_logic;
clk: IN std_logic;
ce: IN std_logic);
end component; */
interface SQRTCORENAME(unsigned SqrtWidthOut x_out, unsigned 1 rdy) sqrt (unsigned SqrtWidthIn x_in=input,
unsigned 1 ce= ena, unsigned 1 clk=__clock) with {busformat="B<I>"};
par
{
output = sqrt.x_out<-SQRTCOREWIDTHOUT;
data_rdy=sqrt.rdy;
}
#else
output=input;
#endif
}
/* extern "C"
{
int cocosine(int a);
} */
// COSLUT Core
//---------------------------------------
// Wrapper around the external cosine look-up core named by COSLUTCORENAME.
//   input  : unsigned angle connected to the core's THETA port
//   output : receives the core's signed COSINE output
// The interface has no clock or enable port. Both ports are COSLUTCOREWIDTH bits wide.
// With DEBUG defined no core is instantiated and output is a copy of input.
macro proc CoreCosLUT(input, output)
{
macro expr cosLUTWidth = COSLUTCOREWIDTH;
#ifndef DEBUG
/*
component wrapped_cosLUT
port (
THETA: IN std_logic_VECTOR(9 downto 0);
COSINE: OUT std_logic_VECTOR(9 downto 0));
end component; */
interface COSLUTCORENAME(signed cosLUTWidth COSINE) cosineLUT (unsigned cosLUTWidth THETA=input) with {busformat="B<I>"};
par
{
output = cosineLUT.COSINE;
}
#else
output=input;//cocosine(adjs(input,32));
#endif
}
// COMPUTING PHASE, MAGNITUDE AND ORIENTATION
// ************************************************************************************
// Computes energy and orientation from the even/odd filter outputs.
//   fe, fo      : even / odd filter outputs, NORIENTATIONS entries each
//   Energy      : output; delayed square-root result with its LSB dropped, or 0
//                 when the energy does not exceed the threshold
//   Orientation : output; delayed atan2 result with its LSB dropped, or 0 when
//                 the energy does not exceed the threshold
//   TH          : energy threshold; compared as 0@TH
//   Latencies   : delay selectors; bits [11:8] choose the tap of the energy
//                 delay line, bits [7:4] the tap of the orientation delay line
// The phase path is entirely commented out in this version, so phiangle,
// cosLUTphiangle, AcNumPhi*, AcDenPhi*, fecopy, focopy, OriForPhase, PhiAngle,
// Phi, en2, rdy2 and angleLUT are declared but only referenced from
// commented-out code.
macro proc Primitives(fe,fo,Energy, Orientation, TH ,Latencies)
{
// Sine and cosine LUTs for orientation computation
// ******************************************************
const signed TRIG_BITS sin[NORIENTATIONS]={ 0, 91, 128, 91, 0, -91, -128, -91};
const signed TRIG_BITS cos[NORIENTATIONS]={ 128, 91 , 0 ,-91 ,-128 , -91 , 0 ,91};
// WRONG!!! const signed COSLUTCOREWIDTH angleLUT[NORIENTATIONS]={ 0, 50 , 101, 151, 201, 251, 302, 352};
const unsigned COSLUTCOREWIDTH angleLUT[NORIENTATIONS]={0 , 64 , 128 , 192 , 256 , 320 , 384 , 448};
// Pipeline equalization delays.
macro expr LATENCIESOFFSET = 15; // + 15 for Software adjustment, 0 for final hardware implementation
macro expr PIPEenergy = 17;
macro expr PIPEOri = 40;
macro expr PIPEPhase = PIPEOri+40;
macro expr EQPIPEenergy = PIPEPhase-PIPEenergy-1+1+LATENCIESOFFSET ; // -1 from Software adjustment
macro expr EQPIPEOri = PIPEPhase-PIPEOri-1+1+LATENCIESOFFSET ; // + 4 from Software adjustment
macro expr EQPIPEphase = 0+1+LATENCIESOFFSET; // longest stage // + 0 from Software adjustment
//macro expr EQPIPEphase = 15+1+LATENCIESOFFSET; // longest stage // + 0 from Software adjustment
macro expr WAIT_FOR_ORI = PIPEOri+2-8 + LATENCIESOFFSET; // + 10 from Software adjustment
// Data
unsigned phiangle[NORIENTATIONS];
signed cosLUTphiangle[NORIENTATIONS];
signed (F_BITS*2) EnergyA[NORIENTATIONS], EnergyB[NORIENTATIONS];
unsigned (F_BITS*2) EnergyC[NORIENTATIONS];
unsigned SQRTCOREWIDTHIN meanEnergy;
signed (ORIENTED_ENERGY_BITS+TRIG_BITS-1) OriA[NORIENTATIONS], OriB[NORIENTATIONS];
signed (ACORI_BITS) AcNumOri[2], AcDenOri[2];
signed ACPHI_BITS AcNumPhi[NORIENTATIONS], AcDenPhi[NORIENTATIONS], AcNumPhiBIS, AcDenPhiBIS;
signed fecopy[WAIT_FOR_ORI][NORIENTATIONS], focopy[WAIT_FOR_ORI][NORIENTATIONS];
//signal <signed (PHASE_ORI_BITS*2)> OriForPhaseSignal;
unsigned COSLUTCOREWIDTH OriForPhase;
// Outputs
// E[] and OriAngle[] are delay lines; entry 0 is written by the cores below.
unsigned E[EQPIPEenergy];
signed PHASE_ORI_BITS OriAngle[EQPIPEOri], PhiAngle[NORIENTATIONS];
signed PHASE_ORI_BITS Phi[EQPIPEphase];
// Cores control signals
static signal <unsigned 1> en0=0;
static signal <unsigned 1> en1=0;
static signal <unsigned 1> en2=0;
unsigned int 1 rdy0, rdy1, rdy2;
/*#ifdef DEBUG
int 16 auxOri;
chanin <signed 16> chanori with { infile= "C:/RC2000/RC2000Local_features/DKsimulator/matlab/ori.dk" };
#endif*/
par
{
// Enabling using signals the sqrt and atan2 cores for one clock cycle
en0=1;
en1=1;
en2=1;
// ********************************************************************* //
// ENERGY
// ********************************************************************* //
par(o=0;o<NORIENTATIONS;o++)
{
// Pipe 1, Energy
// Squares of the even and odd outputs.
xilinxmult(EnergyA[o], fe[o],fe[o]);
xilinxmult(EnergyB[o], fo[o],fo[o]);
// Pipe 2, Energy
// Per-orientation energy: fe^2 + fo^2.
EnergyC[o]= (unsigned)EnergyA[o]+(unsigned)EnergyB[o];
}
// Pipe 3, Energy BE CAREFUL WITH OVERFLOW EFFECTS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Sum over all orientations with 3 extra bits, then drop the low bits so
// that the result fits the square-root core input width.
meanEnergy=(UnSumMacro2(EnergyC,0,(NORIENTATIONS-1),(width(EnergyC[0])+3))\\(3+width(EnergyC[0])-width(meanEnergy)))<-width(meanEnergy);
// Pipe 4-20, Energy.
CoreSQRT(meanEnergy, en0, E[0], rdy0); // 17 cycles latency
// ********************************************************************* //
// ORIENTATION
// ********************************************************************* //
// Pipe 3, Orientation WARNING: EnergyC has two clock cycles delay to Ori
// Each orientation's energy (reduced to ORIENTED_ENERGY_BITS) is weighted
// by the sin/cos table entry of that orientation.
par(o=0;o<NORIENTATIONS;o++)
{ // 34 to -> 28 bits, fractional part from 14 to 8
xilinxmult(OriA[o], ((signed)adju((EnergyC[o]>>(width(EnergyC[0])-ORIENTED_ENERGY_BITS)),width(EnergyC[0])+2)), sin[o]);
xilinxmult(OriB[o], ((signed)adju((EnergyC[o]>>(width(EnergyC[0])-ORIENTED_ENERGY_BITS)),width(EnergyC[0])+2)), cos[o]);
} // 28+9-1 (sign) = 36 bits for OriAB => +2 required
// Pipe 4, Orientation accumulation lower than 2 (in fact is 2.8)
// Sums of the sin- and cos-weighted energies, reduced to ACORI_BITS.
AcNumOri[0]=SumMacro(OriA,0,(NORIENTATIONS-1),(width(OriA[0])+1))\\(width(OriA[0])+1-width(AcNumOri[0]));
AcDenOri[0]=SumMacro(OriB,0,(NORIENTATIONS-1),(width(OriB[0])+1))\\(width(OriA[0])+1-width(AcNumOri[0]));
//assert (width(OriA)==3, 0, "Width of Ori is not 3 (it is %d)", width(OriA));
// Pipe 5, Orientation
// The shift amount is currently 0, so this stage is a plain register copy.
AcNumOri[1]=AcNumOri[0]>>0; // core needs inputs in -1<=x<=1 format
AcDenOri[1]=AcDenOri[0]>>0; // PERHAPS >>1 is needed in case the whole range be used
//assert (width(OriAngle[1])==3, 0, "Width of x is not 3 (it is %d)", width(OriAngle[1]));
// Pipe 6-40, Orientation (atan2 core latency =35)
CoreATAN2CORDICORI(AcNumOri[1], AcDenOri[1], en1, OriAngle[0], rdy1);
// ********************************************************************* //
// PHASE
// ********************************************************************* //
// Pipe synchronization, waiting orientation data and storing filters outputs
/* par(k=0; k<WAIT_FOR_ORI;k++)
{
ifselect(k==0)
{
par(o=0; o<NORIENTATIONS;o++)
{
fecopy[k][o]=fe[o];
focopy[k][o]=fo[o];
}
}
else
{
par(o=0; o<NORIENTATIONS;o++)
{
fecopy[k][o]=fecopy[k-1][o];
focopy[k][o]=focopy[k-1][o];
}
}
}
// WAIT_FOR_ORI + 1 cycles // 2QN x 2QN => 5QN (duplicated sign bit)
//xilinxmult(OriForPhaseSignal, (signed PHASE_ORI_BITS) PI, ((OriAngle[0])>>2));
// Note that for coherence we go back to the 2QN format for OriAngle[0]
// Core input range 0-> 2pi (core input = 1024*angle(rad)/2pi) => because our max input is pi we have to /2
OriForPhase=(((unsigned)OriAngle[0])>>1)\\(width(OriAngle[0])-width(OriForPhase));
par(o=0;o<NORIENTATIONS;o++)
{ // WAIT_FOR_ORI + 2 cycles
if (OriForPhase>=angleLUT[o])
phiangle[o]=OriForPhase - angleLUT[o];
else
phiangle[o]= angleLUT[o]- OriForPhase;
// WAIT_FOR_ORI + 3 cycles
CoreCosLUT(phiangle[o], cosLUTphiangle[o]);
// WAIT_FOR_ORI + 4 cycles
xilinxmult(AcNumPhi[o], focopy[WAIT_FOR_ORI-1-adju(Latencies[15:12],6)][o], cosLUTphiangle[o] );
xilinxmult(AcDenPhi[o], fecopy[WAIT_FOR_ORI-1-adju(Latencies[15:12],6)][o], abs(cosLUTphiangle[o]) );
} //
// WAIT_FOR_ORI + 5 cycles
AcNumPhiBIS=SumMacro(AcNumPhi,0,(NORIENTATIONS-1),(ACPHI_BITS));
AcDenPhiBIS=SumMacro(AcDenPhi,0,(NORIENTATIONS-1),(ACPHI_BITS));
// WAIT_FOR_ORI + 5-40 cycles
CoreATAN2CORDICPHI(adjs(AcNumPhiBIS,ATAN2COREWIDTHIN), adjs(AcDenPhiBIS,ATAN2COREWIDTHIN), en2, Phi[0], rdy2);
AcNumPhiBIS=SumMacro(fo,0,(NORIENTATIONS-1),(ACPHI_BITS));
AcDenPhiBIS=SumMacro(fe,0,(NORIENTATIONS-1),(ACPHI_BITS));
// WAIT_FOR_ORI + 5-40 cycles
CoreATAN2CORDICPHI(AcNumPhiBIS, AcDenPhiBIS, en2, Phi[0], rdy2); */
// ********************************************************************* //
// Pipe equalization and sending processed data
// ********************************************************************* //
// Pipe 20-??
// Delay lines for the energy and orientation results.
par(i=0; i<(EQPIPEenergy-1);i++)
{
E[i+1]=E[i];
}
par(i=0; i<(EQPIPEOri-1);i++)
{
OriAngle[i+1]=OriAngle[i];
}
/*par(i=0; i<(EQPIPEphase-1);i++)
{
Phi[i+1]=Phi[i];
}*/
// Both outputs are gated by the same threshold test on the selected energy
// tap; a larger Latencies field selects an earlier (less delayed) tap.
Energy=(E[EQPIPEenergy-1-adju(Latencies[11:8],7)]> 0@TH) ? E[EQPIPEenergy-1-adju(Latencies[11:8],7)]\\1 : 0; //SetNAN(E[0]);
// Divide by 2 when reading in software because we use the double angle representation
Orientation=(E[EQPIPEenergy-1-adju(Latencies[11:8],7)]> 0@TH ) ? OriAngle[EQPIPEOri-1-adju(Latencies[7:4],6)]\\1 : 0;//SetNAN(Orientation);
//Phase=(E[EQPIPEenergy-1-adju(Latencies[11:8],7)]> 0@TH ) ? Phi[EQPIPEphase-1-adju(Latencies[3:0],4)]\\1 : SetNAN(Phase);
}
}
// COMPUTING PHASE, MAGNITUDE AND ORIENTATION
// ************************************************************************************
/*
% fe          - Array of NORIENTATIONS filter responses (presumably the even
%               Gabor outputs -- confirm against the caller)
% fo          - Array of NORIENTATIONS filter responses (presumably the odd
%               Gabor outputs -- confirm against the caller)
% Energy      - Output: delayed CoreSQRT result with its LSB dropped, or 0
%               when the delayed value does not exceed TH
% Orientation - Output array of NORIENTATIONS values, gated by the same
%               threshold test as Energy
% TH          - Threshold; zero-extended (0@TH) before the comparison
% Latencies   - Control word; bits [11:8] select the tap of the E delay line
%               and bits [7:4] the tap of the OriAngle delay line
%
% DESCRIPTION
% Reduced version of the primitives extraction. For every orientation it
% computes fe^2 + fo^2, feeds the accumulated value to the square-root core
% and pushes the per-orientation values through a delay line, so that both
% outputs can be read from run-time selectable taps. No arctangent or phase
% computation is performed in this macro.
%
% RETURN
%
*/
macro proc Primitives_short(fe,fo,Energy, Orientation, TH ,Latencies)
{
// Pipeline equalization delays.
macro expr LATENCIESOFFSET = 15; // + 15 for Software adjustment, 0 for final hardware implementation
macro expr PIPEenergy = 17;
//macro expr PIPEOri = 40;
macro expr PIPEOri = 40-35-3+1;
macro expr PIPEPhase = PIPEOri+40;
macro expr EQPIPEenergy = PIPEPhase-PIPEenergy-1+1+LATENCIESOFFSET ; // -1 from Software adjustment
macro expr EQPIPEOri = PIPEPhase-PIPEOri-1+1+LATENCIESOFFSET ; // + 4 from Software adjustment
macro expr EQPIPEphase = 0+1+LATENCIESOFFSET; // longest stage // + 0 from Software adjustment
macro expr WAIT_FOR_ORI = PIPEOri+2-8 + LATENCIESOFFSET; // + 10 from Software adjustment
// Data
// NOTE(review): phiangle, cosLUTphiangle, OriA, OriB, AcNumOri, AcDenOri,
// AcNumPhi, AcDenPhi, AcNumPhiBIS, AcDenPhiBIS, fecopy, focopy, OriForPhase,
// Phi, rdy1 and rdy2 are declared below but never referenced in this macro;
// en1 and en2 are driven but never read. They look like leftovers from
// Primitives -- confirm before removing.
unsigned phiangle[NORIENTATIONS];
signed cosLUTphiangle[NORIENTATIONS];
signed (F_BITS*2) EnergyA[NORIENTATIONS], EnergyB[NORIENTATIONS];
unsigned (F_BITS*2) EnergyC[NORIENTATIONS];
unsigned SQRTCOREWIDTHIN meanEnergy;
signed (ORIENTED_ENERGY_BITS+TRIG_BITS-1) OriA[NORIENTATIONS], OriB[NORIENTATIONS];
signed (ACORI_BITS) AcNumOri[2], AcDenOri[2];
signed ACPHI_BITS AcNumPhi[NORIENTATIONS], AcDenPhi[NORIENTATIONS], AcNumPhiBIS, AcDenPhiBIS;
signed fecopy[WAIT_FOR_ORI][NORIENTATIONS], focopy[WAIT_FOR_ORI][NORIENTATIONS];
unsigned COSLUTCOREWIDTH OriForPhase;
// Outputs
// NOTE(review): the width of E is the literal 10 rather than a named
// constant (e.g. ENER_BITS or SQRTCOREWIDTHOUT) -- confirm which one applies.
unsigned 10 E[EQPIPEenergy];
unsigned PHASE_ORI_BITS OriAngle[EQPIPEOri][NORIENTATIONS];
signed PHASE_ORI_BITS Phi[EQPIPEphase];
// Cores control signals
static signal <unsigned 1> en0=0;
static signal <unsigned 1> en1=0;
static signal <unsigned 1> en2=0;
unsigned int 1 rdy0, rdy1, rdy2;
par
{
// Enabling using signals the sqrt and atan2 cores for one clock cycle
// (only en0 is consumed here, by CoreSQRT)
en0=1;
en1=1;
en2=1;
// ********************************************************************* //
// ENERGY
// ********************************************************************* //
par(o=0;o<NORIENTATIONS;o++)
{
// Pipe 1, Energy
xilinxmult(EnergyA[o], fe[o],fe[o]);
xilinxmult(EnergyB[o], fo[o],fo[o]);
// Pipe 2, Energy
EnergyC[o]= (unsigned)EnergyA[o]+(unsigned)EnergyB[o];
}
// Pipe 3, Energy BE CAREFUL WITH OVERFLOW EFFECTS!
// UnSumMacro2 is defined outside this chunk; presumably it adds
// EnergyC[0..NORIENTATIONS-1] at width(EnergyC[0])+3 bits. The low
// (3+width(EnergyC[0])-width(meanEnergy)) bits are then dropped and the
// remaining width(meanEnergy) least-significant bits are kept.
meanEnergy=(UnSumMacro2(EnergyC,0,(NORIENTATIONS-1),(width(EnergyC[0])+3))\\(3+width(EnergyC[0])-width(meanEnergy)))<-width(meanEnergy);
// Pipe 4-20, Energy.
CoreSQRT(meanEnergy, en0, E[0], rdy0); // 17 cycles latency
// ********************************************************************* //
// ORIENTATION
// ********************************************************************* //
// In this short variant the "orientation" channel carries the
// per-orientation energy (EnergyC with its 4 LSBs dropped), not an angle.
// NOTE(review): with F_BITS=10 and PHASE_ORI_BITS=10 (GaborPrimitives.hch)
// adju narrows a 16-bit value to 10 bits -- confirm the upper bits can
// never be set, otherwise the value wraps.
par(cnt=0; cnt<NORIENTATIONS; cnt++)
{
OriAngle[0][cnt]= adju(EnergyC[cnt]\\4, width(OriAngle[0][0]));
}
// ********************************************************************* //
// PHASE
// ********************************************************************* //
// (no phase computation in this variant; see PhasePrimitive)
// ********************************************************************* //
// Pipe equalization and sending processed data
// ********************************************************************* //
// Pipe 20-??
// Delay line for the square-root output
par(i=0; i<(EQPIPEenergy-1);i++)
{
E[i+1]=E[i];
}
// Delay line for the per-orientation values
par(i=0; i<(EQPIPEOri-1);i++)
{
par(cnt2=0; cnt2<NORIENTATIONS; cnt2++)
{
OriAngle[i+1][cnt2]=OriAngle[i][cnt2];
}
}
// Tap index = last stage minus Latencies[11:8]; output is 0 below threshold
Energy=(E[EQPIPEenergy-1-adju(Latencies[11:8],6)]> 0@TH) ? E[EQPIPEenergy-1-adju(Latencies[11:8],6)]\\1 : 0; //SetNAN(E[0]);
// Divide by 2 when reading in software because we use the double angle representation
// NOTE(review): the comment above appears inherited from Primitives; here
// the delayed values are energies, not double-angle orientations -- confirm.
par(cnt3=0; cnt3<NORIENTATIONS; cnt3++)
{
Orientation[cnt3]=(E[EQPIPEenergy-1-adju(Latencies[11:8],6)]> 0@TH ) ? OriAngle[EQPIPEOri-1-adju(Latencies[7:4],6)][cnt3]\\1 : 0;//SetNAN(Orientation);
}
}
}
/*
% fe      - Array of NORIENTATIONS filter responses, summed into the
%           denominator passed to the arctangent core
% fo      - Array of NORIENTATIONS filter responses, summed into the
%           numerator passed to the arctangent core
% phase   - Output: delayed arctangent result with its LSB dropped (9 bits)
% latency - Selects the delay-line tap; phase is read from phi9[latency-1]
%
% DESCRIPTION
% Adds the fo and fe responses over all orientations, passes both sums to
% CoreATAN2CORDICPHI and stores the result in an 80-stage delay line from
% which the output tap is selected by latency.
% NOTE(review): latency is not range-checked; values outside 1..maxlatency
% would index outside phi9 -- confirm the caller guarantees the range.
%
% RETURN
%
*/
macro proc PhasePrimitive (fe, fo, phase, latency)
{
macro expr maxlatency=80;
unsigned 1 rdy;
signal static unsigned 1 en=0;
signed ACPHI_BITS NumPhi, DenPhi, phi;
signed 9 phi9[maxlatency];
par
{
// Core enable, driven whenever this block executes
en=1;
// Sums over all orientations, each term adjusted to ACPHI_BITS by SumMacro
NumPhi=SumMacro(fo,0,(NORIENTATIONS-1),(ACPHI_BITS));
DenPhi=SumMacro(fe,0,(NORIENTATIONS-1),(ACPHI_BITS));
// NOTE(review): the original remark here read "WAIT_FOR_ORI + 5-40 cycles";
// WAIT_FOR_ORI is not defined in this macro, so it seems copied from Primitives.
CoreATAN2CORDICPHI(NumPhi, DenPhi, en, phi, rdy);
// Drop the LSB of the core output to fit the 9-bit delay line
phi9[0]=phi\\1;
par(d=0;d<maxlatency-1;d++)
{
phi9[d+1]=phi9[d];
}
phase = phi9[latency-1];
}
}
// *******************************************************************************
// AUXILIARY MACROS (ONLY BETA VERSIONS, UNDER TEST)
// *******************************************************************************
// *******************************************************************************
// *******************************************************************************
// *******************************************************************************
// Sorting input data
// *******************************************************************************
// Input data must be signed
/*
% bufferIn     - Array of bufferLength signed values to sort
% bufferOut    - Output array; every input is written at the index given by
%                its rank
% bufferLength - Number of elements (compile-time constant, used as array
%                dimension and replicator bound)
%
% DESCRIPTION
% Rank-based sort. Every element is compared with every other element; the
% number of comparisons it wins is its position in bufferOut. Lower-indexed
% elements are compared with ">" and higher-indexed ones with ">=", so that
% equal values still receive distinct positions.
%
% RETURN
%
*/
macro proc Sort(bufferIn,bufferOut, bufferLength)
{
macro expr Retiming=1; // Retiming value = Retiming-1
macro expr PipeLatency=3; // not referenced inside this macro
macro expr DataWidth=(width(bufferIn[0]));
//macro expr SumMacro(vector,begin,end,Extend)= select(end==begin, adju(vector[begin],Extend),
// adju(vector[end],Extend)+SumMacro(vector,begin,end-1,Extend));
// Local SumMacro: adds Array[begin..Index] with a balanced recursive split,
// every element being adjusted to Extend bits with adju
macro expr SumMacro(Array, begin, Index,Extend) =
let macro expr RecurseAddAux(Array, Top, Bottom) =
let macro expr Middle = Bottom + (Top-Bottom)/2; in
select (Top == Bottom,adju(Array[Top],Extend),
RecurseAddAux(Array, Top, Middle + 1) + RecurseAddAux(Array, Middle, Bottom));
in
RecurseAddAux(Array, Index, begin);
signed DataWidth bufferInternal[Retiming+1][bufferLength];
unsigned 1 sum[bufferLength][(bufferLength-0)]; // In fact is -1 but the compiler fails,
unsigned (log2ceil(bufferLength)) position[Retiming][bufferLength]; // time to synthesizer optimization
par(i1=0;i1<bufferLength;i1++)
{
// NOTE(review): the statements after the ifselect below do not depend on
// i2 but are replicated once per i2 value -- confirm this is intended.
par(i2=0;i2<(bufferLength-1);i2++)
{
//assert ((MAX_RES_X/SCALE) == 24, 0, "Application requires %d",width(i2));
// Comparisons
ifselect(i1>(0@i2)) // left side
{
// Elements with a lower index than i1: strict comparison
if(bufferIn[i1]>bufferIn[0@i2])
sum[i1][i2]=1;
else
sum[i1][i2]=0;
}
else //ifselect(i1<=i2) // i1<=i2, right side, except center pixel
{
// Elements with a higher index than i1 (i2+1 skips i1 itself)
if(bufferIn[i1]>=bufferIn[0@i2+1])
sum[i1][i2]=1;
else
sum[i1][i2]=0;
}
bufferInternal[0][i1]=bufferIn[i1];
// Positions estimation
position[0][i1]=SumMacro(sum[i1],0,(bufferLength-2),width(position[0]));
bufferInternal[1][i1]=bufferInternal[0][i1];
// Retiming
/*par(t=1;t<Retiming;t++)
{
position[t][i1]=position[t-1][i1];
bufferInternal[t+1][i1]=bufferInternal[t][i1];
}*/
// Sorting vector
bufferOut[position[Retiming-1][i1]]=bufferInternal[Retiming][i1];
}
}
}
// *******************************************************************************
// Sorting input data with invalid values
// *******************************************************************************
// Input data must be signed
/*
% bufferIn     - Array of bufferLength signed values to sort
% bufferOut    - Output array; every input is written at the index given by
%                its rank
% bufferLength - Number of elements (compile-time constant)
% offset       - Output: number of inputs equal to the invalid marker
%
% DESCRIPTION
% Same rank-based sort as Sort, plus a count of the inputs that equal the
% invalid marker 0b100000000000.
% NOTE(review): the marker is a 12-bit literal, so this presumably requires
% 12-bit inputs -- confirm, or derive the marker from the input width.
%
% RETURN
%
*/
macro proc SortNaN(bufferIn,bufferOut, bufferLength, offset)
{
macro expr Retiming=1; // Retiming value = Retiming-1
macro expr PipeLatency=3; // not referenced inside this macro
macro expr DataWidth=(width(bufferIn[0]));
//macro expr SumMacro(vector,begin,end,Extend)= select(end==begin, adju(vector[begin],Extend),
// adju(vector[end],Extend)+SumMacro(vector,begin,end-1,Extend));
// Local SumMacro: adds Array[begin..Index] with a balanced recursive split,
// every element being adjusted to Extend bits with adju
macro expr SumMacro(Array, begin, Index,Extend) =
let macro expr RecurseAddAux(Array, Top, Bottom) =
let macro expr Middle = Bottom + (Top-Bottom)/2; in
select (Top == Bottom,adju(Array[Top],Extend),
RecurseAddAux(Array, Top, Middle + 1) + RecurseAddAux(Array, Middle, Bottom));
in
RecurseAddAux(Array, Index, begin);
signed DataWidth bufferInternal[Retiming+1][bufferLength];
unsigned 1 sum[bufferLength][(bufferLength-0)]; // In fact is -1 but the compiler fails,
unsigned (log2ceil(bufferLength)) position[Retiming][bufferLength]; // time to synthesizer optimization
unsigned (log2ceil(bufferLength)) NumInvalid; // not referenced inside this macro
unsigned 1 SumInvalid[bufferLength];
par(i1=0;i1<bufferLength;i1++)
{
// NOTE(review): the statements after the ifselect below (including the
// assignment to offset) do not depend on i2, and offset does not depend on
// i1 either, yet they are replicated for every (i1, i2) pair -- confirm
// this is intended.
par(i2=0;i2<(bufferLength-1);i2++)
{
//assert ((MAX_RES_X/SCALE) == 24, 0, "Application requires %d",width(i2));
// Comparisons
ifselect(i1>(0@i2)) // left side
{
// Elements with a lower index than i1: strict comparison
if(bufferIn[i1]>bufferIn[0@i2])
sum[i1][i2]=1;
else
sum[i1][i2]=0;
}
else //ifselect(i1<=i2) // i1<=i2, right side, except center pixel
{
// Elements with a higher index than i1 (i2+1 skips i1 itself)
if(bufferIn[i1]>=bufferIn[0@i2+1])
sum[i1][i2]=1;
else
sum[i1][i2]=0;
}
bufferInternal[0][i1]=bufferIn[i1];
// Positions estimation
position[0][i1]=SumMacro(sum[i1],0,(bufferLength-2),width(position[0]));
bufferInternal[1][i1]=bufferInternal[0][i1];
// Retiming
/*par(t=1;t<Retiming;t++)
{
position[t][i1]=position[t-1][i1];
bufferInternal[t+1][i1]=bufferInternal[t][i1];
}*/
// Sorting vector
bufferOut[position[Retiming-1][i1]]=bufferInternal[Retiming][i1];
// counting invalid values
if(bufferInternal[0][i1]==0b100000000000)
SumInvalid[i1]=1;
else
SumInvalid[i1]=0;
offset = SumMacro(SumInvalid,0,(bufferLength-1),width(offset));
}
}
}
// ************************************************************************************
// MEDIAN FILTERING FOR IMAGE SALT & PEPPER DENOISING
// ************************************************************************************
/*
% Input        - New sample entering the 3x3 window
% Output       - Middle element of the sorted 3x3 window
% ColumnLength - Number of elements of each column; the column counter is
%                reset to 0 after reaching ColumnLength-1
%
% DESCRIPTION
% Keeps a 3x3 window (DataMatrix) fed by Input and by NTaps-1 block RAMs
% holding earlier data, flattens the window into a 9-element buffer, sorts
% it with Sort and outputs element (NTaps*NTaps)/2 of the sorted buffer.
%
% RETURN
%
*/
macro proc Median(Input, Output, ColumnLength)
{
macro expr PipeLatency= 1 +1 + 3; // 1 input, 1 output,3 sorting data
macro expr NTaps=3;
macro expr Retiming=1; // Retiming value = Retiming-1
// Declare MPRAM and access macros
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[NTaps-1] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
// Read (col) and write (colbis) addresses.
// NOTE(review): the initial values look like latency-compensation offsets
// -- confirm; their derivation is not visible here.
static unsigned (log2ceil((MAX_RES_X/SCALE))) col=((MAX_RES_X/SCALE)+1 -2-1-4), colbis=((MAX_RES_X/SCALE) -2-1-8);
signed (width(Input)) DataMatrix[NTaps][NTaps], bufferIn[(NTaps*NTaps)], bufferOut[(NTaps*NTaps)];
// Macro Begin
// ----------------------------------------------------
par
{
//assert (1 == 24, 0, "Application requires %d",log2ceil(8));
// Updating matrix. Read data into array every cycle
par (r = 0; r != NTaps; r++)
{
par (c = 0; c != NTaps; c++)
{
ifselect(c==0)
{
ifselect(r==0)
DataMatrix[0][0]=Input; // Read new data
else
{
// Rows 1..NTaps-1 are fed from block RAM r-1 at address col
DataMatrix[r][c] = readRAM((r-1)<-log2ceil(NTaps-1),col);
}
}
else // shift data through the matrix
{
DataMatrix[r][c]=DataMatrix[r][c-1];
}
}
}
// Storing previous data
// Column 0 of rows 0..NTaps-2 is written back to the RAM with the same index
par(r1=0;r1!=(NTaps-1);r1++)
{
writeRAM(r1,colbis,DataMatrix[0@r1][0]);
}
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns
// NOTE(review): wraps with "==" whereas SpatialConvolutions_last and
// Delaying use ">=" -- confirm the difference is intentional.
col= col==(ColumnLength-1) ? 0 : col+1;
colbis= col;
// Sorting data and median filtering
// Flatten the window row by row: index = r2*NTaps + c2
par(r2=0;r2!=NTaps;r2++)
{
par(c2=0;c2!=NTaps;c2++)
{
bufferIn[adju(r2,log2ceil(NTaps*NTaps))*NTaps+adju(c2,log2ceil(NTaps*NTaps))]=DataMatrix[r2][c2];
}
}
Sort(bufferIn,bufferOut, (NTaps*NTaps));
Output=bufferOut[(NTaps*NTaps)/2];
/*par(i=1;i<(Retiming);i++)
{
aux[i]=aux[i-1];
}
Output= aux[Retiming-1];*/
} // End Global par
}
/*
% Input - Input value for the convolution
% Output - Result of the convolution
% KernelX - Kernel for the X convolution
% KernelY - Kernel for the Y convolution
% ColumnLength - Number of elements of each column
%
% DESCRIPTION
% This function computes the separable 2D convolution of the input.
% It stores 4 columns before performing it; with the current column
% there are 5. Then, the convolution is carried out using KernelX
% for the rows and KernelY for the columns.
%
% RETURN
%
*/
// Separable 5-tap 2D convolution: KernelX is applied to a 5-sample shift
// register of Input, its result is combined with 4 values read from block
// RAMs, and KernelY is applied to those 5 values to produce Output.
// KernelX and KernelY are macro procedures taking (5-element array, output).
macro proc SpatialConvolutions_last(Input,Output,KernelX,KernelY, ColumnLength)
{
macro expr PipeLatency=6 + 2; // not referenced inside this macro
macro expr Retiming=1; // Retiming value = Retiming-1
//const unsigned int col_size=log2ceil(ColumnLength);
// Declare MPRAM and access macros
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[4] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
signed (width(Input)) DataArrayX[5], DataArrayY[5] ;
//static unsigned (log2ceil((VIDEOINCOLUMNS/SCALE))) col=1, colbis=0;
//static unsigned (log2ceil((ColumnLength))) col=1, colbis=0;
// col is the RAM read address, colbis the RAM write address
static unsigned (log2ceil(MAX_RES_X/SCALE)) col=1, colbis=0;
// aux is only used by the commented-out retiming code at the end
signed (width(Output)) aux[Retiming];
// Macro Begin
// ----------------------------------------------------
par
{
// Read data into array every cycle
DataArrayX[4]=Input;
// Shift X data through array
par (i = 0; i != 4; i++)
{
DataArrayX[i] = DataArrayX[i+1];
}
// First pass: result goes to the newest entry of the second-pass array
KernelX(DataArrayX,DataArrayY[4]);
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns
// Advance the read address, wrapping at ColumnLength-1; the write address
// takes the current value of col
col= col>=(ColumnLength-1) ? 0 : col+1;
colbis= col;
// Read data into array every cycle
par(r1=0;r1!=4;r1++)
{
// Fill data through array
DataArrayY[r1] = readRAM(adju(r1,3),col);
}
// Shift array and write data into block RAMs every cycle
// RAM r2 receives DataArrayY[r2+1]
par(r2=0;r2!=4;r2++)
{
writeRAM(adju(r2,3),colbis,DataArrayY[r2+1]);
}
// Second pass
KernelY(DataArrayY,Output);
/*par(i=1;i<(Retiming);i++)
{
aux[i]=aux[i-1];
}
Output= aux[Retiming-1];*/
} // End Global par
}
/*
% buffer - Buffer with the current pixel (center) and the neighborhood
% Out - Output value for the center element
%
% DESCRIPTION
% This function computes the filtered pixel (center) using the kernel
% k = [2 16 28 16 2]/64. It is a low-pass (smoothing) filter: all taps are
% positive and they sum to 1. The Retiming is not used
% for the implementation because the performance was good enough.
%
% RETURN
%
*/
// 5-tap filter with integer weights [2 16 28 16 2] and a final division by 64
// (shift by DivisorShift). buffer is a 5-element array centred on buffer[2];
// Out receives the rounded result truncated to width(Out).
macro proc Prefilter5Taps(buffer,Out) // mask=[2 16 28 16 2]/64
{
macro expr Retiming=1; // Retiming value = Retiming-1
macro expr PipeLatency=3+Retiming-1; // not referenced inside this macro
macro expr DivisorShift=6;
// 6 extra bits so that the weighted sum (weights add up to 64) cannot overflow
macro expr DataWidth=(width(buffer[0])+6);
signed DataWidth Register[3], aux0;
signed (width(Out)) aux[Retiming];
par
{
// Symmetric taps are added before weighting: (b0+b4)*2, (b1+b3)*16, b2*28
Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[4],DataWidth))<<1;
Register[1]=(adjs(buffer[1],DataWidth)+adjs(buffer[3],DataWidth))<<4;
Register[2]=(adjs(buffer[2],DataWidth))*28;
//xilinxmult(Register[2], (adjs(buffer[2],DataWidth)) ,((int 18) 28) );
aux0= Register[0] + Register[1] + Register[2];
// Rounding
// Half of the divisor (exp2(DivisorShift-1) = 32) is subtracted for negative
// sums and added for non-negative ones before shifting.
// NOTE(review): if ">>" on a signed value is an arithmetic (sign-extending)
// shift, the negative branch rounds one step too far down (e.g. aux0 = -1
// gives -1 instead of 0). Adding the half in both cases would round to
// nearest -- confirm against the software reference before changing.
if(sign(aux0))
aux[0]= ((aux0-((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
else
aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
//par(i=1;i<(Retiming);i++)
//{
// aux[i]=aux[i-1];
//}
Out= aux[Retiming-1];
//Out=buffer[0];
}
}
/*
% Num - Numerator
% Den - Denominator
% Result - Quotient
%
% DESCRIPTION
% This function computes the division of Num by Den, obtaining the
% quotient that is returned in result. It can be done using the
% standard Handel-C implementation, simply as result = Num/Den.
% The problem is that the performance is affected by the required
% logic and resources. This is why we are using a core from
% the core Generator. The interface is divider_18 because we are using
% 18 bits for the division to obtain a better precision.
%
% RETURN
%
*/
// Wraps the external divider_18 core: Num is wired to the dividend port, Den
// (sign-adjusted to DIVIDER_INPUT bits) to the divisor port, and the quot
// output port is copied to result. The remd and rfd ports are not used.
macro proc division_core(Num, Den, result)
{
// Enable for Cores
static signal unsigned 1 enable=0;
//signed DIVIDER_INPUT quot2;
// NOTE(review): Num is connected without adjs, unlike Den -- presumably the
// caller already passes a DIVIDER_INPUT-bit value; confirm.
interface divider_18(signed DIVIDER_INPUT quot, signed DIVIDER_INPUT remd, unsigned 1 rfd) divider(signed DIVIDER_INPUT dividend = Num,
signed DIVIDER_INPUT divisor = adjs(Den,DIVIDER_INPUT), unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
par
{
//Enabling division Core: only for 1 clock cycle
enable=1;
// The latency of the core is not handled here; see DIVIDER_LATENCY
result = divider.quot;
//quot2 = Num/Den;
//quot = qout2;
}
}
/*
% Input - Input value for the convolution
% Output - Delayed input value
% ColumnLength - Number of elements of each column
%
% DESCRIPTION
% This function delays the input as many cycles as the
% function SpatialConvolutions_last. It is used for synchronization.
%
% RETURN
%
*/
// Delays Input through a 7-entry shift register and 6 block RAMs, using the
// same addressing scheme as SpatialConvolutions_last but without applying any
// kernel. Output is taken from DataArrayY[3].
macro proc Delaying(Input, Output, ColumnLength)
{
// Declare MPRAM and access macros
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[6] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
signed (width(Input)) DataArrayX[7], DataArrayY[7] ;
// col is the RAM read address, colbis the RAM write address
static unsigned (log2ceil(MAX_RES_X/SCALE)) col=1, colbis=0;
// Macro Begin
// ----------------------------------------------------
par
{
// Read data into array every cycle
DataArrayX[6]=Input;
// Shift X data through array
par (i = 0; i != 6; i++)
{
DataArrayX[i] = DataArrayX[i+1];
}
// Entry 4 of the shift register feeds the RAM-backed array
// NOTE(review): the tap indices (4 here, 3 for Output) are presumably chosen
// to match the latency of SpatialConvolutions_last -- confirm.
DataArrayY[6] = DataArrayX[4];
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns
col= col>=(ColumnLength-1) ? 0 : col+1;
colbis= col;
// Read data into array every cycle
par(r1=0;r1!=6;r1++)
{
// Fill data through array
DataArrayY[r1] = readRAM(adju(r1,3),col);
}
// Shift array and write data into block RAMs every cycle
// RAM r2 receives DataArrayY[r2+1]
par(r2=0;r2!=6;r2++)
{
writeRAM(adju(r2,3),colbis,DataArrayY[r2+1]);
}
Output=DataArrayY[3];
} // End Global par
}
\ No newline at end of file
attention/attention_v0.1/GaborPrimitives.hch
0 → 100644
View file @
059e050a
//********************************************************************
//
// Programmed by Javier Díaz, DRIVSCO project
// Granada, March 2008, version 2.1
//
//********************************************************************
#ifndef __GABORPRIMITIVES__
#define __GABORPRIMITIVES__
#include <stdlib.hch>
#include "generic.hch"
#include "parameters.hch"
// Data bit-widths
//*****************************************
#define KERN_BITS 12 //14
#define CONV_BITS 10
#define CONV_FRACT_BITS 0
#define F_BITS (CONV_BITS) // USE CONV_BITS+1 FOR OPTICAL FLOW AND STEREO OR 17 FOR LOCAL FEATURES
#define TRIG_BITS 9
#define ORIENTED_ENERGY_BITS 20
#define ACORI_BITS (24)
#define PHASE_ORI_BITS 10 // 16 for hardware, 64 for debugging
#define ENER_BITS 10 // 16 for hardware, 32 for debugging
#define ACPHI_BITS 10//(COSLUTCOREWIDTH+F_BITS+3) // in fact this is larger than the software simulator
//#define PI 25736 // it uses 16 bits in format 2QN (1 bit sign, 2 bit integer part, 13 bits fractional part)
#define PI 201 // it uses 10 bits ( 6 bits of frac part): 201/64 = 3.140625
// SQRT CORE (SCALED RADIANS 2Q24 FORMAT (1 for sign, 2 as integer part and the others as fractional part)
// NOTE(review): the format remark above mentions radians, which looks like it
// belongs to the arctangent core rather than the square-root core -- confirm.
#define SQRTCOREWIDTHIN 20
#define SQRTCOREWIDTHOUT 10
// NOTE(review): this evaluates to 12, while Primitives_short assumes a
// 17-cycle CoreSQRT latency (PIPEenergy = 17) -- confirm which one is current.
#define SQRTCORELATENCY (SQRTCOREWIDTHOUT+2)
#define SQRTCORENAME sqrtcordic
// ARC TAN CORE
#define ATAN2COREWIDTHIN 24
#define ATAN2COREWIDTHOUT 24
#define ATAN2CORELATENCY (ATAN2COREWIDTHOUT+4)
#define ATAN2CORENAME atan2cordic24
// ARC TAN CORE PHASE
#define ATAN2COREWIDTHIN_P 10 //32
#define ATAN2COREWIDTHOUT_P 10 //32
#define ATAN2CORELATENCY_P (ATAN2COREWIDTHOUT_P+4)
#define ATAN2CORENAME_P atan2cordic10//atan2cordic24
// cosLUT CORE
#define COSLUTCOREWIDTH 10
#define COSLUTCORENAME cosLUT
/* BIT CONFIGUATION EXAMPLES
*****************************************
1) #define KERN_BITS 11
#define CONV_BITS 9
#define ATAN2COREWIDTH 20
#define ATAN2CORENAME atan2cordic20
2) #define KERN_BITS 13
#define CONV_BITS 11
#define ATAN2COREWIDTH 24
#define ATAN2CORENAME atan2cordic24
3) #define KERN_BITS 15 / 17 / 17 / 19 / 21
#define CONV_BITS 14 / 16 / 18 / 20 / 22
#define ATAN2COREWIDTH 30 / 34 / 38 / 42 / 46
#define ATAN2CORENAME atan2cordic30 / atan2cordic34 / atan2cordic38 / atan2cordic42 / atan2cordic46
*/
// Extra parameters
// Number of filter orientations; used as array dimension and replicator bound
#define NORIENTATIONS 8
//#define PI 201 // 3.14 (3 bit integer, 6 bit fractional)
//#define NAN 0b100000000000
//#define NSCALES 1
#define MAX_PROC_DISPARITY 2
#define MAX_PROC_FLOW 3
#define FLOW_INDEX_BITS 2
#define DISPARITY_INDEX_BITS 1
// Operand width of the divider_18 core used by division_core
#define DIVIDER_INPUT 18
// Parenthesised like the other *LATENCY macros so that the expansion stays a
// single operand inside larger expressions (e.g. 2*DIVIDER_LATENCY)
#define DIVIDER_LATENCY (DIVIDER_INPUT+4) // is +4 if divider has clks/div==1
// Generic Macros
// Adds Array[begin..Index], every element adjusted to Extend bits
macro expr SumMacro(Array, begin, Index,Extend);
// Computing Macros
/*********************************************************************/
macro proc GenericConvolution(Input, Output, X_FIR, Y_FIR, NTaps, NTapsMinus1, ColumnLength,normx, normy,Sx,Sy);
macro proc GaborY(Input, FNY, NTaps, NTapsMinus1, ColumnLength);
macro proc GaborBase(DataIn, FNYNX,Columns);
macro proc BuildGabor(FNYNX,fe,fo);
macro proc SortNaN(bufferIn,bufferOut, bufferLength, offset);
macro proc PhasePrimitive (fe, fo, phase, latency);
macro proc Primitives(fe,fo,Energy, Orientation, TH ,Latencies);
macro proc Primitives_short(fe,fo,Energy, Orientation, TH ,Latencies);
// NOTE(review): Sort and Median are defined in GaborPrimitives.hcc but have
// no prototype here -- confirm whether they are meant to be file-local.
// Added macros (F Barranco)
/*********************************************************************/
macro proc SpatialConvolutions_last(Input,Output,KernelX,KernelY, ColumnLength);
macro proc Delaying(Input, Output, ColumnLength);
macro proc Prefilter5Taps(buffer,Out);
macro proc division_core(Num, Den, result);
#endif
\ No newline at end of file
attention/attention_v0.1/README
0 → 100644
View file @
059e050a
attention/attention_v0.1/channels.hcc
0 → 100644
View file @
059e050a
/* channels.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "channels.hch"
// ***************************************************************
// Channels implemented using signals
// ***************************************************************
/*
% Channel - Send data through this channel
% Input - Data to be sent through the channel
%
% DESCRIPTION
% This function sends Input through Channel.
% Channels are structs declared
% in channels.hch file
%
% RETURN
%
*/
// Blocking send: drives Channel.DataTransfer with Input and raises
// Channel.SendReady on every iteration, repeating until Channel.ReadReady was
// seen active in the same iteration.
macro proc Send(Channel, Input)
{
//register indicating that the procedure has completed
unsigned 1 done;
//do this at least once
do
{
par
{
//set the transfer wires to the input value
Channel.DataTransfer = Input;
//indicate that the send process is ready
Channel.SendReady = 1;
//set the done register if the read process is ready
done = Channel.ReadReady;
}
}while(!done); //until the transfer is complete
}
/*
% Channel - Send signed data through this channel
% Input - Data to be sent through the channel
%
% DESCRIPTION
% This function safely sends Input through Channel: to work, there must be
% a channel reading in the other side. It waits until the ready signal is activated
% and then sends the correct data. Otherwise, it is sending 0.
% Channels are structs declared in channels.hch file
%
% RETURN
%
*/
// Sends a signed value through Channel. If the receiver is already ready the
// value is sent directly; otherwise Input is first copied into the local
// register auxInput and the copy is sent, so the blocking Send does not
// depend on Input afterwards.
macro proc SignedSecureSend(Channel, Input)
{
// Width is not given; it is left to the compiler's width inference
signed auxInput;
if (Read_Ready(Channel))
Send(Channel, Input);
else
{
auxInput=Input;
Send(Channel, auxInput);
}
}
/*
% Channel - Send unsigned data through this channel
% Input - Data to be sent through the channel
%
% DESCRIPTION
% This function safely sends Input through Channel: to work, there must be
% a channel reading in the other side. It waits until the ready signal is activated
% and then sends the correct data. Otherwise, it is sending 0.
% Channels are structs declared in channels.hch file
%
% RETURN
%
*/
// Unsigned counterpart of SignedSecureSend. If the receiver is already ready
// the value is sent directly; otherwise Input is first copied into the local
// register auxInput and the copy is sent.
macro proc UnsignedSecureSend(Channel, Input)
{
// Width is not given; it is left to the compiler's width inference
unsigned auxInput;
if (Read_Ready(Channel))
Send(Channel, Input);
else
{
auxInput=Input;
Send(Channel, auxInput);
}
}
/*
% Channel - Receive data coming through this channel
% Output - Data to be received through the channel
%
% DESCRIPTION
% This function safely receives Output through Channel.
% It waits until the ready signal is activated (meaning that
% the sending part is ready) and then receives the data.
% This function is blocked until the reception of the first
% transference.
% Channels are structs declared in channels.hch file
%
% RETURN
%
*/
// Blocking receive: raises Channel.ReadReady on every iteration and copies
// Channel.DataTransfer into Output when Channel.SendReady is active,
// repeating until SendReady was seen active.
macro proc Receive(Channel, Output)
{
//register indicating that the procedure has completed
unsigned 1 done;
//do this at least once
do
{
par
{
//if the send process is ready
if (Channel.SendReady)
{
//read the value on the data transfer wires
Output = Channel.DataTransfer;
}
else
delay;
//indicate that the receive process is ready
Channel.ReadReady = 1;
//set the done register if the send process is ready
done = Channel.SendReady;
}
}while(!done); //until the transfer is complete
}
/*
% Channel - Channel
%
% DESCRIPTION
% This function checks whether the sender is ready or not.
% Channels are structs declared in channels.hch file
%
% RETURN
%
% SendReady - Signal that is active if the sender is ready to transmit data
%
*/
// Non-blocking status check: current value of the channel's SendReady signal
macro expr Send_Ready(Channel) = Channel.SendReady;
/*
% Channel - Channel
%
% DESCRIPTION
% This function checks whether the receiver is ready or not.
% Channels are structs declared in channels.hch file
%
% RETURN
%
% ReadReady - Signal that is active if the receiver is ready to receive data
%
*/
// Non-blocking status check: current value of the channel's ReadReady signal
macro expr Read_Ready(Channel) = Channel.ReadReady;
\ No newline at end of file
attention/attention_v0.1/channels.hch
0 → 100644
View file @
059e050a
/* channels.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#ifndef __CHANNELS__
#define __CHANNELS__
#include "stdlib.hch"
// Channels implemented using signals
// ***************************************************************
// Signal-based channel carrying unsigned data
struct unsignedchannel
{
signal unsigned 1 ReadReady; // driven by the receiver (see Receive)
signal unsigned 1 SendReady; // driven by the sender (see Send)
signal unsigned DataTransfer; // payload; width is not given here
};
// Signal-based channel carrying signed data
struct signedchannel
{
signal unsigned 1 ReadReady; // driven by the receiver (see Receive)
signal unsigned 1 SendReady; // driven by the sender (see Send)
signal signed DataTransfer; // payload; width is not given here
};
// Definition of a channel with default values of 0
// (the declarations are static, which is what the default of 0 relies on)
#define UNSIGNED_CHANNEL static struct unsignedchannel
#define SIGNED_CHANNEL static struct signedchannel
// Example channel declaration: declare a variable MyChannel
// as channel structure with default value of zero
// UNSIGNED_CHANNEL MyChannel;
// Blocking transfer macros (defined in channels.hcc)
macro proc Send(Channel, Input);
macro proc SignedSecureSend(Channel, Input);
macro proc UnsignedSecureSend(Channel, Input);
macro proc Receive(Channel, Output);
// These expressions allow the user to implement non-blocking channels:
// This channel structure has the readiness of the send and
// receive process exposed as signals, allowing the user to check
// the status of a channel. This can be simply expressed as
// expressions in Handel-C thus:
//Check whether the sender is ready
macro expr Send_Ready(Channel);
//Check whether the receiver is ready
macro expr Read_Ready(Channel);
#endif
attention/attention_v0.1/cores.hcc
0 → 100644
View file @
059e050a
/* cores.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "cores.hch"
// Interfaces for the top and core projects
// ***************************************************************
/*
% Input - Input channel for the 3 pixels of 8 bits (from the 3 RGB channels)
% Output - Local descriptor feature maps (27 bits)
% Control - Control word with the different parameters:
% * Control[60:45] - Latencies for the feature estimation (gabor modules)
% * Control[44:36] - Thresholds for the feature estimation
% * Control[35:26] - Number of columns of the input images
% * Control[24:21] - Not used
% * Control[20:13] - Not used
% * Control[12:0] - Latency cycles of the pipeline
% ImSize - Size of the input images
%
% DESCRIPTION
% Interface for a top architecture to interface with the attention estimation core
% RETURN
%
*/
// Top-level side of the core connection: instantiates the CoreOpticFlow
// interface (instance MyCore), wiring the Input channel, Control and ImSize
// into it, and forwards the core outputs to the channel signals forever.
macro proc InterfazTopFlowCore_lf_attention(Input, Output, Control, ImSize)
{
macro expr InWidth=24; //192;//24;
//macro expr OutWidth=51;//24;
macro expr OutWidth=27;//18; 9 bits are useless
// Ports in the first list are read from the core; those in the second list
// are driven into it
interface CoreOpticFlow( signal OutWidth CoreOut, signal unsigned 1 OutSendReady, signal unsigned 1 InReadReady)
MyCore( unsigned 1 clk=__clock, unsigned imSize=ImSize, signal InWidth CoreIn=Input.DataTransfer,
signal unsigned 1 InSendReady=Input.SendReady,
signal unsigned 1 OutReadReady=Output.ReadReady, unsigned cmd=Control)with{retime=0};
while(1)
{
par
{
// Core -> output channel (data and sender status)
Output.DataTransfer=MyCore.CoreOut;
Output.SendReady=MyCore.OutSendReady;
// Core -> input channel (receiver status)
Input.ReadReady=MyCore.InReadReady;
}
}
}
/*
% Input - Input channel for the 3 pixels of 8 bits (from the 3 RGB channels)
% Output - Local descriptor feature maps (27 bits)
% Control - Control word with the different parameters:
% * Control[60:45] - Latencies for the feature estimation (gabor modules)
% * Control[44:36] - Thresholds for the feature estimation
% * Control[35:26] - Number of columns of the input images
% * Control[24:21] - Not used
% * Control[20:13] - Not used
% * Control[12:0] - Latency cycles of the pipeline
% ImSize - Size of the input images
%
% DESCRIPTION
% Interface for the attention estimation core (used in the main.hcc)
%
% RETURN
%
*/
// Core side of the connection: declares the external ports of the core and
// keeps the Input/Output channel signals, Control and ImSize connected to
// them forever. With CORE==1 the ports are port_in/port_out interfaces;
// otherwise bus_in/bus_out interfaces with the same port names are used.
macro proc InterfazCore_lf_attention(Input, Output, Control,ImSize)
{
#if CORE==1
// Outcoming data
interface port_out() OutData(signal CoreOut = Output.DataTransfer)with{retime=0};
interface port_out() OutSendStatus(signal unsigned 1 OutSendReady = Output.SendReady)with{retime=0};
interface port_in(signal unsigned 1 OutReadReady) OutReadStatus()with{retime=0} ;
// Incoming data
// interface port_in(unsigned 1 clk with {clockport = 1}) ClockPort() ;
interface port_in(unsigned imSize) CimSize()with{retime=0};
interface port_in(signal CoreIn) InData()with{retime=0};
interface port_in(signal unsigned 1 InSendReady) InSendStatus()with{retime=0};
interface port_out() InReadStatus(signal unsigned 1 InReadReady = Input.ReadReady)with{retime=0};
// Control & Commands
interface port_in(unsigned cmd) Control_Commands()with{retime=0};
#else
// Outcoming data
interface bus_out() OutData(signal CoreOut = Output.DataTransfer)with{retime=0};
interface bus_out() OutSendStatus(signal unsigned 1 OutSendReady = Output.SendReady)with{retime=0};
interface bus_in(signal unsigned 1 OutReadReady) OutReadStatus() with{retime=0};
// Incoming data
// interface port_in(unsigned 1 clk with {clockport = 1}) ClockPort() ;
interface bus_in(unsigned imSize) CimSize()with{retime=0};
interface bus_in(signal CoreIn) InData()with{retime=0};
interface bus_in(signal unsigned 1 InSendReady) InSendStatus()with{retime=0};
interface bus_out() InReadStatus(signal unsigned 1 InReadReady = Input.ReadReady)with{retime=0};
// Control & Commands
interface bus_in(unsigned cmd) Control_Commands()with{retime=0};
#endif
while(1)
{
par
{
// External ports -> internal channel signals and parameters
Output.ReadReady=OutReadStatus.OutReadReady;
Input.DataTransfer=InData.CoreIn;
Input.SendReady=InSendStatus.InSendReady;
Control=Control_Commands.cmd;
ImSize=CimSize.imSize;
}
}
}
attention/attention_v0.1/cores.hch
0 → 100644
View file @
059e050a
/* cores.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#ifndef __CORES__
#define __CORES__
#include "stdlib.hch"
#include "channels.hch"
//#include "xircav4_lib.hch" //Platform-dependent
// Selects the interface flavour in InterfazCore_lf_attention (cores.hcc):
// port_in/port_out when 1, bus_in/bus_out otherwise
#define CORE 1 // 0 for sub-circuit test, 1 for core calls
//Attention cores
macro proc InterfazCore_lf_attention(Input, Output, Control,ImSize);
macro proc InterfazTopFlowCore_lf_attention(Input, Output, Control, ImSize);
#endif
\ No newline at end of file
attention/attention_v0.1/generic.hcc
0 → 100644
View file @
059e050a
/* generic.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "generic.hch"
// Pipeline synchronization delays
/*
% DelayCycles - Number of cycles of the delay
%
% DESCRIPTION
% This function sequentially generates the number of cycles that
% is passed in DelayCycles. It can be used for synchronization.
%
% RETURN
%
*/
// Expands to DelayCycles consecutive delay statements. DelayCycles is used as
// the bound of a seq replicator, so it presumably has to be a compile-time
// constant -- confirm.
macro proc PipelineDelay(DelayCycles)
{
seq(t=0;t<(DelayCycles);t++)
{
delay;
}
}
/*
% input - Input data
%
% DESCRIPTION
% This function creates a NaN value. The value will depend on
% the width of the input. It will be 1 followed by as many zeros
% as the size of input minus 1.
%
% RETURN
% The NaN value for the width of input.
%
*/
// Only the most-significant bit set: 1 shifted left by width(input)-1
macro expr SetNAN(input) = 1<<(width(input)-1);
attention/attention_v0.1/generic.hch
0 → 100644
View file @
059e050a
/* generic.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
// Include guard for the generic helper declarations.
#ifndef __GENERIC_HCH__
#define __GENERIC_HCH__
#include "stdlib.hch"
#include "parameters.hch"
#include "cores.hch"
#include "channels.hch"
// NOTE(review): main.hcc keeps this same include commented out as probably
// unnecessary - confirm the header is available to the attention build.
#include "bilinear_warping_v2.hch"
// Signal bundle of a 12-bit FIFO channel. Going by the field names these are
// presumably write/read enables, write/read data and full/empty flags - confirm
// against SecureFifoChannel_12, whose definition is not part of this header.
static struct SECURE_FIFO_CHANNEL_INTERFACE_12
{
signal unsigned 1 wren;
signal unsigned 1 rden;
signal unsigned 12 data_w;
signal unsigned 12 data_r;
signal unsigned 1 full;
signal unsigned 1 empty;
};
// Shorthand used to declare an instance of the structure above.
#define SECURE_FIFO_CHANNEL_12 static struct SECURE_FIFO_CHANNEL_INTERFACE_12
// Prototypes of the 12-bit FIFO macros; each takes the interface structure by name.
macro proc SecureFifoChannel_12(PtrInterface);
macro proc MyFIFORead_12(PtrInterface, data);
macro proc MyFIFOWrite_12(PtrInterface, data);
// Invalid-value marker: 1 << (width(input)-1), defined in generic.hcc.
// NOTE(review): PipelineDelay is also defined in generic.hcc but has no prototype here.
macro expr SetNAN(input);
#endif
\ No newline at end of file
attention/attention_v0.1/lklib.hcc
0 → 100644
View file @
059e050a
/* lklib.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "lklib.hch"
#include "cores.hch"
#include "parameters.hch"
/*
% buffer - Buffer with the current pixel (center, buffer[2]) and its
%          neighborhood (buffer[0..4])
% Out - Output value for the center element
%
% DESCRIPTION
% This function computes the filtered pixel (center) using the smoothing
% kernel k = [2 16 28 16 2]/64 (the coefficients add up to 64).
% The weighted sum is rounded to the nearest integer, ties away from zero,
% before the division by 64. The Retiming is not used for the
% implementation because the performance was good enough.
%
% RETURN
%
*/
macro proc Prefilter5Taps(buffer,Out) // mask=[2 16 28 16 2]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=6; // 2^6=64
    macro expr DataWidth=(width(buffer[0])+6); // 6 extra bits hold the x64 gain of the kernel
    signed DataWidth Register[3], aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Symmetric taps share one adder; x2 and x16 are plain shifts
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[4],DataWidth))<<1;
        Register[1]=(adjs(buffer[1],DataWidth)+adjs(buffer[3],DataWidth))<<4;
        Register[2]=(adjs(buffer[2],DataWidth))*28;
        aux0= Register[0] + Register[1] + Register[2];
        // Rounding. `>>` on a signed value sign-extends, i.e. it rounds
        // towards minus infinity. Round-half-away-from-zero therefore needs
        // +half for positive sums and +(half-1) for negative sums.
        // (Subtracting half, as is done with a truncating division, would
        // turn e.g. -64/64 into -2.)
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        //par(i=1;i<(Retiming);i++)
        //{
        // aux[i]=aux[i-1];
        //}
        Out= aux[Retiming-1];
    }
}
/*
% buffer - Buffer with the current pixel (center, buffer[1]) and its
%          neighborhood (buffer[0..2])
% Out - Output value for the center element
%
% DESCRIPTION
% This function computes the filtered pixel (center) using the smoothing
% kernel k = [14 35 14]/64 (note that the coefficients add up to 63).
% The weighted sum is rounded to the nearest integer, ties away from zero,
% before the division by 64. The Retiming is not used for the
% implementation because the performance was good enough.
%
% RETURN
%
*/
macro proc Prefilter3Taps(buffer,Out) // mask=[14 35 14]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=6; // 2^6=64
    macro expr DataWidth=(width(buffer[0])+7);
    signed DataWidth Register[2], aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // The two outer taps share one multiplier
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[2],DataWidth))*14;
        Register[1]=(adjs(buffer[1],DataWidth))*35;
        aux0= Register[0] + Register[1];
        // Rounding. `>>` on a signed value sign-extends (rounds towards
        // minus infinity), so round-half-away-from-zero needs +half for
        // positive sums and +(half-1) for negative sums.
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        Out= aux[0];
    }
}
/*
% buffer - Buffer with the current pixel (center, buffer[2]) and its
%          neighborhood (buffer[0..4])
% Out - Output value for the center element
%
% DESCRIPTION
% This function computes the filtered pixel (center) using the derivative
% kernel k = [7 18 0 -18 -7]/64. Only 5 of the 6 divisor bits are shifted
% out, so the result keeps one fractional bit (it is twice the /64 value).
% The result is rounded to the nearest integer, ties away from zero.
% The Retiming is not used for the implementation because the performance
% was good enough.
%
% RETURN
%
*/
macro proc Diff5Taps(buffer,Out) // mask=[7 18 0 -18 -7]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=5; // 2^6=64 but we take 1 decimal bit--> 5.
    macro expr DataWidth=(width(buffer[0])+6);
    signed DataWidth Register[2], aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Antisymmetric taps: one subtraction and one multiplier per pair
        Register[0]=(adjs(buffer[0],DataWidth)-adjs(buffer[4],DataWidth))*7;
        Register[1]=(adjs(buffer[1],DataWidth)-adjs(buffer[3],DataWidth))*18;
        //xilinxmult(Register[0], (adjs(buffer[0],DataWidth)-adjs(buffer[4],DataWidth)) ,((int 18) 7) );
        //xilinxmult(Register[1], (adjs(buffer[1],DataWidth)-adjs(buffer[3],DataWidth)) ,((int 18) 18) );
        aux0= Register[0] + Register[1];
        // Rounding. Derivatives are negative about half of the time, and
        // `>>` on a signed value rounds towards minus infinity, so
        // round-half-away-from-zero needs +half for positive sums and
        // +(half-1) for negative sums (subtracting half biases every
        // negative result by about one LSB).
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        //par(i=1;i<(Retiming);i++)
        //{
        // aux[i]=aux[i-1];
        //}
        Out= aux[Retiming-1];
    }
}
/*
% buffer - Buffer with the current pixel (center, buffer[1]) and its
%          neighborhood (buffer[0..2])
% Out - Output value for the center element
%
% DESCRIPTION
% This function computes the filtered pixel (center) using the derivative
% kernel k = [29 0 -29]/64. Only 5 of the 6 divisor bits are shifted out,
% so the result keeps one fractional bit (it is twice the /64 value).
% The result is rounded to the nearest integer, ties away from zero.
% The Retiming is not used for the implementation because the performance
% was good enough.
%
% RETURN
%
*/
macro proc Diff3Taps(buffer,Out) // mask=[29 0 -29]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=5; // 2^6=64 but we take 1 decimal bit--> 5.
    macro expr DataWidth=(width(buffer[0])+6);
    signed DataWidth Register, aux0;
    //signed (width(Out)) aux[Retiming];
    signed (width(Out)) aux;
    par
    {
        Register=(adjs(buffer[0],DataWidth)-adjs(buffer[2],DataWidth))*29;
        // Extra register stage (no arithmetic)
        aux0 = Register;
        // Rounding. `>>` on a signed value rounds towards minus infinity,
        // so round-half-away-from-zero needs +half for positive values and
        // +(half-1) for negative values.
        if(sign(aux0))
            aux= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        Out = aux;
    }
}
/*
% buffer - Buffer with the current element (center, buffer[2]) and its
%          neighborhood (buffer[0..4])
% Out - Output value for the center element
%
% DESCRIPTION
% This function computes the weighted value of the center element using
% the Gaussian low-pass kernel k = [1 4 6 4 1]/16. Only 3 of the 4 divisor
% bits are shifted out, so the result is twice the /16 value.
% The result is rounded to the nearest integer, ties away from zero.
% The Retiming is not used for the implementation because the performance
% was good enough.
%
% RETURN
%
*/
// ***************************************************************************
macro proc Weighting5(buffer,Out) // mask=[1 4 6 4 1]/16
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=3; // 2^4=16 but the whole derivative range is not used.
                               // --> one bits more is available
    macro expr DataWidth=(width(buffer[0])+5); //--> 5 is more accurate!!!
    signed DataWidth Register[3],aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Symmetric taps share one adder; x4 is a plain shift
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[4],DataWidth));
        Register[1]=(adjs(buffer[1],DataWidth)+adjs(buffer[3],DataWidth))<<2;
        Register[2]=(adjs(buffer[2],DataWidth))*6;
        aux0= Register[0] + Register[1] + Register[2];
        // Rounding. The weighted products can be negative, and `>>` on a
        // signed value rounds towards minus infinity, so
        // round-half-away-from-zero needs +half for positive sums and
        // +(half-1) for negative sums.
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        //par(i=1;i<(Retiming);i++)
        //{
        // aux[i]=aux[i-1];
        //}
        //Out= aux[Retiming-1];
        Out= aux[0];
    }
}
/*
% buffer - Buffer with the current element (center, buffer[1]) and its
%          neighborhood (buffer[0..2])
% Out - Output value for the center element
%
% DESCRIPTION
% This function computes the weighted value of the center element using
% the Gaussian low-pass kernel k = [1 2 1]/4. Only 1 of the 2 divisor bits
% is shifted out, so the result is twice the /4 value.
% The result is rounded to the nearest integer, ties away from zero.
% The Retiming is not used for the implementation because the performance
% was good enough.
%
% RETURN
%
*/
macro proc Weighting3(buffer,Out) // mask=[1 2 1]/4
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=1; // 2^2=4 but the whole derivative range is not used.
                               // --> one bits more is available
    macro expr DataWidth=(width(buffer[0])+2);
    signed DataWidth Register[2],aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[2],DataWidth));
        Register[1]=(adjs(buffer[1],DataWidth))<<1;
        aux0= Register[0] + Register[1];
        // Rounding. `>>` on a signed value rounds towards minus infinity,
        // so round-half-away-from-zero needs +half for positive sums and
        // +(half-1) for negative sums. With DivisorShift=1 that is +1 and
        // +0 respectively.
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        // Optional retiming registers (none are generated while Retiming==1)
        par(i=1;i<(Retiming);i++)
        {
            aux[i]=aux[i-1];
        }
        Out= aux[Retiming-1];
    }
}
/*
% Input - Input value for the convolution (one new sample per call)
% Output - Result of the convolution
% KernelX - Filter macro applied to the window of 5 consecutive inputs
% KernelY - Filter macro applied to the window rebuilt from the line buffers
% ColumnLength - Length of one buffered line: the line-buffer address
%                counts 0..ColumnLength-1 and then wraps
%
% DESCRIPTION
% This function computes the separable 2D convolution of the input.
% KernelX filters a 5-sample shift register fed by Input. Its result is
% the newest of 5 samples handed to KernelY; the 4 older samples come from
% 4 block-RAM line buffers, each holding one previously filtered line.
% On every call each buffer is read at the current address and rewritten
% with the sample of the next-newer line, so the lines age by one buffer
% per pass.
%
% RETURN
%
*/
macro proc SpatialConvolutions_optf(Input,Output,KernelX,KernelY, ColumnLength)
{
// NOTE(review): PipeLatency, Retiming and aux are declared but not referenced in this body.
macro expr PipeLatency=6 + 2;
macro expr Retiming=1; // Retiming value = Retiming-1
// Declare MPRAM and access macros
// Four dual-port memories (one read port, one write port each),
// MAX_RES_X/SCALE entries deep.
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[4] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
signed (width(Input)) DataArrayX[5], DataArrayY[5] ;
// col is the read address; colbis trails it by one cycle and is the write address
static unsigned (log2ceil(MAX_RES_X/SCALE)) col=1, colbis=0;
signed (width(Output)) aux[Retiming];
// Macro Begin
// ----------------------------------------------------
// Every statement below executes in the same clock cycle; the registers
// form the pipeline stages.
par
{
// Read data into array every cycle
DataArrayX[4]=Input;
// Shift X data through array
par (i = 0; i != 4; i++)
{
DataArrayX[i] = DataArrayX[i+1];
}
// First pass: the result becomes the newest entry of the second window
KernelX(DataArrayX,DataArrayY[4]);
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns
// Advance the buffer address, wrapping after ColumnLength entries
col= col>=(ColumnLength-1) ? 0 : col+1;
colbis= col;
// Read data into array every cycle
par(r1=0;r1!=4;r1++)
{
// Fill data through array
DataArrayY[r1] = readRAM(adju(r1,3),col);
}
// Shift array and write data into block RAMs every cycle
par(r2=0;r2!=4;r2++)
{
// Buffer r2 receives the sample that belonged to buffer r2+1
// (buffer 3 receives the fresh KernelX result)
writeRAM(adju(r2,3),colbis,DataArrayY[r2+1]);
}
// Second pass over the 5 buffered samples
KernelY(DataArrayY,Output);
} // End Global par
}
/*
% Input0 - Input derivative (first element for the product)
% Input1 - Input derivative (second element for the product)
% Output - Weighted (smoothed) product Input0 x Input1
% ColumnLength - Length of one buffered line: the line-buffer address
%                counts 0..ColumnLength-1 and then wraps
%
% DESCRIPTION
% This function multiplies the derivatives in Input0 and Input1 and
% smooths the product with a separable 2D filter that uses the same
% kernel (Weigh, currently Weighting5) in both passes. The first pass
% runs on a shift register of 5 consecutive products; the second pass
% runs on the first-pass result plus the 4 lines kept in the MPRAM.
%
% The product is computed on 2*width(Input0) bits, which is also the
% width of the buffers. Both operands are extended to that width, so the
% macro also compiles when Input1 is narrower or wider than Input0
% (a wider Input1 may not fit the product width).
%
% RETURN
%
*/
macro proc WeightingMatrix_optf(Input0, Input1,Output, ColumnLength)
{
    macro expr PipeLatency=6 + 2;
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr Weigh=Weighting5; // Weighting5 or Weighting3
    // Declare MPRAM and access macros
    static mpram
    {
        rom <signed (width(Input0)*2)> Read[(MAX_RES_X/SCALE)]; // Read port
        wom <signed (width(Input0)*2)> Write[(MAX_RES_X/SCALE)]; // Write port
    } ColumnsBuffer[4] with {block = "BlockRAM"}; // 10, no 4
    macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
    macro proc writeRAM (row,col,data)
    {
        (ColumnsBuffer[row]).Write[col]=data;
    }
    signed (width(Input0)*2) DataArrayX[5], DataArrayY[5] ; // 11, no 5
    // col is the read address; colbis trails it by one cycle and is the write address
    static unsigned (log2ceil(MAX_RES_X/SCALE)) col=1, colbis=0;
    signed (width(Output)) aux[Retiming];
    // Macro Begin
    // ----------------------------------------------------
    par
    {
        // Read data into array every cycle.
        // Both operands must have the width of DataArrayX (2*width(Input0)):
        // Handel-C requires equal operand widths and the product keeps
        // that width.
        DataArrayX[4]=adjs(Input0,2*width(Input0))*adjs(Input1,2*width(Input0));
        // CASE (A): 5x5, 3x3 weighing function
        // Shift X data through array
        par (i = 0; i != 4; i++)
        {
            DataArrayX[i] = DataArrayX[i+1];
        }
        Weigh(DataArrayX,DataArrayY[4]);
        /* :::::::::::::::::::::::::::::::::::::::::: */
        // Operations by columns
        col= col>=(ColumnLength-1) ? 0 : col+1;
        colbis= col;
        // Read data into array every cycle
        par(r1=0;r1!=4;r1++)
        {
            // Fill data through array
            DataArrayY[r1] = readRAM(adju(r1,3),col);
        }
        // Shift array and write data into block RAMs every cycle
        par(r2=0;r2!=4;r2++)
        {
            writeRAM(adju(r2,3),colbis,DataArrayY[r2+1]);
        }
        Weigh(DataArrayY,Output);
    } // End Global par
}
/*
% DataIn - Input window (the same pixel taken from the three frames)
% dt - Result of the derivative filter (Diff3Taps)
% st - Result of the smoothing filter (Prefilter3Taps)
%
% DESCRIPTION
% Runs the 3-tap derivative and the 3-tap smoothing filter on the same
% input window. Both filters work in parallel and independently of each
% other.
%
% RETURN
%
*/
macro proc TemporalDerivative_optf(DataIn, dt, st)
{
    // Derivative and smoothing of the 3-frame window, side by side
    par
    {
        Diff3Taps(DataIn, dt);
        Prefilter3Taps(DataIn, st);
    }
}
/*
% FractionalShift - Number of bits for the precision of the division
%                   (NOTE(review): not referenced in the body below)
% detTH - Energy threshold, compared against the scaled determinant
% Axx - IxIx*weight
% Axy - IxIy*weight
% Ayy - IyIy*weight
% Axt - IxIt*weight
% Ayt - IyIt*weight
% VxOut - X Optical flow result
% VyOut - Y Optical flow result
%
% DESCRIPTION
% This function solves the system (see paper in main.hcc). To
% perform the division, a divider is required. We use a CoreGenerator
% standard division core to improve the final performance. As we are
% always working with integers, the numerators and the determinant are
% scaled with shifts to keep part of the fractional precision.
% Results whose determinant is not above detTH are replaced by the
% invalid-value marker SetNAN().
%
% RETURN
%
*/
macro proc FIXPOINTftu_optf(FractionalShift, detTH, Axx, Axy, Ayy, Axt, Ayt, VxOut, VyOut)
{
// Retiming is the depth of the output register chain Vx[]/Vy[].
// NOTE(review): FRACTBITS and PipeLatency are not referenced in this body.
macro expr Retiming=7;
macro expr FRACTBITS=5;
macro expr FPSIZE=(2*width(Axx)+1);
macro expr PipeLatency=0;
macro expr MAX_24b = 16777215; //2^24 - 1
// fix-point data registers
signed FPSIZE velx, vely, detA, Aux0, Aux1, Aux2, Aux3, Aux4, Aux5;
signed DIVIDER_INPUT detAbis, velxbis, velybis;
signed DIVIDER_INPUT Vx_big, Vy_big;
// NOTE(review): Vy is declared with width(VxOut) but written with
// width(VyOut) below - assumes both outputs have the same width.
signed (width(VxOut)) Vx[Retiming], Vy[Retiming];
unsigned 1 AbovedetTH[DIVIDER_LATENCY];
// Macro Begin
// ----------------------------------------------------
// Every statement below executes in the same clock cycle; the registers
// form the pipeline stages.
par
{
//Computing the values in the determinant
Aux0=adjs(Axy,FPSIZE)*adjs(Ayt,FPSIZE);
Aux1=adjs(Ayy,FPSIZE)*adjs(Axt,FPSIZE);
Aux2=adjs(Axx,FPSIZE)*adjs(Ayt,FPSIZE);
Aux3=adjs(Axy,FPSIZE)*adjs(Axt,FPSIZE);
Aux4=adjs(Axx,FPSIZE)*adjs(Ayy,FPSIZE);
Aux5=adjs(Axy,FPSIZE)*adjs(Axy,FPSIZE);
// Numerators are scaled by 2^-4, the determinant by 2^-8
velx=(Aux0>>4)-(Aux1>>4);
vely=(Aux2>>4)-(Aux3>>4);
detA=(Aux4>>8)-(Aux5>>8);
// NOTE(review): only the positive side is checked. A value below -MAX_24b
// is not caught and loses its upper bits in the else branch.
if((detA > MAX_24b) || (velx > MAX_24b) || (vely > MAX_24b))//Reducing errors
par{
detAbis = 1; //TH is at least 1
velxbis = 1;
velybis = 1;
}
else
par{
// Keep the sign bit and the 24 low bits (25 bits in total)
detAbis = adjs(detA[FPSIZE-1]@detA[23:0], width(detAbis));
velxbis = adjs(velx[FPSIZE-1]@velx[23:0], width(velxbis));
velybis = adjs(vely[FPSIZE-1]@vely[23:0], width(velybis));
}
// New pipelined division unit
par
{
division_core(velxbis, detAbis, Vx_big);
//Vx_big = velxbis;
division_core(velybis, detAbis, Vy_big);
//Vy_big = velybis;
}
// Control detA > TH
AbovedetTH[0]=(detAbis) > ((signed)adju(detTH,DIVIDER_INPUT));
// delays for threshold and div (synchronization)
// The flag travels through DIVIDER_LATENCY registers so that it is
// presumably aligned with the quotient of the same pixel.
par(d=1;d<DIVIDER_LATENCY;d++)
{
AbovedetTH[d]=AbovedetTH[d-1];
}
//Energy threshold
if (AbovedetTH[DIVIDER_LATENCY-1]!=0)
par
{
Vx[0]=adjs(Vx_big, width(VxOut));
// The Y quotient is negated before it is stored
Vy[0]=adjs(-Vy_big, width(VyOut));
}
else
par
{
//Set to NaN (non valid values)
Vx[0]=SetNAN(VxOut);
Vy[0]=SetNAN(VyOut);
}
//Retiming stages (improving final performance)
par(k=1;k<Retiming;k++)
{
Vx[k]=Vx[k-1];
Vy[k]=Vy[k-1];
}
//Writing the outputs
VxOut=Vx[Retiming-1];
VyOut=Vy[Retiming-1];
}
}
/*
% Num - Numerator (dividend)
% Den - Denominator (divisor)
% Result - Quotient
%
% DESCRIPTION
% This function computes the division of Num by Den, obtaining the
% quotient that is returned in result. It could be done using the
% standard Handel-C implementation, simply as result = Num/Den.
% The problem is that the performance is affected by the required
% logic and resources. This is why we are using a core from
% the core Generator. The interface is divider_25 because we are using
% DIVIDER_INPUT (25) bits for the division to obtain a better precision.
%
% RETURN
%
*/
macro proc division_core(Num, Den, result)
{
// Enable for Cores
// Clock-enable of the core; it is a signal whose default (unassigned) value is 0.
static signal unsigned 1 enable=0;
// Instance of the external divider. Only the quotient is read in this macro;
// the remainder (remd) and rfd outputs are left unused.
// Den is sign-extended to DIVIDER_INPUT bits, Num is connected as it is.
interface divider_25 (signed DIVIDER_INPUT quot, signed DIVIDER_INPUT remd, unsigned 1 rfd) divider(signed DIVIDER_INPUT dividend = Num,
signed DIVIDER_INPUT divisor = adjs(Den,DIVIDER_INPUT), unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
par
{
//Enabling division Core: the signal is driven to 1 during each clock cycle in which this par executes
enable=1;
result = divider.quot;
}
}
attention/attention_v0.1/lklib.hch
0 → 100644
View file @
059e050a
/* lklib.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
// Include guard for the Lucas-Kanade helper library declarations.
#ifndef __LKLIB__
#define __LKLIB__
#include "stdlib.hch"
#include "generic.hch"
#define XYTDERIVATIVESIZE 9
#define PIXELSIZE 8
#define DIVIDER_INPUT 25 //Input size of the divider core
// Parenthesised so that the macro can be used safely inside larger
// expressions (e.g. 2*DIVIDER_LATENCY); the value is unchanged.
#define DIVIDER_LATENCY (DIVIDER_INPUT+4+1) //Latency of the divider core
// 1D filter kernels (defined in lklib.hcc)
macro proc Prefilter5Taps(buffer,Out);
macro proc Prefilter3Taps(buffer,Out);
macro proc Diff5Taps(buffer,Out);
macro proc Diff3Taps(buffer,Out);
// NOTE(review): lklib.hcc defines Weighting5 and Weighting3; no definition of a
// plain Weighting is visible there. The prototype is kept for compatibility.
macro proc Weighting(buffer,Out);
macro proc Weighting5(buffer,Out);
macro proc Weighting3(buffer,Out);
// 2D building blocks and the flow solver
macro proc SpatialConvolutions_optf(Input,Output,KernelX,KernelY, ColumnLength);
macro proc WeightingMatrix_optf(Input0, Input1,Output, ColumnLength);
macro proc TemporalDerivative_optf(DataIn, dt, st);
macro proc FIXPOINTftu_optf(FractionalShift, detTH, Axx, Axy, Ayy, Axt, Ayt, VxOut, VyOut);
macro proc division_core(Num, Den, quot);
#endif
\ No newline at end of file
attention/attention_v0.1/main.hcc
0 → 100644
View file @
059e050a
/* main.hcc
% Pixels - RGB Input from channel
% * Pixels[7:0] - Red color channel
% * Pixels[15:8] - Green color channel
% * Pixels[23:16] - Blue color channel
% Control - Control word with the different parameters:
% * Control[60:45] - Latencies for the feature estimation (gabor modules)
% * Control[44:36] - Thresholds for the feature estimation
% * Control[35:26] - Number of columns of the input images
% * Control[24:21] - Not used
% * Control[20:13] - Not used
% * Control[12:0] - Latency cycles of the pipeline
%
% RETURN
% Output - Energy, 4 orientation maps, and RG and BY color differences
%
% DESCRIPTION
% A Handel-C implementation of the idea of
% L. Itti and C. Koch, Computational modelling of visual attention, Nature Review Neuroscience,
% 2(3), pp. 194 – 203, 2001.
% F. Barranco, J. Diaz, B. Prieto, and E. Ros, Bottom-up visual attention model based on
% FPGA, in Electronics, Circuits and Systems (ICECS), pp. 328 – 331, 2012.
%
% Note that the paper describes most parameters of the algorithm and that it
% also describes a whole architecture for a coarse-to-fine estimation of the saliency.
% This file represents the implementation for the feature maps that combined can allow
% the saliency estimation. We also include the normalization operator.
%
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
//Native Handel-C libraries
#include "stdlib.hch"
//Handel-C custom libraries
#include "cores.hch"
#include "channels.hch"
#include "GaborPrimitives.hch"
#include "generic.hch"
//#include "opticflow.hch" //I think we do not need it but it is included in the package
//#include "bilinear_warping_v2.hch" //I think we do not need it but it is included in the package
//Set the clock values here
//interface port_in (unsigned 1 clk with {clockport = 1}) ClockPort (); //clk =__clock) ClockPort() ;
//set clock = internal ClockPort.clk with { rate = 50 };
/****************************************************************
* Function : main *
****************************************************************/
// Top level of the attention feature core. Two branches run in parallel:
// the channel/control interface (InterfazCore_lf_attention) and an endless
// loop that processes one image per iteration. For every received RGB pixel
// the loop computes, in one long pipeline, the Gabor energy and orientation
// features of the gray value and the RG / BY colour opponencies, and sends
// them packed in one word once the initial pipeline latency has elapsed.
void main(void)
{
// NOTE(review): adjust is not referenced in this function.
macro expr adjust=36;
// Evaluates to 38: depth of the RG/BY delay chains at the end of the colour branch
macro expr LATENCY_DIFFERENCE = 79-(35-3+1+8); //After removing atan2 cores from primitives (primitives_short)
UNSIGNED_CHANNEL Output;
UNSIGNED_CHANNEL Input;
unsigned int 24 Pixels;
// NOTE(review): XYTDERIVATIVESIZE, DIVIDER_INPUT and DIVIDER_LATENCY are defined in
// lklib.hch, which this file does not include directly - presumably it is
// reached through one of the included headers; confirm.
signed int XYTDERIVATIVESIZE Data[3]; //3 color channels
signed int F_BITS fe[NORIENTATIONS], fo[NORIENTATIONS];
signed int F_BITS fetmp[NORIENTATIONS], fotmp[NORIENTATIONS];
signal <unsigned int 61> Control;
signal <unsigned 1> rst;
static unsigned int 4 nc=4;
unsigned int 10 Columns;
unsigned int 13 PipeLatency;
signed int 12 Threshold;
unsigned 9 Pr_Threshold;
unsigned 1 end, enable;
unsigned int 13 PipeDelay;
unsigned 21 counter;
unsigned 21 ImSize;
// NOTE(review): Latencies is assigned and used below, but this declaration is
// commented out - it has to be declared in an included header; confirm.
//static unsigned 16 Latencies=22583; //for the gabor modules
static signed XYTDERIVATIVESIZE threshold =25;// approx. 1/10 of max(R,G,B)
//New variables
signed (XYTDERIVATIVESIZE) Data_gray;
signed (XYTDERIVATIVESIZE+6) gray_value;
unsigned 9 energy;
unsigned int 9 orientation[NORIENTATIONS];
signed int CONV_BITS FNYNX[16];
signed (XYTDERIVATIVESIZE) SmoothPixel[NFRAMES];
signed XYTDERIVATIVESIZE R, G, B; //signed for the subsequent stages
unsigned 1 max_RGB_thd[DIVIDER_LATENCY];
signed XYTDERIVATIVESIZE max_RGB, max_RGB_1;
signed DIVIDER_INPUT R_1, G_1, B_1, min_RG, den, RG_num, BY_num, RG_pre, BY_pre;
signed DIVIDER_INPUT R_2, G_2, B_2, min_RG_1; //relative min
signed XYTDERIVATIVESIZE RG, BY, RG_out[LATENCY_DIFFERENCE], BY_out[LATENCY_DIFFERENCE], RG_last, BY_last;
par
{
//Call interface with Core local features for attention
InterfazCore_lf_attention(Input, Output, Control, ImSize);
//Running continuously
while(1)
{
// One clock cycle: reset the per-image state and latch the control word
par
{
enable=0;
end=0;
PipeDelay=0;
counter=0;
Latencies = Control[60:45];
Pr_Threshold = Control[44:36];
Columns = Control[35:26];
nc = Control[24:21]; //Not used
Threshold = (Control[20:13]==0) ? 0b011111111111 : ((signed 12) (0@Control[20:13])); //Not used
PipeLatency = Control[12:0];
}
// One iteration per pixel, until ImSize results have been sent
do
{
// All the instruction being executed at the same time: long pipeline
// There is an initial latency:
par
{
enable=1;
// Reading parameters
Latencies = Control[60:45];
Pr_Threshold = Control[44:36];
Columns = Control[35:26];
nc = Control[24:21]; //Not used
Threshold = (Control[20:13]==0) ? 0b011111111111 : ((signed 12) (0@Control[20:13])); //Not used
PipeLatency = Control[12:0];
//Receive data (three pixels, RGB)
//s1
Receive(Input, Pixels);
//Extracting frame data
//s2
// Each 8-bit colour value is zero-extended to XYTDERIVATIVESIZE bits and then treated as signed
Data[0]= (signed) adju(Pixels[7:0], XYTDERIVATIVESIZE); //R value
Data[1]= (signed) adju(Pixels[15:8], XYTDERIVATIVESIZE);//G value
Data[2]= (signed) adju(Pixels[23:16], XYTDERIVATIVESIZE);//B value
par
{
//s3
//Computing the Gray value
//Constants multiplied by 64. Original formula: Gray = R*0.299 + G*0.587 + B*0.114
// The integer weights 19 + 38 + 7 add up to 64
gray_value = adjs(Data[0], width(gray_value))*19 + adjs(Data[1], width(gray_value))*38 + adjs(Data[2], width(gray_value))*7;
//s4
//Adjusting the size
// Dropping the 6 least significant bits undoes the x64 scaling
Data_gray = adjs(gray_value\\6, XYTDERIVATIVESIZE);
//s5
//-----------------------------------------------------------------
//Par for the Energy and Orientation features (based on Gabor filters)
//-----------------------------------------------------------------
GaborBase(Data_gray, FNYNX, Columns);
BuildGabor(FNYNX, fe, fo);
Primitives_short(fe, fo, energy, orientation, Pr_Threshold, Latencies);
}
// Colour-opponency branch (RG and BY), in parallel with the Gabor branch
par{
//Spatial convolutions: Using E. Simoncelli derivative and smoothing filters
par(f=0;f<NFRAMES;f++) //NFRAMES == 3
{
//Latency == 2*Columns + 11
//SpatialConvolutions(((signed)adju(Data[f], XYTDERIVATIVESIZE)), SmoothPixel[f], Prefilter5Taps, Prefilter5Taps, Columns);
SpatialConvolutions_last(Data[f], SmoothPixel[f], Prefilter5Taps, Prefilter5Taps, Columns);
}
//Splitting frame data
//-----------------------------------------------------------------
R = SmoothPixel[0]; //R value
G = SmoothPixel[1]; //G value
B = SmoothPixel[2]; //B value
//Compute maximum and minimum value for yellow and normalization
//Computing relative RGB maximum and relative RG minimum
//-----------------------------------------------------------------
if ( R > G)
par
{
min_RG = (signed DIVIDER_INPUT)(0@G); //zero-extended to DIVIDER_INPUT bits (9 fractional bits are added by the <<9 below, for the next division)
if(R > B)
par
{
max_RGB = R;
}
else
par
{
max_RGB = B;
}
}
else
par
{
min_RG = (signed DIVIDER_INPUT)(0@R); //zero-extended to DIVIDER_INPUT bits (9 fractional bits are added by the <<9 below, for the next division)
if(G > B)
par
{
max_RGB = G;
}
else
par
{
max_RGB = B;
}
}
par
{
R_1 = (signed DIVIDER_INPUT)(0@R);
G_1 = (signed DIVIDER_INPUT)(0@G);
B_1 = (signed DIVIDER_INPUT)(0@B);
}
//adjusting sizes for the division
// The sign bit is kept and the remaining bits are shifted left by 9 (x512)
par
{
R_2 = (signed)(R_1[DIVIDER_INPUT-1]@(R_1[DIVIDER_INPUT-2:0]<<9));
G_2 = (signed)(G_1[DIVIDER_INPUT-1]@(G_1[DIVIDER_INPUT-2:0]<<9));
B_2 = (signed)(B_1[DIVIDER_INPUT-1]@(B_1[DIVIDER_INPUT-2:0]<<9));
min_RG_1 = (signed)(min_RG[DIVIDER_INPUT-1]@(min_RG[DIVIDER_INPUT-2:0]<<9));
max_RGB_1 = max_RGB;
}
//Max_RGB inversion for normalization
//-----------------------------------------------------------------
par
{
RG_num = (R_2 - G_2); //DIVIDER_INPUT;
BY_num = (B_2 - min_RG_1);
max_RGB_thd[0]=(max_RGB_1 > threshold); //Discard unreliable values: less than 1/10 of max. intensity of the image
den = (signed DIVIDER_INPUT)(0@max_RGB_1);
}
//Normalize by the relative maximum
//-----------------------------------------------------------------
par
{
division_core(RG_num, den, RG_pre);//divider operands are DIVIDER_INPUT bits wide
division_core(BY_num, den, BY_pre);//divider operands are DIVIDER_INPUT bits wide
}
// The reliability flag is delayed by DIVIDER_LATENCY registers, presumably to match the divider
par(d=1;d<DIVIDER_LATENCY;d++)
{
max_RGB_thd[d]=max_RGB_thd[d-1];
}
//Discard the unreliable values and compute the RG and BY ones
//RG and BY: s + 1 + 6 = 8 bits (PSize)
//RG and BY: s + 2 + 6 = 8 bits (XYTDERIVATIVESIZE)
//-----------------------------------------------------------------
if (max_RGB_thd[DIVIDER_LATENCY-1]!=0)
par
{
////RG = adjs(RG_pre\\3, XYTDERIVATIVESIZE);
////BY = adjs(BY_pre\\3, XYTDERIVATIVESIZE);
// The 2 least significant bits of each quotient are dropped
RG = adjs(RG_pre\\2, XYTDERIVATIVESIZE);
BY = adjs(BY_pre\\2, XYTDERIVATIVESIZE);
}
else
par //unreliable values
{
RG=0;
BY=0;
}
//Delaying the result (3*Columns + 5) for synchronization
par{
Delaying(RG, RG_out[0], Columns);
Delaying(BY, BY_out[0], Columns);
}
//79 is the difference between the latency of the color opponency and the Energy+Orientation computation
// The register chains below are LATENCY_DIFFERENCE (38) stages deep
par(cnt=1;cnt<LATENCY_DIFFERENCE;cnt++)
{
RG_out[cnt]=RG_out[cnt-1];
BY_out[cnt]=BY_out[cnt-1];
}
par
{
RG_last = RG_out[LATENCY_DIFFERENCE-1];
BY_last = BY_out[LATENCY_DIFFERENCE-1];
}
}
// Results are sent only after PipeLatency cycles have been counted
if(PipeDelay==(PipeLatency))
par
{
//Send the output (6 feature maps)
// Packed from most to least significant: energy, orientation[0], [2], [4], [6], RG, BY
UnsignedSecureSend(Output, ((unsigned)energy)@((unsigned)orientation[0])@((unsigned)orientation[2])@((unsigned)orientation[4])@((unsigned)orientation[6])@((unsigned)RG_last)@((unsigned)BY_last));
//Pass only orientation[0], orientation[2], orientation[4] and orientation[6]: pi, 3*pi/2, 0 and pi/2
// The image is finished when ImSize results have been sent
end=(counter==ImSize-1);
counter++;
}
else
PipeDelay++;
}
}while(!end);
} // End while(1)
} // end global par
}
attention/attention_v0.1/opticflow.hcc
0 → 100644
View file @
059e050a
#include <stdlib.hch>
#include "opticflow.hch"
// Wrapper around the external atan2 CORDIC core.
// y, x    - Inputs connected to the y_in / x_in ports of the core
// enable  - Clock enable connected to the ce port
// angle   - Receives the phase_out port of the core
// The port widths come from ATAN2WIDTH / ATAN2OUTWIDTH and the component name
// from ATAN2NAME, all defined outside this file section.
macro proc CoreATAN2CORDIC_fl(y, x, enable, angle)
{
macro expr CoreWidth = ATAN2WIDTH;
macro expr CoreOutputWidth = ATAN2OUTWIDTH;
// NOTE(review): CoreLatency is not referenced in this body.
macro expr CoreLatency = ATAN2LATENCY;
// Reference VHDL declaration of the core.
// NOTE(review): it shows 21-bit ports and no ce port, while the interface
// below connects ce - confirm against the generated core.
/* component atan2cordic
port (
x_in: IN std_logic_VECTOR(20 downto 0);
y_in: IN std_logic_VECTOR(20 downto 0);
phase_out: OUT std_logic_VECTOR(20 downto 0);
clk: IN std_logic);
end component; */
interface ATAN2NAME (signed CoreOutputWidth phase_out) atan2(signed CoreWidth x_in=x,
signed CoreWidth y_in=y, unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
angle=atan2.phase_out;
}
// Wrapper around the external 21-bit divider core (component DIVIDER_NAME).
// my_dividend, my_divisor - Operands connected to the core
// result                  - Receives the FLOW_BITS least significant bits of the quotient
// enable                  - Clock enable connected to the ce port
// The remainder (remd) and rfd outputs of the core are not used here.
macro proc CoreDIVIDER(my_dividend, my_divisor, result, enable)
{
macro expr DividerWidth = 21;
macro expr DividerOutputWidth = 21;
// NOTE(review): DividerLatency is not referenced in this body.
macro expr DividerLatency = 0;
interface DIVIDER_NAME (signed DividerOutputWidth quot, signed DividerOutputWidth remd, unsigned 1 rfd) divider(signed DividerWidth dividend = my_dividend,
signed DividerWidth divisor = my_divisor, unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
// Keep only the low FLOW_BITS bits of the quotient
result=(divider.quot)<-FLOW_BITS;
}
// Wrapper around the external 27-bit divider core (component DIVIDER_NAME_2).
// Same structure as CoreDIVIDER, with 27-bit operands and quotient.
// my_dividend, my_divisor - Operands connected to the core
// result                  - Receives the FLOW_BITS least significant bits of the quotient
// enable                  - Clock enable connected to the ce port
// The remainder (remd) and rfd outputs of the core are not used here.
macro proc CoreDIVIDER_2(my_dividend, my_divisor, result, enable)
{
macro expr DividerWidth = 27;
macro expr DividerOutputWidth = 27;
// NOTE(review): DividerLatency is not referenced in this body.
macro expr DividerLatency = 0;
interface DIVIDER_NAME_2 (signed DividerOutputWidth quot, signed DividerOutputWidth remd, unsigned 1 rfd) divider(signed DividerWidth dividend = my_dividend,
signed DividerWidth divisor = my_divisor, unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
// Keep only the low FLOW_BITS bits of the quotient
result=(divider.quot)<-FLOW_BITS;
}
// Unsigned vector sum built as a balanced adder tree (compile-time recursion).
// Adds the elements Array[begin] .. Array[Index]; every element is first
// zero-extended to Extend bits, which is also the width of the result.
//************************************************************************************
macro expr UnSumMacro(Array, begin, Index,Extend) =
    let macro expr TreeAdd(Vec, Hi, Lo, Wide) =
        // Split the range [Lo, Hi] in two halves of (almost) equal size
        let macro expr Mid = Lo + (Hi-Lo)/2; in
        select (Hi == Lo,
                adju(Vec[Hi],Wide),
                TreeAdd(Vec, Hi, Mid + 1,Wide) + TreeAdd(Vec, Mid, Lo,Wide));
    in
    TreeAdd(Array, Index, begin,Extend);
/*
macro expr SumMacro(Array, begin, Index,Extend) =
let macro expr RecurseAddAux(Array, Top, Bottom,Extend) =
let macro expr Middle = Bottom + (Top-Bottom)/2; in
select (Top == Bottom, adjs(Array[Top],Extend),
RecurseAddAux(Array, Top, Middle + 1,Extend) + RecurseAddAux(Array, Middle, Bottom,Extend));
in
RecurseAddAux(Array, Index, begin,Extend);
//***************************************************
//Macro component_velocity
//
//LATENCY = 4;
//
//
//bits format:
//IN: P[NFRAMES][NORIENT] --> sign-4-5
//
//OUT: FVreal[NFRAMES], FVimag[NFRAMES] --> sign-14-5
// LE[NFRAMES] --> sign-28-5
//
//***************************************************/
macro proc component_velocity(P, FVreal, FVimag, LE){
//***********************************************/
//Constant definitions for 3 frames
//***********************************************
macro expr PSize = 10;
macro expr SXX = 14;
macro expr SX = 6;
macro expr DEN = 6;
const int 3 XX[NFRAMES] = {1, 2, 3}; //XX3 is XX in the third dimension
const int 8 WREAL[NORIENTATIONS] = {-81, -75, -58, -31, 0, 31, 58, 75}; // 25 * {-F0 * cos(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 8 WIMAG[NORIENTATIONS] = {0, -31, -58, -75, -81, -75, -58, -31}; // 25 * {-F0 * sin(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
//***********************************************
//Declarations: Variables
//***********************************************
int (PSize+2) Sxy[NORIENTATIONS];
int (PSize+1) Sxy_0[NORIENTATIONS][NFRAMES];
int (PSize+1) Sy[NORIENTATIONS];
int PSize Sy_0[NORIENTATIONS][NFRAMES];
int (PSize+3) a[NORIENTATIONS];
int (PSize+5) a_0[NORIENTATIONS];
int (PSize+5) a_1[NORIENTATIONS];
int (PSize+12) a_2[NORIENTATIONS];
int (PSize+2) b[NORIENTATIONS];
int (PSize+4) b_0[NORIENTATIONS];
int (PSize+4) b_1[NORIENTATIONS];
int (PSize+11) b_2[NORIENTATIONS];
int (PSize+3) a3_0[NORIENTATIONS];
int (PSize+2) bs3[NORIENTATIONS];
int (PSize+2) bs3_1[NORIENTATIONS];
int (PSize+2) bs4_0[NORIENTATIONS];
int (PSize+2) bs4_1[NORIENTATIONS];
int (PSize+3) Reg[NFRAMES][NORIENTATIONS];
int (PSize+3) Reg_0[NFRAMES][NORIENTATIONS];
//Pipeline auxiliary variables
int PSize Ps0[NFRAMES][NORIENTATIONS];
int PSize Ps1[NFRAMES][NORIENTATIONS];
int PSize Ps2[NFRAMES][NORIENTATIONS];
int PSize Ps2_1[NFRAMES][NORIENTATIONS];
int PSize Ps2_2[NFRAMES][NORIENTATIONS];
int PSize Ps3[NFRAMES][NORIENTATIONS];
int PSize Ps3_1[NFRAMES][NORIENTATIONS];
int (PSize+3) LE_0[NORIENTATIONS][NFRAMES];
int (2*PSize) LE_1[NORIENTATIONS][NFRAMES];
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENTATIONS;orien++)
{
//Pipeline Stage 0
par(f=0;f<NFRAMES;f++)
{
Sxy_0[orien][f] = adjs(P[f][orien],width(Sxy_0))*adjs(XX[f],width(Sxy_0));
Sy_0[orien][f] = P[f][orien];
//Copying P for the next stage
Ps0[f][orien]=P[f][orien];
}
//Pipeline Stage 1
par
{
//Sxy[orien] = (adjs(P[0][orien],width(Sxy))*adjs(XX[0],width(Sxy)) + adjs(P[1][orien],width(Sxy))*adjs(XX[1],width(Sxy)) + adjs(P[2][orien],width(Sxy))*adjs(XX[2],width(Sxy)) + adjs(P[3][orien],width(Sxy))*adjs(XX[3],width(Sxy)) + adjs(P[4][orien],width(Sxy))*adjs(XX[4],width(Sxy)));
Sxy[orien] = SumMacro(Sxy_0[orien], 0, NFRAMES-1,width(Sxy));
//Sy[orien] = adjs(P[0][orien],width(Sy)) + adjs(P[1][orien],width(Sy)) + adjs(P[2][orien],width(Sy)) + adjs(P[3][orien],width(Sy)) + adjs(P[4][orien],width(Sy));
Sy[orien] = SumMacro(Sy_0[orien], 0, NFRAMES-1,width(Sy));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps1[f][orien]=Ps0[f][orien];
}
}
//Pipeline Stage 2_0
par
{
a_0[orien] = SXX*adjs(Sy[orien],width(a_0));
b_0[orien] = NFRAMES*adjs(Sxy[orien],width(b_0));
a_1[orien] = SX*adjs(Sxy[orien],width(a_1));
b_1[orien] = SX*adjs(Sy[orien],width(b_1));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2[f][orien]=Ps1[f][orien];
}
}
//Pipeline Stage 2_1
par
{
//a_2[orien] = (a_1[orien]-a_2[orien])*5; // 5 frames
//b_2[orien] = (b_1[orien]-b_2[orien])*5; // multiplied by 5 for following /50 division that become <<8 : 5/256 ~= 1/50
a_2[orien] = (adjs(a_0[orien],width(a_2))-adjs(a_1[orien],width(a_2))); // 3 frames
b_2[orien] = (adjs(b_0[orien],width(b_2))-adjs(b_1[orien],width(b_2)));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2_1[f][orien]=Ps2[f][orien];
}
}
//Pipeline Stage 2_2
par
{
//Using 5 decimals for a and b (*25)
//a[orien] = (adjs(SXX,PSize+15)*32*adjs(Sy[orien],PSize+15) - adjs(SX,PSize+15)*32*adjs(Sxy[orien],PSize+15))/adjs(DEN,PSize+15);
//a[orien] = a_2[orien]<<8; //for 5 frames
//a[orien] = a_2[orien]<<3; //for 3 frames
//a[orien] = adjs(((a_2[orien])*21)>>7,width(a)); //for 3 frames
a[orien] = adjs((a_2[orien]*43)>>8,width(a)); //for 3 frames
//b[orien] = adjs((NFRAMES*32*adjs(Sxy[orien],PSize+13) - adjs(SX,PSize+13)*32*adjs(Sy[orien],PSize+13))/adjs(DEN,PSize+13), width(b));
//b[orien] = adjs(b_2[orien]<<8,width(b)); // for 5 frames
//b[orien] = adjs(((b_2[orien])*21)>>7,width(b)); // for 3 frames
b[orien] = adjs((b_2[orien]*43)>>8,width(b)); // for 3 frames
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2_2[f][orien]=Ps2_1[f][orien];
}
}
//Pipeline Stage 3_0
par
{
par(f=0;f<NFRAMES;f++)
{
Reg_0[f][orien] = adjs(b[orien],width(Reg_0))*adjs(XX[f],width(Reg_0));
//Copying P for the next stage
Ps3[f][orien]=Ps2_2[f][orien];
}
//Copying b for the next stage
bs3[orien]=b[orien];
a3_0[orien] = a[orien];
}
//Pipeline Stage 3_1
par
{
par(f=0;f<NFRAMES;f++)
{
//Reg[fr][orien] = adjs(a[orien],width(Reg))+ adjs(b[orien],width(Reg))*adjs(XX[fr],width(Reg));
Reg[f][orien] = adjs(a3_0[orien],width(Reg))+ adjs(Reg_0[f][orien],width(Reg));
//Copying P for the next stage
Ps3_1[f][orien]=Ps3[f][orien];
}
//Copying b for the next stage
bs3_1[orien]=bs3[orien];
}
//Pipeline Stage 4_0
par
{
par(f=0;f<NFRAMES;f++)
{
LE_0[orien][f] = adjs(Reg[f][orien],width(LE_0)) - adjs(Ps3_1[f][orien], width(LE_0));
}
//Copying b for the next stage
bs4_0[orien]=bs3_1[orien];
}
//Pipeline Stage 4_1
par
{
par(f=0;f<NFRAMES;f++)
{
LE_1[orien][f] = adjs(LE_0[orien][f],width(LE_1))*adjs(LE_0[orien][f],width(LE_1));
}
//Copying b for the next stage
bs4_1[orien]=bs4_0[orien];
}
//Pipeline Stage 4_2
par
{
//LE[orien] = adjs(((((adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien], 2*PSize+26)*32)*(adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien],2*PSize+26)*32) + (adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32)*(adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32) + (adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32)*(adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32) + (adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32)*(adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32) + (adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32)*(adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32))/NFRAMES)\\15), width(LE));
//LE[orien] = adjs(((SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize)*21)>>6)\\2,width(LE));
LE[orien] = adjs(((SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize+2)*85)>>8)\\2,width(LE));
//LE[orien]=adjs(((LE_1[0][orien]+LE_1[1][orien]+LE_1[2][orien])>>2)\\15,width(LE));
//LE[orien] = adjs(SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize+26)\\15, width(LE));
//Simplifying the equation: FVreal = - (F0*cos(ang)/2*PI)*b[orien] --> FVreal = Wreal[orien]*b[orien] //Wreal is initialised with factor 25
// FVimag = - (F0*sin(ang)/2*PI)*b[orien] --> FVimag = Wimag[orien]*b[orien] //Wreal is initialised with factor 25
//FVreal[orien]= (adjs(bs4_1[orien],PSize+8)*adjs(WREAL[orien],PSize+8))\\6; //final size of FVreal is PSize+18
//FVimag[orien]= (adjs(bs4_1[orien],PSize+8)*adjs(WIMAG[orien],PSize+8))\\6; //final size of FVimag is PSize+18
//FVreal[orien]= ((adjs(bs4_1[orien],PSize+8)*adjs(WREAL[orien],PSize+8))\\2)<-width(FVreal); //final size of FVreal is PSize+18
//FVimag[orien]= ((adjs(bs4_1[orien],PSize+8)*adjs(WIMAG[orien],PSize+8))\\2)<-width(FVimag); //final size of FVimag is PSize+18
FVreal[orien]= ((adjs(bs4_1[orien],PSize+10)*adjs(WREAL[orien],PSize+10))\\4)<-width(FVreal); //final size of FVreal is PSize+18
FVimag[orien]= ((adjs(bs4_1[orien],PSize+10)*adjs(WIMAG[orien],PSize+10))\\4)<-width(FVimag); //final size of FVimag is PSize+18
}
}
}
//***************************************************
//Macro component_velocity_mia
//
//LATENCY = 4;
//
//
//bits format:
//IN: P[NFRAMES][NORIENT] --> sign-4-5
//
//OUT: FVreal[NORIENT], FVimag[NORIENT] --> sign-14-5
// LE[NORIENT] --> sign-28-5
//
//***************************************************
// component_velocity_mia
//
// For every orientation, fits a straight line  P ~ a + b*XX  through the
// phase samples P[frame][orien] by least squares (XX = frame index 1..3),
// then produces:
//   LE[orien]     - sum over frames of the squared residual (Reg - P),
//                   divided by NFRAMES and with the 15 low bits dropped;
//   FVreal[orien] - slope b multiplied by WREAL[orien];
//   FVimag[orien] - slope b multiplied by WIMAG[orien].
//
// Parameters (macro arguments, widths are set by the caller):
//   P      - input phases, indexed [frame][orientation]
//   FVreal - output, indexed [orientation]
//   FVimag - output, indexed [orientation]
//   LE     - output, indexed [orientation]
//
// NOTE(review): this body is written for exactly 3 frames - XX has three
// initialisers and Stage 1 / Stage 4 spell out frames 0..2 by hand - so it
// only makes sense when NFRAMES == 3.
// NOTE(review): NFRAMES, NORIENTATIONS and the replicator indices are not
// declared here; they are expected to come from the enclosing scope.
macro proc component_velocity_mia(P, FVreal, FVimag, LE){
//***********************************************
//Constant definitions
//***********************************************
/* macro expr PSize = 10;
const int 7 SXX = 55;
const int 5 SX = 15;
const int 7 DEN = 50;
macro expr NORIENT = 8;
macro expr NFRAMES = 5;
const int 4 XX[NFRAMES] = {1, 2, 3, 4, 5}; //XX3 is XX in the third dimension
const int 6 WREAL[NORIENT] = {-20, -19, -14, -8, 0, 8, 14, 19}; // 2^5 * {-F0 * cos(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 6 WIMAG[NORIENT] = {0, -8, -14, -19, -20, -19, -14, -8}; // 2^5 * {-F0 * sin(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
*/
//***********************************************/
//Constant definitions for 3 frames
//***********************************************
macro expr PSize = 10;
// Least-squares sums for XX = {1,2,3}:
//   SXX = 1+4+9 = 14, SX = 1+2+3 = 6, DEN = NFRAMES*SXX - SX*SX = 3*14 - 36 = 6
const int 7 SXX = 14;
const int 5 SX = 6;
const int 7 DEN = 6;
const int 4 XX[NFRAMES] = {1, 2, 3}; //XX is the frame (temporal) coordinate
// The tabulated values equal round(-20*cos(i*PI/8)) and round(-20*sin(i*PI/8)).
const int 6 WREAL[NORIENTATIONS] = {-20, -19, -14, -8, 0, 8, 14, 19}; // 2^5 * {-F0 * cos(i*PI/NORIENT) / (2*PI*F0*F0)}, i = 0 .. NORIENT-1
const int 6 WIMAG[NORIENTATIONS] = {0, -8, -14, -19, -20, -19, -14, -8}; // 2^5 * {-F0 * sin(i*PI/NORIENT) / (2*PI*F0*F0)}, i = 0 .. NORIENT-1
//***********************************************
//***********************************************
//Declarations: Variables
//***********************************************
//unsigned int 3 orien;
//unsigned int 3 fr;
//R
int (PSize+3) Sxy[NORIENTATIONS]; // sum over frames of P*XX
int (PSize+4) Sy[NORIENTATIONS]; // sum over frames of P
int (PSize+15) a[NORIENTATIONS]; //(PSize+10)Q5 - intercept of the fitted line
int (PSize+13) b[NORIENTATIONS]; //(PSize+8)Q5 - slope of the fitted line
int (PSize+15) Reg[NFRAMES][NORIENTATIONS]; //(PSize+11)Q5 - 5 --> because a, b are divided by DEN (6 in this 3-frame setup, 50 in the 5-frame one)
//\R
//Pipeline auxiliary variables
// Ps1..Ps3 carry P forward one stage at a time so Stage 4 can still read it.
int PSize Ps1[NFRAMES][NORIENTATIONS];
int PSize Ps2[NFRAMES][NORIENTATIONS];
int PSize Ps3[NFRAMES][NORIENTATIONS];
// bs3 is the copy of b taken in Stage 3 and consumed in Stage 4.
int (PSize+13) bs3[NORIENTATIONS];
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENTATIONS;orien++)
{
//Pipeline Stage 1
// Accumulate the two data-dependent least-squares sums.
par
{
Sxy[orien] = adjs(P[0][orien],width(Sxy))*adjs(XX[0],width(Sxy)) + adjs(P[1][orien],width(Sxy))*adjs(XX[1],width(Sxy)) + adjs(P[2][orien],width(Sxy))*adjs(XX[2],width(Sxy)); //+ adjs(P[3][orien],width(Sxy))*adjs(XX[3],width(Sxy)) + adjs(P[4][orien],width(Sxy))*adjs(XX[4],width(Sxy)));
Sy[orien] = adjs(P[0][orien],width(Sy)) + adjs(P[1][orien],width(Sy)) + adjs(P[2][orien],width(Sy));// + adjs(P[3][orien],width(Sy)) + adjs(P[4][orien],width(Sy));
//Copying P for the next stage
par(fr=0;fr<NFRAMES;fr++)
{
Ps1[fr][orien]=P[fr][orien];
}
}
//Pipeline Stage 2
// Closed-form least squares:
//   a = (SXX*Sy - SX*Sxy) / DEN      (intercept)
//   b = (NFRAMES*Sxy - SX*Sy) / DEN  (slope)
// Both numerators are multiplied by 32 first, which gives the results
// 5 fractional bits. The division by DEN is a real '/' operator here.
par
{
//Using 5 decimals for a and b (*2^5)
a[orien] = (adjs(SXX,PSize+15)*32*adjs(Sy[orien],PSize+15) - adjs(SX,PSize+15)*32*adjs(Sxy[orien],PSize+15))/adjs(DEN,PSize+15);
b[orien] = adjs((NFRAMES*32*adjs(Sxy[orien],PSize+13) - adjs(SX,PSize+13)*32*adjs(Sy[orien],PSize+13))/adjs(DEN,PSize+13), width(b));
//Copying P for the next stage
par(fr=0;fr<NFRAMES;fr++)
{
Ps2[fr][orien]=Ps1[fr][orien];
}
}
//Pipeline Stage 3
// Evaluate the fitted line at every frame coordinate: Reg = a + b*XX.
par
{
par(fr=0;fr<NFRAMES;fr++)
{
Reg[fr][orien] = adjs(a[orien],width(Reg))+ adjs(b[orien],width(Reg))*adjs(XX[fr],width(Reg));
//Copying P for the next stage
Ps3[fr][orien]=Ps2[fr][orien];
}
//Copying b for the next stage
bs3[orien]=b[orien];
}
//Pipeline Stage 4
par
{
// Residual per frame is (Reg - Ps3*32): P is scaled by 2^5 so that it is in
// the same fixed-point format as Reg. The three squared residuals are
// summed, divided by NFRAMES, and the 15 low bits are dropped (\\15)
// before the value is resized to the width of LE.
LE[orien] = adjs(((((adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien], 2*PSize+26)*32)*(adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien],2*PSize+26)*32) + (adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32)*(adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32) + (adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32)*(adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32))/NFRAMES)\\15), width(LE)); //+ (adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32)*(adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32) + (adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32)*(adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32))/NFRAMES)\\15), width(LE));
//Simplifying the equation: FVreal = - (F0*cos(ang)/2*PI)*b[orien] --> FVreal = Wreal[orien]*b[orien] //Wreal is initialised with factor 2^5
// FVimag = - (F0*sin(ang)/2*PI)*b[orien] --> FVimag = Wimag[orien]*b[orien] //Wimag is initialised with factor 2^5
// NOTE(review): the product is formed at PSize+20 bits and \\10 drops ten
// low bits, which leaves PSize+10 bits; the "PSize+18" remarks below look
// stale - confirm against the width of the caller's FVreal/FVimag.
FVreal[orien]= (adjs(bs3[orien],PSize+20)*adjs(WREAL[orien],PSize+20))\\10; //final size of FVreal is PSize+18
FVimag[orien]= (adjs(bs3[orien],PSize+20)*adjs(WIMAG[orien],PSize+20))\\10; //final size of FVimag is PSize+18
}
}
}
//***************************************************
//Macro compute_phase
//
//LATENCY = 14;
//
//bits format:
//IN: Greal, Gimag --> sign-8-1
//
//OUT: P --> sign-2-6
//
//***************************************************
// compute_phase
//
// Feeds every (Gimag, Greal) pair - indexed [frame][orientation] - to the
// CoreATAN2CORDIC_fl core and writes the core output, with its least
// significant bit dropped, to P[frame][orientation].
//
// Parameters (macro arguments):
//   Greal, Gimag - inputs, indexed [frame][orientation]
//   P            - output, indexed [frame][orientation]
//
// P is only written once the local counter PipeDelay has reached
// ATAN2LATENCY; until then the counter is incremented instead.
//
// NOTE(review): PipeDelay has no initialiser in this block, so its start
// value depends on the tool / target - confirm it really starts at 0.
// NOTE(review): 'enable' and 'PipeDelay' are single objects, yet they are
// assigned inside the replicated par branches (once per orientation, and
// PipeDelay once per frame as well). Verify that the tool accepts these
// simultaneous writes and that the counter advances as intended.
macro proc compute_phase(Greal, Gimag, P){
macro expr PipeLatency=ATAN2LATENCY;
unsigned int 5 PipeDelay; // counts up until it equals PipeLatency
signed 10 aux[NFRAMES][NORIENTATIONS]; //, auxGimag[NFRAMES][NORIENTATIONS], auxGreal[NFRAMES][NORIENTATIONS];
static signal unsigned 1 enable=0; // signal handed to the atan2 core; default 0
/*/Interface definition
interface atan(int 9 phase_out)
myatan(int 10 x_in=a, int 10 y_in=b, unsigned 1 clk= __clock) with {busformat="BI"}; */
par(orien=0; orien<NORIENTATIONS;orien++)
{
//Enabling atan2 Core: only for 1 clock cycle
enable=1;
//atan2 - core generator:
//Inputs have to be in [-1, 1]
//Outputs are in [-PI, PI]
par(fr=0;fr<NFRAMES;fr++)
{
// Disabled alternative: normalise both inputs by the larger magnitude and
// use an external 'myatan' interface instead of the CORDIC macro.
/*if(abs(Gimag[orien][fr])> abs(Greal[orien][fr])){
par
{
//Pipeline Stage 1
a=Gimag[orien][fr]/(abs(Gimag[orien][fr])+1);
b=Greal[orien][fr]/(abs(Gimag[orien][fr])+1);
//Pipeline Stage 2
P[orien][fr]=myatan.phase_out; //Latency = 13
}
}
else{
par
{
//Pipeline Stage 1
a=Gimag[orien][fr]/(abs(Greal[orien][fr])+1);
b=Greal[orien][fr]/(abs(Greal[orien][fr])+1);
//Pipeline Stage 2
P[orien][fr]=myatan.phase_out; //Latency = 13
}
} */
// Disabled alternative: replace the undefined atan2(0,0) input by (0,511).
/*/ to remove undet values of atan2(0,0)
if(Gimag[fr][orien]==0 && Greal[fr][orien]==0)
par
{
auxGimag[fr][orien]=0;
auxGreal[fr][orien]=511;
}
else
par
{
auxGimag[fr][orien]=Gimag[fr][orien];
auxGreal[fr][orien]=Greal[fr][orien];
} */
// Argument order is (imaginary, real, enable, result).
CoreATAN2CORDIC_fl(Gimag[fr][orien], Greal[fr][orien], enable, aux[fr][orien]);
// Publish the result (LSB dropped by \\1) only after the latency counter
// has reached PipeLatency; otherwise keep counting.
if(PipeDelay==PipeLatency)
P[fr][orien] = (aux[fr][orien])\\1;
else
PipeDelay++;
}
}
}
// compute_phase_top
//
// Wrapper that runs function_compute_phase on one frame's filter outputs,
// with 'delay' statements placed before and after the call according to
// 'index'.
//
// Parameters (macro arguments):
//   Greal, Gimag - inputs, indexed [orientation]
//   P            - output, indexed [orientation]
//   index        - position used to pick the number of leading / trailing
//                  delays; it is tested with ifselect, so it is presumably a
//                  compile-time constant in 0 .. NFRAMES-1 - confirm at the
//                  call sites.
//
// Three branches run in parallel:
//   1. wait NFRAMES-1 delays, then copy Greal/Gimag into auxGreal/auxGimag;
//   2. wait 'index' delays, call function_compute_phase on the aux copies,
//      then wait NFRAMES-1-index further delays;
//   3. copy P_Tmp into P.
// NOTE(review): how these three branches line up in time depends on the
// number of cycles function_compute_phase takes, which is not visible here.
macro proc compute_phase_top(Greal, Gimag, P, index)
{
signed F_BITS auxGreal[NORIENTATIONS], auxGimag[NORIENTATIONS];
signed 9 P_Tmp[NORIENTATIONS];
par
{
// Branch 1: latch the inputs after NFRAMES-1 delays.
seq
{
seq(i=0; i<NFRAMES-1; i++)
{
delay;
}
par(s=0;s<NORIENTATIONS;s++)
{
auxGreal[s]=Greal[s];
auxGimag[s]=Gimag[s];
}
}
// Branch 2: 'index' leading delays, the phase computation, then
// NFRAMES-1-index trailing delays.
seq
{
ifselect(index!=0)
{
seq(t=0; t<index; t++)
{
delay;
}
}
function_compute_phase(auxGreal,auxGimag, P_Tmp);
ifselect(index!=NFRAMES-1)
{
seq(k=index; k<NFRAMES-1; k++)
{
delay;
}
}
}//seq
// Branch 3: forward the temporary result to the caller's output.
par(o=0;o<NORIENTATIONS;o++)
{
P[o]=P_Tmp[o];
}
} // par
}
// function_compute_phase
//
// Function wrapper around the compute_phase_index macro; it simply forwards
// its three pointer arguments.
//   Greal, Gimag - pointers to F_BITS-wide signed inputs
//   P            - pointer to 9-bit signed outputs
// NOTE(review): presumably a function (rather than a direct macro call) so
// that callers such as compute_phase_top share one body - confirm.
void function_compute_phase(signed int F_BITS (*Greal),signed int F_BITS (*Gimag), signed int 9 *P)
{
compute_phase_index(Greal,Gimag,P);
}
//***************************************************
//Macro compute_phase_index
//
//LATENCY = 14;
//
//bits format:
//IN: Greal, Gimag --> sign-8-1
//
//OUT: P --> sign-2-6
//
//***************************************************
// compute_phase_index
//
// Runs the CoreATAN2CORDIC_fl core on each (Gimag[k], Greal[k]) pair and,
// once the local warm-up counter equals ATAN2LATENCY, writes the core
// output with its lowest bit dropped to P[k]. While the counter has not
// reached that value it is incremented instead.
//   Greal, Gimag - inputs, indexed [orientation]
//   P            - output, indexed [orientation]
macro proc compute_phase_index(Greal, Gimag, P)
{
    // Signal handed to the core as its enable argument (default 0).
    static signal unsigned 1 coreStart=0;
    // Raw core output, one entry per orientation.
    signed 10 rawPhase[NORIENTATIONS];
    // Warm-up counter compared against ATAN2LATENCY.
    unsigned int 5 warmupCount;

    par(k=0; k<NORIENTATIONS; k++)
    {
        // Raise the enable signal for the atan2 core.
        coreStart=1;
        CoreATAN2CORDIC_fl(Gimag[k], Greal[k], coreStart, rawPhase[k]);
        if(warmupCount!=ATAN2LATENCY)
        {
            warmupCount++;
        }
        else
        {
            P[k] = (rawPhase[k])\\1;
        }
    }
}
//***************************************************
//Macro compute_single_phase
//
//LATENCY = 14;
//
//bits format:
//IN: Greal, Gimag --> sign-8-1
//
//OUT: P --> sign-2-6
//
//***************************************************
// compute_single_phase
//
// Per orientation: passes (Gimag, Greal) to the CoreATAN2CORDIC_fl core.
// After the local counter has reached ATAN2LATENCY, the core output with
// its lowest bit dropped is written to P; before that the counter is
// incremented and P is left untouched.
//   Greal, Gimag - inputs, indexed [orientation]
//   P            - output, indexed [orientation]
macro proc compute_single_phase(Greal, Gimag, P)
{
    static signal unsigned 1 atanEnable=0;
    signed 10 atanOut[NORIENTATIONS];
    unsigned int 5 elapsed;

    /* Disabled external-core interface, kept for reference:
    interface atan(int 9 phase_out)
    myatan(int 10 x_in=a, int 10 y_in=b, unsigned 1 clk= __clock) with {busformat="BI"}; */

    par(n=0; n<NORIENTATIONS; n++)
    {
        //Enabling atan2 Core: only for 1 clock cycle
        atanEnable=1;

        //atan2 - core generator:
        //Inputs have to be in [-1, 1]
        //Outputs are in [-PI, PI]
        CoreATAN2CORDIC_fl(Gimag[n], Greal[n], atanEnable, atanOut[n]);

        if(elapsed!=ATAN2LATENCY)
        {
            elapsed++;
        }
        else
        {
            P[n] = (atanOut[n])\\1;
        }
    }
}
//***************************************************
//Macro unwrap
//
//LATENCY = 12;
//
//bits format:
//IN: Pin [NFRAMES][NORIENT] --> sign-2-6
//
//OUT: Pout[NFRAMES][NORIENT] --> sign-4-5
//
//***************************************************
// unwrap (5-frame version)
//
// Removes 2*PI jumps between consecutive frames of a phase sequence, one
// orientation at a time. For each frame pair (k, k+1):
//   D = phase[k+1] - phase[k]
//   A = |D| > PI
//   every later frame (k+1 .. NFRAMES-1) gets  -/+ DOUBLE_PI  applied when A
//   is set; the direction comes from sign(D).
// Each output is the (corrected) phase with its lowest bit dropped (\\1).
//
// Parameters (macro arguments):
//   Pin  - input phases, indexed [frame][orientation]
//   Pout - unwrapped phases, indexed [frame][orientation]
//
// The code is laid out as 12 stages; values needed by later stages are
// copied from register to register (the ...sN names give the stage that
// wrote them).
//
// NOTE(review): frame indices 0..4 are hard-coded (e.g. Pin_4[4], Pout[4]),
// so this macro is only valid for NFRAMES == 5.
// NOTE(review): PI is used below but its local definition is commented out,
// so it must be provided by the enclosing scope (presumably 201 - confirm).
// NOTE(review): assumes sign(x) yields -1 for negative x and 0 otherwise,
// so that (sign*2+1) is -1 / +1 - confirm against the sign macro.
macro proc unwrap(Pin, Pout){
//***********************************************
//Constant definitions
//***********************************************
macro expr DOUBLE_PI = 402; // 2*PI in the input's fixed-point format
//macro expr PI = 201;
macro expr PSize = 10;
macro expr NORIENT = 8; // local orientation count (other macros use NORIENTATIONS)
//macro expr NFRAMES = 5;
//***********************************************
//Declarations: Variables
//***********************************************
unsigned 3 fr;
unsigned 3 orien;
//static unsigned int 3 cur_frame= 1;
//unsigned int 3 cur_frame;
// A*   : wrap-detected flag for each frame pair
// D*   : frame-to-frame phase difference for each pair
// Pin_k: phases after the corrections of the first k-1 pairs were applied
unsigned int 1 A[NORIENT];
int PSize D[NORIENT];
unsigned int 3 cf;
int (PSize+1) Pin_2[NFRAMES][NORIENT];
int (PSize+2) Pin_3[NFRAMES][NORIENT];
int (PSize+3) Pin_4[NFRAMES][NORIENT];
int (PSize+2) D_2[NORIENT];
int (PSize+3) D_3[NORIENT];
int (PSize+4) D_4[NORIENT];
unsigned int 1 A_2[NORIENT];
unsigned int 1 A_3[NORIENT];
unsigned int 1 A_4[NORIENT];
//Pipeline auxiliary variable declarations
// Stage-to-stage copies of the differences and of the partially corrected
// phases.
int PSize Ds2[NORIENT];
int (PSize+2) D_2s5[NORIENT];
int (PSize+3) D_3s8[NORIENT];
int (PSize+4) D_4s11[NORIENT];
int (PSize-1) Pins1[NFRAMES][NORIENT];
int (PSize-1) Pins2[NFRAMES][NORIENT];
int (PSize+1) Pin_2s4[NFRAMES][NORIENT];
int (PSize+1) Pin_2s5[NFRAMES][NORIENT];
int (PSize+2) Pin_3s7[NFRAMES][NORIENT];
int (PSize+2) Pin_3s8[NFRAMES][NORIENT];
int (PSize+3) Pin_4s10[NFRAMES][NORIENT];
int (PSize+3) Pin_4s11[NFRAMES][NORIENT];
// Delay chain for output frame 0 (written in stage 1, emitted in stage 12).
int PSize Pouts1[NORIENT];
int PSize Pouts2[NORIENT];
int PSize Pouts3[NORIENT];
int PSize Pouts4[NORIENT];
int PSize Pouts5[NORIENT];
int PSize Pouts6[NORIENT];
int PSize Pouts7[NORIENT];
int PSize Pouts8[NORIENT];
int PSize Pouts9[NORIENT];
int PSize Pouts10[NORIENT];
int PSize Pouts11[NORIENT];
// Delay chain for output frame 1 (written in stage 4).
int PSize Pout_1s4[NORIENT];
int PSize Pout_1s5[NORIENT];
int PSize Pout_1s6[NORIENT];
int PSize Pout_1s7[NORIENT];
int PSize Pout_1s8[NORIENT];
int PSize Pout_1s9[NORIENT];
int PSize Pout_1s10[NORIENT];
int PSize Pout_1s11[NORIENT];
// Delay chain for output frame 2 (written in stage 7).
int PSize Pout_2s7[NORIENT];
int PSize Pout_2s8[NORIENT];
int PSize Pout_2s9[NORIENT];
int PSize Pout_2s10[NORIENT];
int PSize Pout_2s11[NORIENT];
// Delay chain for output frame 3 (written in stage 10).
int PSize Pout_3s10[NORIENT];
int PSize Pout_3s11[NORIENT];
//Initialisations
//cur_frame=1;
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENT;orien++)
{
//-------------------------------------------
//CURRENT FRAME == 1
//Pipeline Stage 1
// Frame 0 is never corrected: emit it directly (LSB dropped) and form the
// difference between frames 1 and 0.
par
{
//Writing Pout[0]
Pouts1[orien] = adjs((Pin[0][orien])\\1, width(Pout));
D[orien] = adjs(Pin[1][orien],width(D)) - adjs(Pin[0][orien],width(D));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pins1[fr][orien]=Pin[fr][orien];
}
/////////////////////////////////////////
}
//Pipeline Stage 2
// Flag a wrap when the difference magnitude exceeds PI.
par
{
A[orien] = abs(D[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pins2[fr][orien]=Pins1[fr][orien];
}
Ds2[orien]=D[orien];
//Writing Pout[0]
Pouts2[orien] = Pouts1[orien];
////////////////////////////////////////
}
//Pipeline Stage 3
// Apply the correction to frames 1..NFRAMES-1:
//   Pin_2 = Pin - DOUBLE_PI * (+1 or -1 from sign(D)) * A
// Pin_2[0] is not written here.
par
{
par(cf=1; cf<NFRAMES;cf++)
{
Pin_2[cf][orien]=adjs(Pins2[cf][orien],width(Pin_2)) - DOUBLE_PI*(adjs(sign(Ds2[orien]),width(Pin_2))*2+1) * (signed)adju(A[orien],width(Pin_2));
}
//cur_frame=cur_frame+1;
//Writing Pout[0]
Pouts3[orien] = Pouts2[orien];
}
//-------------------------------------------
//CURRENT FRAME == 2
//Pipeline Stage 4
// Frame 1 is final now: emit it and form the difference between the
// corrected frames 2 and 1.
par
{
//Writing Pout[1]
Pout_1s4[orien] = adjs((Pin_2[1][orien])\\1, width(Pout));
D_2[orien] = adjs(Pin_2[2][orien],width(D_2)) - adjs(Pin_2[1][orien],width(D_2));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_2s4[fr][orien]=Pin_2[fr][orien];
}
/////////////////////////////////////////
Pouts4[orien]=Pouts3[orien];
}
//Pipeline Stage 5
par
{
A_2[orien] = abs(D_2[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_2s5[fr][orien]=Pin_2s4[fr][orien];
}
D_2s5[orien]=D_2[orien];
Pout_1s5[orien]=Pout_1s4[orien];
Pouts5[orien]=Pouts4[orien];
////////////////////////////////////////
}
//Pipeline Stage 6
// Apply the second correction to frames 2..NFRAMES-1.
par
{
par(cf=2; cf<NFRAMES;cf++)
{
Pin_3[cf][orien]=adjs(Pin_2s5[cf][orien],width(Pin_3)) - DOUBLE_PI*(adjs(sign(D_2s5[orien]),width(Pin_3))*2+1) * (signed)adju(A_2[orien],width(Pin_3));
}
//cur_frame=cur_frame+1;
Pout_1s6[orien]=Pout_1s5[orien];
Pouts6[orien]=Pouts5[orien];
}
//-------------------------------------------
//CURRENT FRAME == 3
//Pipeline Stage 7
// Frame 2 is final now: emit it and form the difference between the
// corrected frames 3 and 2.
par
{
//Writing Pout[2]
Pout_2s7[orien] = adjs((Pin_3[2][orien])\\1, width(Pout));
D_3[orien] = adjs(Pin_3[3][orien],width(D_3)) - adjs(Pin_3[2][orien],width(D_3));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_3s7[fr][orien]=Pin_3[fr][orien];
}
Pout_1s7[orien]=Pout_1s6[orien];
Pouts7[orien]=Pouts6[orien];
/////////////////////////////////////////
}
//Pipeline Stage 8
par
{
A_3[orien] = abs(D_3[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_3s8[fr][orien]=Pin_3s7[fr][orien];
}
D_3s8[orien]=D_3[orien];
Pout_2s8[orien]=Pout_2s7[orien];
Pout_1s8[orien]=Pout_1s7[orien];
Pouts8[orien]=Pouts7[orien];
////////////////////////////////////////
}
//Pipeline Stage 9
// Apply the third correction to frames 3..NFRAMES-1.
par
{
par(cf=3; cf<NFRAMES;cf++)
{
Pin_4[cf][orien]=adjs(Pin_3s8[cf][orien],width(Pin_4)) - DOUBLE_PI*(adjs(sign(D_3s8[orien]),width(Pin_4))*2+1) * (signed)adju(A_3[orien],width(Pin_4));
}
//cur_frame=cur_frame+1;
Pout_2s9[orien]=Pout_2s8[orien];
Pout_1s9[orien]=Pout_1s8[orien];
Pouts9[orien]=Pouts8[orien];
}
//-------------------------------------------
//CURRENT FRAME == 4
//Pipeline Stage 10
// Frame 3 is final now: emit it and form the difference between the
// corrected frames 4 and 3.
par
{
//Writing Pout[3]
Pout_3s10[orien] = adjs((Pin_4[3][orien])\\1, width(Pout));
D_4[orien] = adjs(Pin_4[4][orien],width(D_4)) - adjs(Pin_4[3][orien],width(D_4));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_4s10[fr][orien]=Pin_4[fr][orien];
}
Pout_2s10[orien]=Pout_2s9[orien];
Pout_1s10[orien]=Pout_1s9[orien];
Pouts10[orien]=Pouts9[orien];
/////////////////////////////////////////
}
//Pipeline Stage 11
par
{
A_4[orien] = abs(D_4[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_4s11[fr][orien]=Pin_4s10[fr][orien];
}
D_4s11[orien]=D_4[orien];
Pout_3s11[orien]=Pout_3s10[orien];
Pout_2s11[orien]=Pout_2s10[orien];
Pout_1s11[orien]=Pout_1s10[orien];
Pouts11[orien]=Pouts10[orien];
////////////////////////////////////////
}
//Pipeline Stage 12
//Writing Pout[0..4]
// Frames 0..3 come from their delay chains; frame 4 gets the last
// correction applied here, at the width of Pin_4s11 (PSize+3), and is then
// reduced (\\1, adjs) to the width of Pout.
par
{
Pout[0][orien]=Pouts11[orien];
Pout[1][orien]=Pout_1s11[orien];
Pout[2][orien]=Pout_2s11[orien];
Pout[3][orien]=Pout_3s11[orien];
Pout[4][orien]=adjs((adjs(Pin_4s11[4][orien],width(Pin_4s11)) - DOUBLE_PI*(adjs(sign(D_4s11[orien]),width(Pin_4s11))*2+1)*(signed)adju(A_4[orien],width(Pin_4s11)))\\1, width(Pout));
}
}//par orient
}
//***************************************************
//Macro unwrap_3: version for 3 frames
//
//LATENCY = 6;
//
//bits format:
//IN: Pin [NFRAMES][NORIENT] --> sign-2-6
//
//OUT: Pout[NFRAMES][NORIENT] --> sign-4-5
//
//***************************************************
// unwrap_3 (3-frame version)
//
// Removes 2*PI jumps between consecutive frames of a phase sequence, one
// orientation at a time. For each frame pair (k, k+1):
//   D = phase[k+1] - phase[k]
//   A = |D| > PI
//   every later frame gets  -/+ DOUBLE_PI  applied when A is set; the
//   direction comes from sign(D).
// Each output is the (corrected) phase with its lowest bit dropped (\\1).
//
// Parameters (macro arguments):
//   Pin  - input phases, indexed [frame][orientation]
//   Pout - unwrapped phases, indexed [frame][orientation]; its elements are
//          assigned from PSize-wide registers below
//
// The code is laid out as 6 stages; values needed by later stages are copied
// from register to register (the ...sN names give the stage that wrote them).
//
// Frame indices 0..2 are hard-coded, so this macro is for NFRAMES == 3.
// NOTE(review): PI is used below but its local definition is commented out,
// so it must be provided by the enclosing scope (presumably 201 - confirm).
// NOTE(review): assumes sign(x) yields -1 for negative x and 0 otherwise,
// so that (sign*2+1) is -1 / +1 - confirm against the sign macro.
macro proc unwrap_3(Pin, Pout){
    //***********************************************
    //Constant definitions
    //***********************************************
    macro expr DOUBLE_PI = 402; // 2*PI in the input's fixed-point format
    //macro expr PI = 201;
    macro expr PSize = 10;
    // Working width of the last correction. It matches the width the
    // 5-frame unwrap() uses for the same step (its Pin_3 registers).
    macro expr LastW = PSize+2;
    //***********************************************
    //Declarations: Variables
    //***********************************************
    unsigned 3 fr;
    unsigned 3 orien;
    //static unsigned int 3 cur_frame= 1;
    //unsigned int 3 cur_frame;
    unsigned int 1 A[NORIENTATIONS];             // wrap flag, frames 0->1
    int PSize D[NORIENTATIONS];                  // difference, frames 0->1
    unsigned int 3 cf;
    int (PSize+1) Pin_2[NFRAMES][NORIENTATIONS]; // phases after 1st correction
    int (PSize+2) D_2[NORIENTATIONS];            // difference, frames 1->2
    unsigned int 1 A_2[NORIENTATIONS];           // wrap flag, frames 1->2
    //Pipeline auxiliary variable declarations
    int PSize Ds2[NORIENTATIONS];
    int (PSize+2) D_2s5[NORIENTATIONS];
    int (PSize-1) Pins1[NFRAMES][NORIENTATIONS], Pins2[NFRAMES][NORIENTATIONS];
    int (PSize+1) Pin_2s4[NFRAMES][NORIENTATIONS], Pin_2s5[NFRAMES][NORIENTATIONS];
    // Delay chain for output frame 0 (written in stage 1, emitted in stage 6).
    int PSize Pouts1[NORIENTATIONS], Pouts2[NORIENTATIONS], Pouts3[NORIENTATIONS], Pouts4[NORIENTATIONS], Pouts5[NORIENTATIONS];
    // Delay chain for output frame 1 (written in stage 4).
    int PSize Pout_1s4[NORIENTATIONS], Pout_1s5[NORIENTATIONS];
    //***********************************************
    //Body of the function
    //***********************************************
    par(orien=0;orien<NORIENTATIONS;orien++)
    {
        //-------------------------------------------
        //CURRENT FRAME == 1
        //Pipeline Stage 1
        // Frame 0 is never corrected: emit it directly (LSB dropped) and
        // form the difference between frames 1 and 0.
        par
        {
            //Writing Pout[0]
            Pouts1[orien] = adjs((Pin[0][orien])\\1, width(Pout));
            D[orien] = adjs(Pin[1][orien],width(D)) - adjs(Pin[0][orien],width(D));
            //Copying Pin for the next stage
            par(fr=0;fr<NFRAMES;fr++)
            {
                Pins1[fr][orien]=Pin[fr][orien];
            }
        }
        //Pipeline Stage 2
        // Flag a wrap when the difference magnitude exceeds PI.
        par
        {
            A[orien] = abs(D[orien])>(PI);
            //Copying Pin, D for the next stage
            par(fr=0;fr<NFRAMES;fr++)
            {
                Pins2[fr][orien]=Pins1[fr][orien];
            }
            Ds2[orien]=D[orien];
            //Writing Pout[0]
            Pouts2[orien] = Pouts1[orien];
        }
        //Pipeline Stage 3
        // Apply the first correction to frames 1..NFRAMES-1:
        //   Pin_2 = Pin - DOUBLE_PI * (+1 or -1 from sign(D)) * A
        par
        {
            par(cf=1; cf<NFRAMES;cf++)
            {
                Pin_2[cf][orien]=adjs(Pins2[cf][orien],width(Pin_2)) - DOUBLE_PI*(adjs(sign(Ds2[orien]),width(Pin_2))*2+1) * (signed)adju(A[orien],width(Pin_2));
            }
            //cur_frame=cur_frame+1;
            //Writing Pout[0]
            Pouts3[orien] = Pouts2[orien];
        }
        //-------------------------------------------
        //CURRENT FRAME == 2
        //Pipeline Stage 4
        // Frame 1 is final now: emit it and form the difference between the
        // corrected frames 2 and 1.
        par
        {
            //Writing Pout[1]
            Pout_1s4[orien] = adjs((Pin_2[1][orien])\\1, width(Pout));
            D_2[orien] = adjs(Pin_2[2][orien],width(D_2)) - adjs(Pin_2[1][orien],width(D_2));
            //Copying Pin for the next stage
            par(fr=0;fr<NFRAMES;fr++)
            {
                Pin_2s4[fr][orien]=Pin_2[fr][orien];
            }
            Pouts4[orien]=Pouts3[orien];
        }
        //Pipeline Stage 5
        par
        {
            A_2[orien] = abs(D_2[orien])>(PI);
            //Copying Pin, D for the next stage
            par(fr=0;fr<NFRAMES;fr++)
            {
                Pin_2s5[fr][orien]=Pin_2s4[fr][orien];
            }
            D_2s5[orien]=D_2[orien];
            Pout_1s5[orien]=Pout_1s4[orien];
            Pouts5[orien]=Pouts4[orien];
        }
        //Pipeline Stage 6
        par
        {
            //Writing Pout[0..2]
            Pout[0][orien]=Pouts5[orien];
            Pout[1][orien]=Pout_1s5[orien];
            // Last correction for frame 2. Pin_2s5 is PSize+1 bits wide, so
            // the subtraction is carried out at LastW (PSize+2) bits and only
            // the final result - after \\1 has dropped the LSB - is narrowed
            // to the width of Pout. Narrowing Pin_2s5 to width(Pout) *before*
            // the subtraction (as this line used to do) throws away its top
            // bit, which flips the sign of any unwrapped phase whose
            // magnitude is 512 or more.
            Pout[2][orien]=adjs((adjs(Pin_2s5[2][orien],LastW) - DOUBLE_PI*(adjs(sign(D_2s5[orien]),LastW)*2+1)*(signed)adju(A_2[orien],LastW))\\1, width(Pout));
        }
    }//par orient
}
//*************************************************************************
// macro full_velocity
/*************************************************************************/
// full_velocity
//
// Combines the per-orientation component velocities (FVx[o], FVy[o]) into a
// single velocity (Ox, Oy).
//
// Parameters (macro arguments):
//   FVx, FVy - component velocities, indexed [orientation]
//   LE       - per-orientation error value; an orientation is used only if
//              LE[o] < thres
//   thres    - upper bound on LE for an orientation to be accepted
//   nc_min   - minimum number of accepted orientations for a valid output
//   enable   - forwarded as the second argument of invert()
//   Ox, Oy   - outputs; set to SetNAN(...) when fewer than nc_min
//              orientations were accepted
//
// Outline (all statements live in one par block; the delay lines nc[],
// sumX[]/sumY[], NumX[]/NumY[] and Vxx/Vyy/Vxy[..][v] re-align values
// around the invert() calls):
//   1. per orientation: gate on LE and on a non-zero vector, form
//      Vxx, Vyy, Vxy and SumXX_YY = FVx^2 + FVy^2;
//   2. nc = number of accepted orientations, sumX/sumY = sums of Vx/Vy,
//      L2 = invert(SumXX_YY);
//   6. auxXX/YY/XY = Vxx/Vyy/Vxy * L2, rescaled;
//   7. sums of the aux terms over orientations;
//   8. products for the 2x2 system (denominator and numerators);
//   9. den = invert(aux_den_0 - aux_den_1), NumX, NumY;
//  10. quotX/quotY = Num * den, rescaled;
//  15. output or NaN depending on nc.
// NOTE(review): invert() presumably returns a scaled reciprocal (the
// commented-out code divides 'one' by the same operand) - confirm, together
// with its latency, against its definition.
macro proc full_velocity(FVx,FVy,LE,thres,nc_min, enable, Ox, Oy)
{
macro expr Frac=4; // number of low bits dropped after the Pipeline 8 products
// NOTE(review): the trailing comment says 1 was added, but no +1 appears
// here (full_velocity_small does add it) - confirm which one is intended.
macro expr DIVLATENCY=DIVIDER_LATENCY; // added 1 for thresholding in invert function
macro expr THlat=DIVLATENCY+DIVLATENCY+4; // length of the nc delay line
macro expr SUMlat=DIVLATENCY+2; // length of the sumX / sumY delay lines
unsigned int 4 nc[THlat];
unsigned int 1 nc_par[NORIENTATIONS];
signed int FLOW_BITS Vx[NORIENTATIONS], Vy[NORIENTATIONS], auxYY[NORIENTATIONS],auxXX[NORIENTATIONS], auxXY[NORIENTATIONS];
signed int (FLOW_BITS) sumX[SUMlat],sumY[SUMlat], sumYYL_2, sumXXL_2, sumXYL_2;
signed int (DIVIDER_INPUT) NumX[DIVLATENCY], NumY[DIVLATENCY];
signed int DIVIDER_INPUT aux_den_0, aux_den_1, aux_NumX_0, aux_NumX_1,aux_NumY_0, aux_NumY_1;
// 'cond' is only referenced by the commented-out "Pipeline 0" below.
unsigned int 1 cond[NORIENTATIONS];
signed int (DIVIDER_INPUT) Vxx[NORIENTATIONS][DIVLATENCY+1], Vyy[NORIENTATIONS][DIVLATENCY+1],Vxy[NORIENTATIONS][DIVLATENCY+1];
signed int DIVIDER_INPUT SumXX_YY[NORIENTATIONS];
signed int DIVIDER_INPUT L2[NORIENTATIONS], den;
signed FLOW_BITS quotX, quotY;
//assert (NORIENTATIONS==8, 0, "The code function only for 8 orientations");
//------------------------------------------------------------
// Verify bitwidth in operations
// improve division (ex. divider core)
//------------------------------------------------------------
par
{
/*/ Pipeline 0
par(o=0;o<NORIENTATIONS;o++)
{
cond[o]=(LE[o]<thres); // && (FVx[o]!=NAN) && (FVy[o]!=NAN);
auxFVx[o]=FVx[o];
auxFVy[o]=FVy[o];
} */
// Pipeline 1
// Accept orientation o when its error is below the threshold and its
// velocity is not the zero vector; rejected orientations contribute zeros.
par(o=0;o<NORIENTATIONS;o++)
{
//if(cond[o] && (FVxx[o]+FVyy[o])>EPS)
if(LE[o]<thres && (FVx[o]!=0 || FVy[o]!=0) )
par
{
//L2[o]= ((signed)one)/(FVxx[o] + FVyy[o]);
SumXX_YY[o]= (adjs(FVx[o],width(SumXX_YY))*adjs(FVx[o],width(SumXX_YY))) + (adjs(FVy[o],width(SumXX_YY))*adjs(FVy[o],width(SumXX_YY))); //FVxx[o] + FVyy[o];
Vx[o]=FVx[o];
Vy[o]=FVy[o];
Vxx[o][0]=(adjs(FVx[o],width(Vxx))*adjs(FVx[o],width(Vxx))); //\\Frac;
Vyy[o][0]=(adjs(FVy[o],width(Vyy))*adjs(FVy[o],width(Vyy))); //\\Frac;
Vxy[o][0]=(adjs(FVy[o],width(Vxy))*adjs(FVx[o],width(Vxy))); //\\Frac;
nc_par[o]=1;
}
else
par
{
nc_par[o]=0;
//L2[o]=0;
SumXX_YY[o]=0;
Vx[o]=0;
Vy[o]=0;
Vxx[o][0]=0;
Vyy[o][0]=0;
Vxy[o][0]=0;
}
}
// Delay lines for Vxx / Vyy / Vxy: entry v takes the value entry v-1 held.
// Entry [DIVLATENCY] is the one multiplied with L2 in Pipeline 6.
par(v=1;v<(DIVLATENCY+1);v++)
{
par(o=0;o<NORIENTATIONS;o++)
{
Vxx[o][v]=Vxx[o][v-1];
Vyy[o][v]=Vyy[o][v-1];
Vxy[o][v]=Vxy[o][v-1];
}
}
//Pipeline 2
par{
// Pipeline 2
// Count the accepted orientations and sum their components.
//nc=adju((L2[0]>0),4)+adju((L2[1]>0),4)+adju((L2[2]>0),4)+adju((L2[3]>0),4)+adju((L2[4]>0),4)+adju((L2[5]>0),4)+adju((L2[6]>0),4)+adju((L2[7]>0),4);
nc[0] = UnSumMacro(nc_par, 0, NORIENTATIONS-1, width(nc));
//sumX = Vx[0]+Vx[1]+Vx[2]+Vx[3]+Vx[4]+Vx[5]+Vx[6]+Vx[7];
sumX[0] = SumMacro(Vx, 0, NORIENTATIONS-1, width(sumX));
//sumY = Vy[0]+Vy[1]+Vy[2]+Vy[3]+Vy[4]+Vy[5]+Vy[6]+Vy[7];
sumY[0] = SumMacro(Vy, 0, NORIENTATIONS-1, width(sumY));
// Pipeline 2
par(o=0;o<NORIENTATIONS;o++)
{
invert(SumXX_YY[o], enable, L2[o]);
}
// Pipeline 6
// Multiply the delayed squares by L2, shift right by DIVIDER_INPUT-5 and
// keep the FLOW_BITS low bits.
par(O=0;O<NORIENTATIONS;O++)
{
//if(L2[O]!=0) // && (Vy[O]*Vx[O])!=0)
par
{
//divide12(Vyy[O], L2[O], auxYY[O]);
auxYY[O] = ((Vyy[O][DIVLATENCY]*adjs(L2[O],width(Vyy)))>>(DIVIDER_INPUT-5)) <-FLOW_BITS;
//divide12(Vxx[O], L2[O], auxXX[O]);
auxXX[O] = ((Vxx[O][DIVLATENCY]*adjs(L2[O],width(Vxx)))>>(DIVIDER_INPUT-5)) <- FLOW_BITS;
//divide12(Vxy[O], L2[O], auxXY[O]);
auxXY[O] = ((Vxy[O][DIVLATENCY]*adjs(L2[O],width(Vxy)))>>(DIVIDER_INPUT-5)) <- FLOW_BITS;
}
//else
//NanCond[O]=1;
}
}
// Pipeline 7
// Sum the normalised terms over all orientations.
sumYYL_2 = SumMacro(auxYY, 0, NORIENTATIONS-1, width(sumYYL_2));
sumXXL_2 = SumMacro(auxXX, 0, NORIENTATIONS-1, width(sumXXL_2));
sumXYL_2 = SumMacro(auxXY, 0, NORIENTATIONS-1, width(sumXYL_2));
// Pipeline 8
// Products for the denominator (sumXY^2 and sumXX*sumYY) and for both
// numerators; each is formed 4 bits wider than its target and the Frac low
// bits are then dropped.
aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+4)*adjs(sumXYL_2,width(aux_den_0)+4))\\Frac;
aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+4)*adjs(sumYYL_2,width(aux_den_1)+4))\\Frac;
aux_NumX_0 = (adjs(sumX[SUMlat-1],width(aux_NumX_0)+4)*adjs(sumYYL_2,width(aux_NumX_0)+4))\\Frac;
aux_NumX_1 = (adjs(sumY[SUMlat-1],width(aux_NumX_1)+4)*adjs(sumXYL_2,width(aux_NumX_1)+4))\\Frac;
aux_NumY_0 = (adjs(sumX[SUMlat-1],width(aux_NumY_0)+4)*adjs(sumXYL_2,width(aux_NumY_0)+4))\\Frac;
aux_NumY_1 = (adjs(sumY[SUMlat-1],width(aux_NumY_1)+4)*adjs(sumXXL_2,width(aux_NumY_1)+4))\\Frac;
// Pipeline 9
//den = ((signed)one)/(aux_den_0 - aux_den_1);
invert((aux_den_0 - aux_den_1), enable, den);
NumX[0] = -(aux_NumX_0 - aux_NumX_1);
NumY[0] = aux_NumY_0 - aux_NumY_1;
// Pipeline 10-14
// Multiply the delayed numerators by 'den' (the invert() result), shift
// right by DIVIDER_INPUT-5 and keep the FLOW_BITS low bits.
//quotX=adjs((NumX<<4)/den, FLOW_BITS);
quotX=((NumX[DIVLATENCY-1]*adjs(den,width(NumX)))>>(DIVIDER_INPUT-5)) <- FLOW_BITS;
//quotY=adjs((NumY<<4)/den, FLOW_BITS);
quotY=((NumY[DIVLATENCY-1]*adjs(den,width(NumY)))>>(DIVIDER_INPUT-5)) <- FLOW_BITS;
//divide12(NumX, den, quotX);
//divide12(NumY, den, quotY);
// delay for threshold
// Shift registers: nc over THlat entries, NumX/NumY over DIVLATENCY
// entries, sumX/sumY over SUMlat entries.
par(i=1; i<THlat; i++)
{
nc[i]=nc[i-1];
}
par(n=1; n<DIVLATENCY; n++)
{
NumX[n]=NumX[n-1];
NumY[n]=NumY[n-1];
}
par(s=1; s<SUMlat; s++)
{
sumX[s]=sumX[s-1];
sumY[s]=sumY[s-1];
}
// Pipeline 15
// Emit the quotients only if enough orientations were accepted; otherwise
// mark both outputs as NaN.
if (nc[THlat-1]>=nc_min)
par
{
Ox= quotX; // den = (sumXYL_2^2 - sumXXL_2*sumYYL_2)
//Ox= (-NumX>>(lmo(den<-(width(den)-1))))<-FLOW_BITS;
//Ox= (-NumX);
Oy= quotY;
//Oy= ( NumY>>(lmo(den<-(width(den)-1))))<-FLOW_BITS;
//Oy= (NumY);
}
else
par
{
Ox=SetNAN(Ox); //in matlab is NaN
Oy=SetNAN(Oy); //in matlab is NaN
}
}
}
//*************************************************************************
// macro full_velocity_small
/*************************************************************************/
macro proc full_velocity_small(FVx,FVy,LE,thres, Div_thr,nc_min, Ox, Oy)
{
macro expr Frac=8;
macro expr DIVLATENCY=DIVIDER_LATENCY+1; // added 1 for thresholding in invert function
macro expr THlat=DIVLATENCY+DIVLATENCY+4;
macro expr SUMlat=DIVLATENCY+2;
unsigned int 4 nc[THlat];
unsigned int 1 nc_par[NORIENTATIONS];
signed int FLOW_BITS Vx[NORIENTATIONS], Vy[NORIENTATIONS];
signed int DIVIDER_INPUT auxYY[NORIENTATIONS], auxXX[NORIENTATIONS], auxXY[NORIENTATIONS];
signed int (FLOW_BITS+3) sumX[SUMlat],sumY[SUMlat];
signed int (DIVIDER_INPUT) sumYYL_2, sumXXL_2, sumXYL_2;
signed int (DIVIDER_INPUT) NumX[DIVLATENCY], NumY[DIVLATENCY];
signed int (DIVIDER_INPUT) aux_den_0, aux_den_1, aux_NumX_0, aux_NumX_1,aux_NumY_0, aux_NumY_1;
unsigned int 1 cond[NORIENTATIONS];
signed int (DIVIDER_INPUT) Vxx[NORIENTATIONS][DIVLATENCY+1], Vyy[NORIENTATIONS][DIVLATENCY+1],Vxy[NORIENTATIONS][DIVLATENCY+1];
signed int DIVIDER_INPUT SumXX_YY[NORIENTATIONS];
signed int DIVIDER_INPUT L2[NORIENTATIONS], den;
signed FLOW_BITS quotX, quotY;
//assert (NORIENTATIONS==8, 0, "The code function only for 8 orientations");
//------------------------------------------------------------
// Verify bitwidth in operations
// improve division (ex. divider core)
//------------------------------------------------------------
par
{
/*/ Pipeline 0
par(o=0;o<NORIENTATIONS;o++)
{
cond[o]=(LE[o]<thres); // && (FVx[o]!=NAN) && (FVy[o]!=NAN);
auxFVx[o]=FVx[o];
auxFVy[o]=FVy[o];
} */
// Pipeline 1
par(o=0;o<NORIENTATIONS;o++)
{
//if(cond[o] && (FVxx[o]+FVyy[o])>EPS)
if(LE[o]<thres && ((FVx[o]!=0) || FVy[o]!=0) && (FVx[o]!=NAN) && (FVy[o]!=NAN)) //((FVx[o]*FVx[o])+(FVy[o]*FVy[o]))>EPS)//(FVx[o]!=0 || FVy[o]!=0) )
par
{
//L2[o]= ((signed)one)/(FVxx[o] + FVyy[o]);
SumXX_YY[o]= ( (adjs(FVx[o],width(SumXX_YY)+4)*adjs(FVx[o],width(SumXX_YY)+4)) + (adjs(FVy[o],width(SumXX_YY)+4)*adjs(FVy[o],width(SumXX_YY)+4)))\\4 ; //FVxx[o] + FVyy[o];
Vx[o]=FVx[o];
Vy[o]=FVy[o];
Vxx[o][0]=(adjs(FVx[o],width(Vxx)+4)*adjs(FVx[o],width(Vxx)+4))\\4;
Vyy[o][0]=(adjs(FVy[o],width(Vyy)+4)*adjs(FVy[o],width(Vyy)+4))\\4;
Vxy[o][0]=(adjs(FVy[o],width(Vxy)+4)*adjs(FVx[o],width(Vxy)+4))\\4;
nc_par[o]=1;
}
else
par
{
nc_par[o]=0;
//L2[o]=0;
SumXX_YY[o]=0;
Vx[o]=0;
Vy[o]=0;
Vxx[o][0]=0;
Vyy[o][0]=0;
Vxy[o][0]=0;
}
}
par(v=1;v<(DIVLATENCY+1);v++)
{
par(o=0;o<NORIENTATIONS;o++)
{
Vxx[o][v]=Vxx[o][v-1];
Vyy[o][v]=Vyy[o][v-1];
Vxy[o][v]=Vxy[o][v-1];
}
}
//Pipeline 2
par{
// Pipeline 2
//nc=adju((L2[0]>0),4)+adju((L2[1]>0),4)+adju((L2[2]>0),4)+adju((L2[3]>0),4)+adju((L2[4]>0),4)+adju((L2[5]>0),4)+adju((L2[6]>0),4)+adju((L2[7]>0),4);
nc[0] = UnSumMacro(nc_par, 0, NORIENTATIONS-1, width(nc));
//sumX = Vx[0]+Vx[1]+Vx[2]+Vx[3]+Vx[4]+Vx[5]+Vx[6]+Vx[7];
sumX[0] = SumMacro(Vx, 0, NORIENTATIONS-1, width(sumX));
//sumY = Vy[0]+Vy[1]+Vy[2]+Vy[3]+Vy[4]+Vy[5]+Vy[6]+Vy[7];
sumY[0] = SumMacro(Vy, 0, NORIENTATIONS-1, width(sumY));
// Pipeline 2
par(o=0;o<NORIENTATIONS;o++)
{
invert(SumXX_YY[o], Div_thr, L2[o]);
}
// Pipeline 6
par(O=0;O<NORIENTATIONS;O++)
{
//if(L2[O]!=0) // && (Vy[O]*Vx[O])!=0)
par
{
//auxYY[O] = ((adjs(Vyy[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-5)) <-DIVIDER_INPUT;
auxYY[O] = ((adjs(Vyy[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-9)) <-DIVIDER_INPUT;
//auxXX[O] = ((adjs(Vxx[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-5)) <- DIVIDER_INPUT;
auxXX[O] = ((adjs(Vxx[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-9)) <- DIVIDER_INPUT;
//auxXY[O] = ((adjs(Vxy[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-5)) <- DIVIDER_INPUT;
auxXY[O] = ((adjs(Vxy[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-9)) <- DIVIDER_INPUT;
}
//else
//NanCond[O]=1;
}
}
// Pipeline 7
sumYYL_2 = SumMacro(auxYY, 0, NORIENTATIONS-1, width(sumYYL_2));
sumXXL_2 = SumMacro(auxXX, 0, NORIENTATIONS-1, width(sumXXL_2));
sumXYL_2 = SumMacro(auxXY, 0, NORIENTATIONS-1, width(sumXYL_2));
// Pipeline 8
//aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+4)*adjs(sumXYL_2,width(aux_den_0)+4))\\4;
aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+Frac)*adjs(sumXYL_2,width(aux_den_0)+Frac))\\Frac;
//aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+4)*adjs(sumYYL_2,width(aux_den_1)+4))\\4;
aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+Frac)*adjs(sumYYL_2,width(aux_den_1)+Frac))\\Frac;
aux_NumX_0 = (adjs(sumX[SUMlat-1],width(aux_NumX_0)+4)*adjs(sumYYL_2,width(aux_NumX_0)+4))\\4;
aux_NumX_1 = (adjs(sumY[SUMlat-1],width(aux_NumX_1)+4)*adjs(sumXYL_2,width(aux_NumX_1)+4))\\4;
aux_NumY_0 = (adjs(sumX[SUMlat-1],width(aux_NumY_0)+4)*adjs(sumXYL_2,width(aux_NumY_0)+4))\\4;
aux_NumY_1 = (adjs(sumY[SUMlat-1],width(aux_NumY_1)+4)*adjs(sumXXL_2,width(aux_NumY_1)+4))\\4;
// Pipeline 9
//den = ((signed)one)/(aux_den_0 - aux_den_1);
invert((aux_den_0 - aux_den_1), Div_thr, den);
NumX[0] = -(aux_NumX_0 - aux_NumX_1);
NumY[0] = aux_NumY_0 - aux_NumY_1;
// Pipeline 10-14
//quotX=adjs((NumX<<4)/den, FLOW_BITS);
if(den!=0)
par
{
quotX=((adjs(NumX[DIVLATENCY-1],26)*adjs(den,26))>>(DIVIDER_INPUT-1)) <- FLOW_BITS;
quotY=((adjs(NumY[DIVLATENCY-1],26)*adjs(den,26))>>(DIVIDER_INPUT-1)) <- FLOW_BITS;
}
else
par
{
quotX=SetNAN(quotX);
quotY=SetNAN(quotY);
}
//quotY=adjs((NumY<<4)/den, FLOW_BITS);
//divide12(NumX, den, quotX);
//divide12(NumY, den, quotY);
// delay for threshold
par(i=1; i<THlat; i++)
{
nc[i]=nc[i-1];
}
par(n=1; n<DIVLATENCY; n++)
{
NumX[n]=NumX[n-1];
NumY[n]=NumY[n-1];
}
par(s=1; s<SUMlat; s++)
{
sumX[s]=sumX[s-1];
sumY[s]=sumY[s-1];
}
// Pipeline 15
if (nc[THlat-1]>=nc_min)
par
{
Ox= quotX; // den = (sumXYL_22-sumXXL_2*sumYYL_2)
//Ox= (-NumX>>(lmo(den<-(width(den)-1))))<-FLOW_BITS;
//Ox= (-NumX);
Oy= quotY;
//Oy= ( NumY>>(lmo(den<-(width(den)-1))))<-FLOW_BITS;
//Oy= (NumY);
}
else
par
{
Ox=SetNAN(Ox); //in matlab is NaN
Oy=SetNAN(Oy); //in matlab is NaN
}
}
}
/*
// Invert function : DIVLATENCY cycles
// -----------------------------
macro proc invert(Den, Div_thr, quot)
{
//signed int DIVIDER_INPUT Den_p0;
unsigned int (log2ceil(width(Den))) MSB_Den;
//static signed int 14 one = 0b01000000000000;
static signed int 18 one = 0b010000000000000000;
unsigned 1 cond[DIVIDER_LATENCY];
// Enable for Cores
static signal unsigned 1 enable;
interface divider_18 (signed DIVIDER_INPUT quot, signed DIVIDER_INPUT remd, unsigned 1 rfd) divider(signed DIVIDER_INPUT dividend = one,
signed DIVIDER_INPUT divisor = adjs(Den,DIVIDER_INPUT), unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
par
{
//Enabling atan2 Core: only for 1 clock cycle
enable=1;
cond[0]=(abs(Den)<((signed)(0@Div_thr)));
par(i=1;i<DIVIDER_LATENCY;i++)
{
cond[i]=cond[i-1];
}
if(cond[DIVIDER_LATENCY-1]==1)
quot = 0;
else
quot = divider.quot;
}
}
*/
// Divide function
// -----------------------------
// divide12(Num, Den, quot): signed division of Num by Den through the external
// divider_12 core, with a common normalising shift applied to both operands.
//
//   Num, Den : signed operands; both are stored in registers of width(Num).
//   quot     : destination; receives the core quotient, or SetNAN(quot) when
//              the operands were flagged NotValid or the shifted divisor is 0.
//
// Data flow (stage labels are the original author's):
//   0. find the leftmost-one position of |Num| and |Den| (lmo), copy operands;
//   1. flag the pair NotValid when the two positions differ by at least
//      width(quot)-2, and derive a left shift from the larger position;
//   2. shift both operands left by that amount and drop the
//      width(Num)-width(quot) least significant bits before feeding the core;
//   3. select between the core output and NaN using flags delayed through
//      DIVIDER_LATENCY-deep register chains.
// NOTE(review): cycle-level timing relies on Handel-C par semantics and on the
// core latency equalling DIVIDER_LATENCY; neither is verifiable from this file.
macro proc divide12(Num, Den, quot)
{
signed int (width(Num)) Num_p0, Num_p1, Den_p0, Den_p1;
signed int (width(quot)) Num_p2, Den_p2[DIVIDER_LATENCY];
unsigned int (log2ceil(width(Num))) shift, MSB_Num, MSB_Den;
unsigned int 1 NotValid, NotValid_2[DIVIDER_LATENCY];
interface divider_12 (signed FLOW_BITS quot, signed FLOW_BITS remd, unsigned 1 rfd) divider(signed FLOW_BITS dividend = Num_p2,
signed FLOW_BITS divisor = Den_p2[0], unsigned 1 clk=__clock, unsigned 1 ce=1) with {busformat="B<I>"};
//*/
par
{
// Pipeline 0
// Leftmost-one position of the magnitude of each operand (0 for a zero operand).
if(Num>0)
MSB_Num = lmo(Num);
else
if(Num==0)
MSB_Num = 0;
else
MSB_Num = lmo(-Num);
if(Den>0)
MSB_Den = lmo(Den);
else
if(Den==0)
MSB_Den = 0;
else
MSB_Den = lmo(-Den);
Den_p0 = Den;
Num_p0 = Num;
// Pipeline 1
Num_p1 = Num_p0;
Den_p1 = Den_p0;
// Operands whose magnitudes are too far apart are rejected.
// NOTE(review): MSB_Num and MSB_Den are unsigned, so the subtraction wraps
// when MSB_Den > MSB_Num; confirm that abs() gives the intended distance here.
if( (abs(MSB_Num-MSB_Den))>=(width(quot)-2) )
NotValid=1;
else
NotValid=0;
// Common left shift derived from the larger of the two positions.
// NOTE(review): the "-2" presumably keeps the sign bit plus one guard bit - confirm.
if(MSB_Num > MSB_Den)
shift = width(Num)-MSB_Num-2 ;
else
shift = width(Num)-MSB_Den-2 ;
// Pipeline 2
NotValid_2[0] = NotValid;
// Normalise, then drop the low bits so the value fits width(quot).
//my_dividend = (Num_p1<<shift)\\(width(Num)-width(quot));
Num_p2 = (Num_p1<<shift)\\(width(Num)-width(quot));
//my_divisor = (Den_p1<<shift)\\(width(Num)-width(quot));
Den_p2[0] = (Den_p1<<shift)\\(width(Num)-width(quot));
// delay for thresholds
// The validity flag and the divisor travel through DIVIDER_LATENCY registers
// so that they can be checked against the matching core output.
par(i=1;i<DIVIDER_LATENCY;i++)
{
NotValid_2[i] = NotValid_2[i-1];
Den_p2[i] = Den_p2[i-1];
}
// Pipeline 3
// Forward the core quotient only for a valid pair with a non-zero divisor.
if(NotValid_2[DIVIDER_LATENCY-1]==0 && Den_p2[DIVIDER_LATENCY-1]!=0)
par
{
quot = divider.quot;
//quot = (Num_p2)/(Den_p2);
}
else
quot = SetNAN(quot);
}
}
//--------------------------------------------
// new_full_velocity(FV, LE, thres, nc_min, Ox, Oy)
//
// Combines the per-orientation component velocities FV[o] into one
// two-dimensional estimate (Ox, Oy).
//
//   FV[o]  : component velocity for orientation o (0 and NAN are rejected).
//   LE[o]  : per-orientation error; orientation o is used only if LE[o] < thres.
//   thres  : error threshold applied to LE.
//   nc_min : minimum number of accepted orientations required for a valid output.
//   Ox, Oy : outputs; set with SetNAN() when fewer than nc_min orientations
//            were accepted or when the denominator was zero.
//
// Computation, as written below:
//   Vx[o] = FV[o]*WREAL_SUMX[o],  Vy[o] = FV[o]*WIMAG_SUMY[o]   (accepted o only)
//   sumX = sum Vx, sumY = sum Vy, sumXX/sumXY/sumYY = sums of the constant
//   tables over the accepted orientations
//   den  = sumXY*sumXY - sumXX*sumYY
//   NumX = -(sumX*sumYY - sumY*sumXY)
//   NumY =   sumX*sumXY - sumY*sumXX
//   Ox = NumX/den, Oy = NumY/den (through invert), low FLOW_BITS bits, >>2.
//
// Pay attention: the sign of the output is not changed.
// NOTE(review): stage labels are the original author's; cycle-accurate timing
// depends on Handel-C par semantics and on the divider core latency.
//-----------------------------------------------
macro proc new_full_velocity(FV, LE,thres,nc_min, Ox, Oy)
{
macro expr Frac=4; // not referenced by the live code of this macro
macro expr DIVLATENCY=DIVIDER_LATENCY; // NOTE(review): the original comment said "added 1 for thresholding in invert function", but no +1 is applied here
macro expr THlat=DIVLATENCY+3; // depth of the nc[] delay line
//macro expr SUMlat=DIVLATENCY+2;
unsigned int 4 nc[THlat]; // count of accepted orientations, delayed
unsigned int 1 bad_div[DIVLATENCY]; // "den == 0" flag, delayed
unsigned int 1 nc_par[NORIENTATIONS]; // 1 when orientation o is accepted
//unsigned int 1 cond[NORIENTATIONS];
signed int 9 auxYY[NORIENTATIONS], auxXX[NORIENTATIONS], auxXY[NORIENTATIONS];
signed int (FLOW_BITS+4) Vx[NORIENTATIONS], Vy[NORIENTATIONS];
signed int (FLOW_BITS+7) sumX,sumY;
signed int (DIVIDER_INPUT) sumYYL_2, sumXXL_2, sumXYL_2;
signed int (DIVIDER_INPUT) aux_den_0, aux_den_1, aux_NumX_0, aux_NumX_1,aux_NumY_0, aux_NumY_1;
//signed int (DIVIDER_INPUT) NumX[DIVLATENCY], NumY[DIVLATENCY];
signed int (DIVIDER_INPUT) NumX, NumY;
signed int (DIVIDER_INPUT) den, den_1; // den_1 is not referenced by the live code
//signed int (DIVIDER_INPUT) diff[DIVLATENCY];
// signed int (DIVIDER_INPUT) diff;
signed int (DIVIDER_INPUT) quotX, quotY;
// Constant tables indexed by orientation i = 0..NORIENTATIONS-1.
// The listed values follow the angle i*PI/NORIENTATIONS (e.g. entry 4 is the
// 90-degree case); the original comments wrote the angle as PI/(NORIENT*i).
const int 9 WREAL_SUMX[NORIENTATIONS] = {-81, -75, -58, -31, 0, 31, 58, 75}; // 2^7 * {-F0 * cos(PI*i/NORIENT) / (2*PI*F0*F0)}
const int 9 WIMAG_SUMY[NORIENTATIONS] = {0, -31, -58, -75, -81, -75, -58, -31}; // 2^7 * {-F0 * sin(PI*i/NORIENT) / (2*PI*F0*F0)}
const int 9 SUMXY_L2[NORIENTATIONS] = {0, 45, 64, 45, 0, -45, -64, -45}; // FRAC: 7 bits; values match 128*sin*cos of the same angle
const int 9 SUMXX_L2[NORIENTATIONS] = {128, 109, 64, 19, 0, 19, 64, 109}; // FRAC: 7 bits; values match 128*cos^2 of the same angle
const int 9 SUMYY_L2[NORIENTATIONS] = {0, 19, 64, 109, 128, 109, 64, 19}; // FRAC: 7 bits; values match 128*sin^2 of the same angle
//assert (NORIENTATIONS==8, 0, "The code function only for 8 orientations");
//------------------------------------------------------------
// Verify bitwidth in operations
// improve division (ex. divider core)
//------------------------------------------------------------
par
{
// Pipeline 1
// Accept orientation o only when its error is below the threshold and its
// component velocity is neither zero nor the NAN marker; rejected
// orientations contribute zero to every sum.
par(o=0;o<NORIENTATIONS;o++)
{
if(LE[o]<thres && FV[o]!=0 && FV[o]!=NAN)
par
{
Vx[o]=(adjs(FV[o], width(Vx)+4)*adjs(WREAL_SUMX[o],width(Vx)+4))\\4; //s-7-8
Vy[o]=(adjs(FV[o], width(Vy)+4)*adjs(WIMAG_SUMY[o],width(Vy)+4))\\4; //s-7-8
auxXX[o]=adjs(SUMXX_L2[o],width(auxXX)); //s-0-7 bits
auxXY[o]=adjs(SUMXY_L2[o],width(auxXY)); //s-0-7 bits
auxYY[o]=adjs(SUMYY_L2[o],width(auxYY)); //s-0-7 bits
nc_par[o]=1;
}
else
par
{
Vx[o]=0;
Vy[o]=0;
auxXX[o]=0;
auxXY[o]=0;
auxYY[o]=0;
nc_par[o]=0;
}
}
//Pipeline 2
// Reduce the per-orientation terms to scalars.
par
{
nc[0] = UnSumMacro(nc_par, 0, NORIENTATIONS-1, width(nc));
sumX = SumMacro(Vx, 0, NORIENTATIONS-1, width(sumX)); //s-10-8
sumY = SumMacro(Vy, 0, NORIENTATIONS-1, width(sumY)); //s-10-8
sumYYL_2 = SumMacro(auxYY, 0, NORIENTATIONS-1, width(sumYYL_2)); //adjs(s-3-7,DIVIDERINPUT)
sumXXL_2 = SumMacro(auxXX, 0, NORIENTATIONS-1, width(sumXXL_2)); //adjs(s-3-7,DIVIDERINPUT)
sumXYL_2 = SumMacro(auxXY, 0, NORIENTATIONS-1, width(sumXYL_2)); //adjs(s-3-7,DIVIDERINPUT)
}
// Pipeline 3
// Products for the denominator and the two numerators. Operands are widened
// before multiplying and the low bits dropped afterwards; the trailing
// s-i-f notes are the author's fixed-point formats. The commented-out lines
// are earlier scalings kept for reference.
par
{
//aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+6)*adjs(sumXYL_2,width(aux_den_0)+6))\\6; //s-9-8
//aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+6)*adjs(sumYYL_2,width(aux_den_1)+6))\\6; //s-9-8
aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+8)*adjs(sumXYL_2,width(aux_den_0)+8))\\8; //s-11-6
aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+8)*adjs(sumYYL_2,width(aux_den_1)+8))\\8; //s-11-6
//aux_NumX_0 = ((adjs(sumX,width(aux_NumX_0)+7+4)*adjs(sumYYL_2,width(aux_NumX_0)+7+4))\\7)<-DIVIDER_INPUT; //s-9-8
//aux_NumX_1 = ((adjs(sumY,width(aux_NumX_1)+7+4)*adjs(sumXYL_2,width(aux_NumX_1)+7+4))\\7)<-DIVIDER_INPUT; //s-9-8
//aux_NumY_0 = ((adjs(sumX,width(aux_NumY_0)+7+4)*adjs(sumXYL_2,width(aux_NumY_0)+7+4))\\7)<-DIVIDER_INPUT; //s-9-8
//aux_NumY_1 = ((adjs(sumY,width(aux_NumY_1)+7+4)*adjs(sumXXL_2,width(aux_NumY_1)+7+4))\\7)<-DIVIDER_INPUT; //s-9-8
aux_NumX_0 = ((adjs(sumX,width(aux_NumX_0)+7+4)*adjs(sumYYL_2,width(aux_NumX_0)+7+4))\\3)<-DIVIDER_INPUT; //s-5-12
aux_NumX_1 = ((adjs(sumY,width(aux_NumX_1)+7+4)*adjs(sumXYL_2,width(aux_NumX_1)+7+4))\\3)<-DIVIDER_INPUT; //s-5-12
aux_NumY_0 = ((adjs(sumX,width(aux_NumY_0)+7+4)*adjs(sumXYL_2,width(aux_NumY_0)+7+4))\\3)<-DIVIDER_INPUT; //s-5-12
aux_NumY_1 = ((adjs(sumY,width(aux_NumY_1)+7+4)*adjs(sumXXL_2,width(aux_NumY_1)+7+4))\\3)<-DIVIDER_INPUT; //s-5-12
}
// Pipeline 4
// Numerators and denominator of the two quotients.
par
{
//invert((aux_den_0 - aux_den_1), den);
NumX = -(aux_NumX_0 - aux_NumX_1); //s-9-8
NumY = (aux_NumY_0 - aux_NumY_1);
den = aux_den_0 - aux_den_1;
}
// Pipeline 5+divlatency
// Two divider instances, one per output component. invert() itself does not
// guard against a zero divisor, so den == 0 is recorded in bad_div and
// checked at the output stage.
invert(NumX, den, quotX);
invert(NumY, den, quotY);
if(den!=0)
par
{
bad_div[0]=0;
}
else
par
{
bad_div[0]=1;
}
// delays for threshold and div
// nc[] and bad_div[] are shifted along so that the output stage reads the
// count and the zero-divisor flag through nc[THlat-1] and bad_div[DIVLATENCY-1].
par(i=1; i<THlat; i++)
{
nc[i]=nc[i-1];
}
par(d=1; d<DIVLATENCY; d++)
{
bad_div[d]=bad_div[d-1];
}
// Pipeline 6 + divlatency
// Valid output: low FLOW_BITS bits of each quotient, shifted right by 2.
if (nc[THlat-1]>=nc_min && bad_div[DIVLATENCY-1]==0)
par
{
Ox= (quotX <- FLOW_BITS)>>2;
Oy= (quotY <- FLOW_BITS)>>2;
}
else
par
{
Ox=SetNAN(Ox); //in matlab is NaN
Oy=SetNAN(Oy); //in matlab is NaN
}
}
}
// invert(Num, Den, quot): wrapper around one instance of the divider_18 core.
// -----------------------------
// Despite its name, this version does not compute a reciprocal of a constant:
// the core is wired with dividend = Num and divisor = Den (adjusted to
// DIVIDER_INPUT bits), and its quotient output is copied to quot.
//
//   Num  : dividend, connected directly to the core.
//   Den  : divisor, sign-adjusted to DIVIDER_INPUT bits with adjs().
//   quot : destination for divider.quot.
//
// The remainder (remd) and ready (rfd) outputs of the core are not used.
// No zero-divisor or small-divisor check is made here; callers must handle
// that themselves. An earlier guard that forced quot = 0 when |Den| was below
// a threshold is disabled and survives only as the commented-out lines below.
// NOTE(review): the heading used to read "DIVLATENCY cycles"; the actual
// latency is a property of the divider_18 core and is not visible in this file.
macro proc invert(Num, Den, quot)
{
//signed int DIVIDER_INPUT Den_p0;
//unsigned int (log2ceil(width(Den))) MSB_Den;
//static signed int 14 one = 0b01000000000000;
//static signed int 18 one = 0b010000000000000000;
//unsigned 1 cond;//[DIVIDER_LATENCY];
// Clock-enable signal for the divider core
static signal unsigned 1 enable;
interface divider_18 (signed DIVIDER_INPUT quot, signed DIVIDER_INPUT remd, unsigned 1 rfd) divider(signed DIVIDER_INPUT dividend = Num,
signed DIVIDER_INPUT divisor = adjs(Den,DIVIDER_INPUT), unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
par
{
// Drive the divider's clock enable (the original comment mentioned an
// "atan2 Core", apparently copied from another wrapper).
enable=1;
//cond[0]=(abs(Den)<((signed)(0@Div_thr)));
//cond=(abs(Den)<((signed)(0@Div_thr)));
/*par(i=1;i<DIVIDER_LATENCY;i++)
{
cond[i]=cond[i-1];
}
if(cond[DIVIDER_LATENCY-1]==1)*/
//if(cond==1)
//if(Den<12 && Den>-12)
// quot = 0;
//else
// Forward the raw core quotient.
quot = divider.quot;
//quot = ((signed)one) / adjs(Den,width(quot));
}
}
//***************************************************
//Macro new_component_velocity(P, FV, LE)
//
// For every orientation, fits a straight line  P(f) ~ a + b*XX[f]  through the
// NFRAMES phase samples P[f][orien] by least squares (XX = {1, 2, 3}) and
// returns:
//   FV[orien] : the slope b, adjusted to width(FV);
//   LE[orien] : the sum of squared residuals (a + b*XX[f] - P[f])^2 over the
//               frames, scaled by 85/256 (about 1/3) with 2 low bits dropped.
//
// Closed form used (SXX = sum XX^2 = 14, SX = sum XX = 6, DEN = 6):
//   a = (SXX*Sy  - SX*Sxy) / DEN
//   b = (NFRAMES*Sxy - SX*Sy) / DEN
// The division by DEN is implemented as "*43 >> 8" (43/256 is about 1/6).
//
// Original author's notes, kept as written:
//LATENCY = 4;
//bits format:
//IN: P[NFRAMES][NORIENT] --> sign-4-5
//
//OUT: FV[NFRAMES] --> sign-14-5
// LE[NFRAMES] --> sign-28-5
// NOTE(review): the body below is labelled with ten stages (0, 1, 2_0, 2_1,
// 2_2, 3_0, 3_1, 4_0, 4_1, 4_2), which does not obviously match "LATENCY = 4";
// FV and LE are indexed by orientation in the code, not by frame - confirm.
//
//***************************************************/
macro proc new_component_velocity(P, FV, LE){
//***********************************************/
//Constant definitions for 3 frames
//***********************************************
macro expr PSize = 10; // base bit width for the phase registers
macro expr SXX = 14; // 1*1 + 2*2 + 3*3
macro expr SX = 6; // 1 + 2 + 3
macro expr DEN = 6; // NFRAMES*SXX - SX*SX; not referenced by the live code (see *43>>8)
const int 3 XX[NFRAMES] = {1, 2, 3}; //XX3 is XX in the third dimension
// WREAL / WIMAG are referenced only from commented-out lines in Stage 4_2.
// NOTE(review): the original comments gave the scale as "25" (possibly 2^5) and
// the angle as PI/(NORIENT*i); the values follow cos/sin of i*PI/NORIENT.
const int 8 WREAL[NORIENTATIONS] = {-81, -75, -58, -31, 0, 31, 58, 75}; // scaled {-F0 * cos(PI*i/NORIENT) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 8 WIMAG[NORIENTATIONS] = {0, -31, -58, -75, -81, -75, -58, -31}; // scaled {-F0 * sin(PI*i/NORIENT) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
//***********************************************
//Declarations: Variables
//***********************************************
// Regression sums: Sxy = sum P*XX, Sy = sum P (the *_0 arrays hold the per-frame terms).
int (PSize+2) Sxy[NORIENTATIONS];
int (PSize+1) Sxy_0[NORIENTATIONS][NFRAMES];
int (PSize+1) Sy[NORIENTATIONS];
int PSize Sy_0[NORIENTATIONS][NFRAMES];
// Intercept a and its partial results.
int (PSize+3) a[NORIENTATIONS];
int (PSize+5) a_0[NORIENTATIONS];
int (PSize+5) a_1[NORIENTATIONS];
int (PSize+12) a_2[NORIENTATIONS];
// Slope b and its partial results.
int (PSize+2) b[NORIENTATIONS];
int (PSize+4) b_0[NORIENTATIONS];
int (PSize+4) b_1[NORIENTATIONS];
int (PSize+11) b_2[NORIENTATIONS];
// Copies of a and b carried forward to later stages.
int (PSize+3) a3_0[NORIENTATIONS];
int (PSize+2) bs3[NORIENTATIONS];
int (PSize+2) bs3_1[NORIENTATIONS];
int (PSize+2) bs4_0[NORIENTATIONS];
int (PSize+2) bs4_1[NORIENTATIONS];
// Fitted line values: Reg_0 = b*XX[f], Reg = a + b*XX[f].
int (PSize+3) Reg[NFRAMES][NORIENTATIONS];
int (PSize+3) Reg_0[NFRAMES][NORIENTATIONS];
//Pipeline auxiliary variables
// Copies of the input P carried forward until the residual is formed.
int PSize Ps0[NFRAMES][NORIENTATIONS];
int PSize Ps1[NFRAMES][NORIENTATIONS];
int PSize Ps2[NFRAMES][NORIENTATIONS];
int PSize Ps2_1[NFRAMES][NORIENTATIONS];
int PSize Ps2_2[NFRAMES][NORIENTATIONS];
int PSize Ps3[NFRAMES][NORIENTATIONS];
int PSize Ps3_1[NFRAMES][NORIENTATIONS];
// Residuals (LE_0) and squared residuals (LE_1).
int (PSize+3) LE_0[NORIENTATIONS][NFRAMES];
int (2*PSize) LE_1[NORIENTATIONS][NFRAMES];
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENTATIONS;orien++)
{
//Pipeline Stage 0
// Per-frame terms of the two regression sums.
par(f=0;f<NFRAMES;f++)
{
Sxy_0[orien][f] = adjs(P[f][orien],width(Sxy_0))*adjs(XX[f],width(Sxy_0));
Sy_0[orien][f] = P[f][orien];
//Copying P for the next stage
Ps0[f][orien]=P[f][orien];
}
//Pipeline Stage 1
// Sxy = sum over frames of P*XX, Sy = sum over frames of P.
par
{
//Sxy[orien] = (adjs(P[0][orien],width(Sxy))*adjs(XX[0],width(Sxy)) + adjs(P[1][orien],width(Sxy))*adjs(XX[1],width(Sxy)) + adjs(P[2][orien],width(Sxy))*adjs(XX[2],width(Sxy)) + adjs(P[3][orien],width(Sxy))*adjs(XX[3],width(Sxy)) + adjs(P[4][orien],width(Sxy))*adjs(XX[4],width(Sxy)));
Sxy[orien] = SumMacro(Sxy_0[orien], 0, NFRAMES-1,width(Sxy));
//Sy[orien] = adjs(P[0][orien],width(Sy)) + adjs(P[1][orien],width(Sy)) + adjs(P[2][orien],width(Sy)) + adjs(P[3][orien],width(Sy)) + adjs(P[4][orien],width(Sy));
Sy[orien] = SumMacro(Sy_0[orien], 0, NFRAMES-1,width(Sy));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps1[f][orien]=Ps0[f][orien];
}
}
//Pipeline Stage 2_0
// The four products appearing in the numerators of a and b.
par
{
a_0[orien] = SXX*adjs(Sy[orien],width(a_0));
b_0[orien] = NFRAMES*adjs(Sxy[orien],width(b_0));
a_1[orien] = SX*adjs(Sxy[orien],width(a_1));
b_1[orien] = SX*adjs(Sy[orien],width(b_1));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2[f][orien]=Ps1[f][orien];
}
}
//Pipeline Stage 2_1
// Numerators: a_2 = SXX*Sy - SX*Sxy, b_2 = NFRAMES*Sxy - SX*Sy.
par
{
//a_2[orien] = (a_1[orien]-a_2[orien])*5; // 5 frames
//b_2[orien] = (b_1[orien]-b_2[orien])*5; // multiplied by 5 for following /50 division that become <<8 : 5/256 ~= 1/50
a_2[orien] = (adjs(a_0[orien],width(a_2))-adjs(a_1[orien],width(a_2))); // 3 frames
b_2[orien] = (adjs(b_0[orien],width(b_2))-adjs(b_1[orien],width(b_2)));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2_1[f][orien]=Ps2[f][orien];
}
}
//Pipeline Stage 2_2
// Division by DEN = 6 approximated as multiply by 43 then shift right by 8.
par
{
//Using 5 decimals for a and b (*25)
//a[orien] = (adjs(SXX,PSize+15)*32*adjs(Sy[orien],PSize+15) - adjs(SX,PSize+15)*32*adjs(Sxy[orien],PSize+15))/adjs(DEN,PSize+15);
//a[orien] = a_2[orien]<<8; //for 5 frames
//a[orien] = a_2[orien]<<3; //for 3 frames
//a[orien] = adjs(((a_2[orien])*21)>>7,width(a)); //for 3 frames
a[orien] = adjs((a_2[orien]*43)>>8,width(a)); //for 3 frames
//b[orien] = adjs((NFRAMES*32*adjs(Sxy[orien],PSize+13) - adjs(SX,PSize+13)*32*adjs(Sy[orien],PSize+13))/adjs(DEN,PSize+13), width(b));
//b[orien] = adjs(b_2[orien]<<8,width(b)); // for 5 frames
//b[orien] = adjs(((b_2[orien])*21)>>7,width(b)); // for 3 frames
b[orien] = adjs((b_2[orien]*43)>>8,width(b)); // for 3 frames
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2_2[f][orien]=Ps2_1[f][orien];
}
}
//Pipeline Stage 3_0
// Slope term of the fitted line for each frame: b*XX[f].
par
{
par(f=0;f<NFRAMES;f++)
{
Reg_0[f][orien] = adjs(b[orien],width(Reg_0))*adjs(XX[f],width(Reg_0));
//Copying P for the next stage
Ps3[f][orien]=Ps2_2[f][orien];
}
//Copying b for the next stage
bs3[orien]=b[orien];
a3_0[orien] = a[orien];
}
//Pipeline Stage 3_1
// Fitted value for each frame: a + b*XX[f].
par
{
par(f=0;f<NFRAMES;f++)
{
//Reg[fr][orien] = adjs(a[orien],width(Reg))+ adjs(b[orien],width(Reg))*adjs(XX[fr],width(Reg));
Reg[f][orien] = adjs(a3_0[orien],width(Reg))+ adjs(Reg_0[f][orien],width(Reg));
//Copying P for the next stage
Ps3_1[f][orien]=Ps3[f][orien];
}
//Copying b for the next stage
bs3_1[orien]=bs3[orien];
}
//Pipeline Stage 4_0
// Residual of the fit for each frame: fitted value minus measured phase.
par
{
par(f=0;f<NFRAMES;f++)
{
LE_0[orien][f] = adjs(Reg[f][orien],width(LE_0)) - adjs(Ps3_1[f][orien], width(LE_0));
}
//Copying b for the next stage
bs4_0[orien]=bs3_1[orien];
}
//Pipeline Stage 4_1
// Squared residual for each frame.
par
{
par(f=0;f<NFRAMES;f++)
{
LE_1[orien][f] = adjs(LE_0[orien][f],width(LE_1))*adjs(LE_0[orien][f],width(LE_1));
}
//Copying b for the next stage
bs4_1[orien]=bs4_0[orien];
}
//Pipeline Stage 4_2
// Outputs. LE: sum of squared residuals times 85, shifted right by 8
// (85/256 is about 1/3, i.e. roughly a division by NFRAMES), then 2 low bits
// dropped. FV: the delayed slope b. The commented-out lines are earlier
// variants (5-frame version, other scalings, and the projection of b onto
// WREAL/WIMAG that now lives in new_full_velocity).
par
{
//LE[orien] = adjs(((((adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien], 2*PSize+26)*32)*(adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien],2*PSize+26)*32) + (adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32)*(adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32) + (adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32)*(adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32) + (adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32)*(adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32) + (adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32)*(adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32))/NFRAMES)\\15), width(LE));
//LE[orien] = adjs(((SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize)*21)>>6)\\2,width(LE));
LE[orien] = adjs(((SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize+2)*85)>>8)\\2,width(LE));
//LE[orien]=adjs(((LE_1[0][orien]+LE_1[1][orien]+LE_1[2][orien])>>2)\\15,width(LE));
//LE[orien] = adjs(SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize+26)\\15, width(LE));
//Simplifying the equation: FVreal = - (F0*cos(ang)/2*PI)*b[orien] --> FVreal = Wreal[orien]*b[orien] //Wreal is initialised with factor 25
// FVimag = - (F0*sin(ang)/2*PI)*b[orien] --> FVimag = Wimag[orien]*b[orien] //Wreal is initialised with factor 25
//FVreal[orien]= (adjs(bs4_1[orien],PSize+8)*adjs(WREAL[orien],PSize+8))\\6; //final size of FVreal is PSize+18
//FVimag[orien]= (adjs(bs4_1[orien],PSize+8)*adjs(WIMAG[orien],PSize+8))\\6; //final size of FVimag is PSize+18
//FVreal[orien]= ((adjs(bs4_1[orien],PSize+8)*adjs(WREAL[orien],PSize+8))\\2)<-width(FVreal); //final size of FVreal is PSize+18
//FVimag[orien]= ((adjs(bs4_1[orien],PSize+8)*adjs(WIMAG[orien],PSize+8))\\2)<-width(FVimag); //final size of FVimag is PSize+18
//FVreal[orien]= ((adjs(bs4_1[orien],PSize+10)*adjs(WREAL[orien],PSize+10))\\4)<-width(FVreal); //final size of FVreal is PSize+18
//FVimag[orien]= ((adjs(bs4_1[orien],PSize+10)*adjs(WIMAG[orien],PSize+10))\\4)<-width(FVimag); //final size of FVimag is PSize+18
FV[orien]=adjs(bs4_1[orien], width(FV)); //Only 5 bits for Frac !!!!
}
}
}
attention/attention_v0.1/opticflow.hch
0 → 100644
View file @
059e050a
// opticflow.hch
// Compile-time parameters and macro-procedure prototypes for the
// optic-flow stage.
#ifndef __OPTICFLOW__
#define __OPTICFLOW__
#include "GaborPrimitives.hch"
// Number of frames and of filter orientations processed together.
#define NFRAMES 3
#define NORIENTATIONS 8
// atan2 core configuration (input width, output width, latency, core name).
#define ATAN2WIDTH 10 //19 //24
#define ATAN2OUTWIDTH 10 //9 //19 //24
#define ATAN2LATENCY (ATAN2OUTWIDTH+4)
#define ATAN2NAME atan2_10bit //atan2_19bit
// Divider core configuration.
#define DIVIDER_NAME divider_21
#define DIVIDER_NAME_2 divider_27
// Bit width of the dividend/divisor/quotient ports of the divider_18 core.
#define DIVIDER_INPUT 18
// Parenthesised so the macro expands safely inside larger expressions
// (e.g. 2*DIVIDER_LATENCY); this matches the style of ATAN2LATENCY above.
#define DIVIDER_LATENCY (DIVIDER_INPUT+4) // is +4 if divider has clks/div==1
// Bit width of the flow outputs.
#define FLOW_BITS 12
// NOTE(review): THRESHOLD, NC_MIN and EPS are presumably the default values for
// the thres / nc_min arguments and a small-value guard - confirm against callers.
#define THRESHOLD 16
#define NC_MIN 4
#define EPS 0
// Not-a-number marker: a 12-bit pattern with only the top bit set
// (same width as FLOW_BITS).
#define NAN 0b100000000000
// CORES
macro proc CoreATAN2CORDIC_fl(y, x, enable, angle);
macro proc CoreDIVIDER(my_dividend, my_divisor, result, enable);
macro proc CoreDIVIDER_2(my_dividend, my_divisor, result, enable);
//***************************************************
//Macro component_velocity
//***************************************************
macro proc component_velocity(P, FVreal, FVimag, LE);
macro proc component_velocity_mia(P, FVreal, FVimag, LE);
macro proc new_component_velocity(P, FV, LE);
//***************************************************
//Macro compute_phase
//***************************************************
macro proc compute_phase(Greal, Gimag, P);
macro proc compute_single_phase(Greal, Gimag, P);
//***************************************************
//Macro unwrap
//***************************************************
macro proc unwrap(Pin, Pout);
macro proc unwrap_3(Pin, Pout);
//***************************************************
//Macro full_velocity
//***************************************************
macro proc full_velocity(FVx,FVy,LE,thres,nc_min, enable, Ox, Oy);
macro proc full_velocity_small(FVx,FVy,LE,thres, Div_thr, nc_min, Ox, Oy);
macro proc new_full_velocity(FV, LE,thres, nc_min, Ox, Oy);
macro proc divide12(Num, Den, quot);
macro proc invert(Num, Den, quot);
//***************************************************
//Resource sharing functions
//***************************************************
macro proc compute_phase_top(Greal,Gimag, P, index);
void function_compute_phase(signed int F_BITS (*Greal),signed int F_BITS (*Gimag), signed int 9 *P);
macro proc compute_phase_index(Greal, Gimag, P);
#endif
\ No newline at end of file
attention/attention_v0.1/parameters.hch
0 → 100644
View file @
059e050a
/* parameters.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
// Global compile-time parameters.
#ifndef __PARAMETERS__
#define __PARAMETERS__
// Number of cameras (1 for a single camera, 2 for a stereo system).
// Currently disabled: this header does not define NCAMERAS.
//#define NCAMERAS 2
// Maximum image resolution along X and Y.
#define MAX_RES_X 1024
#define MAX_RES_Y 1024
// Size of a maximum-resolution image: MAX_RES_X * MAX_RES_Y.
#define MAX_IMSIZE (MAX_RES_X*MAX_RES_Y)
// Number of frames in use.
// NOTE(review): opticflow.hch also defines NFRAMES as 3; keep both definitions in sync.
#define NFRAMES 3
#endif
\ No newline at end of file
optical_flow/gradient_based_method/flow_v0.1/cores.hcc
View file @
059e050a
...
...
@@ -20,7 +20,7 @@
% ImSize - Size of the input images
%
% DESCRIPTION
% Interface for a top architecture to interface with the
disparity
estimation core
% Interface for a top architecture to interface with the
optic flow
estimation core
% RETURN
%
*/
...
...
optical_flow/gradient_based_method/flow_v0.1/cores.hch
View file @
059e050a
...
...
@@ -8,7 +8,7 @@
#include "stdlib.hch"
#include "channels.hch"
//#include "xircav4_lib.hch" Platform-dependent
//#include "xircav4_lib.hch"
//
Platform-dependent
#define CORE 1 // 0 for sub-circuit test, 1 for core calls
...
...
optical_flow/gradient_based_method/flow_v0.1/lklib.hcc
View file @
059e050a
/* lklib.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "lklib.hch"
#include "cores.hch"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment