// Commit 059e050a, authored by Francisco Barranco
//
// Uploading attention core v0.1
//
// Parent commit: e86681b9
//********************************************************************
//
// Programmed by Javier Díaz, DRIVSCO project
// Granada, October 2009, version 1.0
//
// Note: Francisco Barranco made some changes to this file. The functions he
// added are documented; the rest of the code is used unchanged.
//********************************************************************
#include "GaborPrimitives.hch"
/*
// Gabor filter kernel coefficients for 14 bits (normalization to 2^14)
// **************************************************************************
macro expr NORMY= 16384; //exp2(KERN_BITS);
macro expr NORMX= NORMY/2; // one bit more precision
// FILTERS AND NORM COMPUTED SO THAT THE KERNEL COEFFICIENT SUM = 1 --> this allows division by a power of 2
macro expr Gab1={ 243 , 601 , 1214 , 1989 , 2671 , 2948 }; // sym
macro expr Gab2={ -12 , 590 , -23 , -2012 , -23 , 2960 }; // sym
macro expr Gab3={ -254 , 0 , 1214 , 0 , -2682 , 0 }; // antisym
macro expr Gab4={ 127 , -220 , -1237 , -1260 , 1133 , 2914 }; // sym
macro expr Gab5={ 162 , 590 , 231 , -1584 , -2393 , 0 }; // antisym
macro expr Gab6={ 127 , 520 , -439 , -1966 , 289 , 2938 }; // sym
macro expr Gab7={ -208 , 289 , 1145 , -474 , -2659 , 0 }; // antisym
macro expr Gab8={ -254 , -451 , -289 , 717 , 2197 , 2948 }; // sym
macro expr Gab9={ -23 , -405 , -1179 , -1862 , -1526 , 0 }; // antisym */
// Gabor filter kernel coefficients for 12 bits (normalization to 2^12)
// **************************************************************************
// Each GabN holds 6 coefficients. GenKernel_Gabor uses them as one half of an
// 11-tap kernel: entry k weights the mirrored taps k and 10-k (k = 0..4) and
// entry 5 weights the centre tap.
// NORMY / NORMX are the divisors handed to GenKernel_Gabor by the Y and X
// passes in this file; both evaluate to 4096/2 = 2048.
macro expr NORMY= 4096/2; //exp2(KERN_BITS); // one bit more precision
macro expr NORMX= 4096/2;
// Gab1: used with Symmetry = 1. With a centre tap of 736 the full 11-tap sum is
// 2*(61+150+304+497+668) + 736 = 4096.
macro expr Gab1={ 61, 150, 304, 497, 668, 736}; // changed 737 for the norm
// Gab2: used with Symmetry = 1. With -502 the full 11-tap sum is
// 2*(-6+147-6-502-3) + 740 = 0.
macro expr Gab2={ -6, 147, -6, -502, -3, 740}; // changed -503 for the DC component
macro expr Gab3={ -64, 0, 304, 0, -671, 0}; // antisym
macro expr Gab4={ 32, -55, -309, -315, 283, 728}; // sym
macro expr Gab5={ 40, 147, 58, -396, -598, 0}; // antisym
macro expr Gab6={ 32, 130, -110, -491, 72, 734}; // sym
macro expr Gab7={ -52, 72, 286, -119, -665, 0}; // antisym
macro expr Gab8={ -64, -113, -72, 179, 549, 737}; // sym
macro expr Gab9={ -6, -101, -295, -465, -382, 0}; // antisym
// Recursive vector addition with a balanced tree (signed operands)
// ************************************************************************************
// SumMacro(Array, begin, Index, Extend) expands to the sum of Array[begin..Index].
// Every element is sign-extended to Extend bits with adjs() before it is added.
// The index range is halved at each recursion level, so the additions nest as a
// balanced tree; select() resolves the recursion at compile time.
macro expr SumMacro(Array, begin, Index, Extend) =
    let macro expr TreeAdd(Vec, Hi, Lo, Bits) =
        let macro expr Mid = Lo + (Hi-Lo)/2; in
        select (Hi != Lo,
                // Two or more elements left: add the upper and the lower half.
                TreeAdd(Vec, Hi, Mid + 1, Bits) + TreeAdd(Vec, Mid, Lo, Bits),
                // Single element left: this is a leaf of the tree.
                adjs(Vec[Hi], Bits));
    in
    TreeAdd(Array, Index, begin, Extend);
// Recursive vector addition with a balanced tree (unsigned operands)
// ************************************************************************************
// UnSumMacro2(Array, begin, Index, Extend) expands to the sum of Array[begin..Index].
// Every element is zero-extended to Extend bits with adju() before it is added.
// The index range is halved at each recursion level, so the additions nest as a
// balanced tree; select() resolves the recursion at compile time.
macro expr UnSumMacro2(Array, begin, Index, Extend) =
    let macro expr TreeAddU(Vec, Hi, Lo, Bits) =
        let macro expr Mid = Lo + (Hi-Lo)/2; in
        select (Hi != Lo,
                // Two or more elements left: add the upper and the lower half.
                TreeAddU(Vec, Hi, Mid + 1, Bits) + TreeAddU(Vec, Mid, Lo, Bits),
                // Single element left: this is a leaf of the tree.
                adju(Vec[Hi], Bits));
    in
    TreeAddU(Array, Index, begin, Extend);
// Generic convolution kernel multiplication
// *******************************************************************************/
// GenKernel_Gabor: one 11-tap FIR stage that exploits kernel (anti)symmetry.
//   buffer   - tap array; elements 0..10 are read.
//   Out      - receives the normalised result, truncated to width(Out).
//   mask     - 6 coefficients (half kernel); entry 5 is the centre tap.
//   norm     - divisor applied to the accumulated sum.
//   Symmetry - 1 selects the symmetric datapath, any other value the
//              antisymmetric one (resolved at compile time by ifselect).
// All statements live in one par block, so they execute in the same clock cycle
// each time the macro body runs; data therefore advances one register per
// invocation along Register[] -> aux0 -> aux[0..Retiming-1] -> Out.
// NOTE(review): xilinxmult is defined outside this file; its internal latency
// is not visible here - confirm before relying on PipeLatency.
macro proc GenKernel_Gabor(buffer,Out,mask,norm, Symmetry)
{
// Number of delay registers (aux[]) between the normalised sum and Out.
macro expr Retiming=7; // 8 no / 9 no
// NOTE: PipeLatency is not referenced anywhere else inside this macro.
macro expr PipeLatency=3+Retiming-1;
macro expr DataWidth=(width(buffer[0])+KERN_BITS); // Norm needs KERN_BITS + 1 (sign) + 8 (from 256 gray levels)
const signed KERN_BITS kernel[6]=mask;
// One product register per half-kernel coefficient.
signed DataWidth Register[6];
// Sum of the six products.
signed (DataWidth) aux0;
signed (width(Out)) aux[Retiming];
par
{
ifselect (Symmetry==1)
{
// Symmetric kernel: mirrored taps k and 10-k are added (each sign-extended by
// one bit) before the multiplication; the centre tap 5 is multiplied alone.
par
{
xilinxmult(Register[0], (adjs(buffer[0],(width(buffer[0])+1)) + adjs(buffer[10],(width(buffer[0])+1))), kernel[0]);
xilinxmult(Register[1], (adjs(buffer[1],(width(buffer[0])+1)) + adjs(buffer[9],(width(buffer[0])+1))), kernel[1]);
xilinxmult(Register[2], (adjs(buffer[2],(width(buffer[0])+1)) + adjs(buffer[8],(width(buffer[0])+1))), kernel[2]);
xilinxmult(Register[3], (adjs(buffer[3],(width(buffer[0])+1)) + adjs(buffer[7],(width(buffer[0])+1))), kernel[3]);
xilinxmult(Register[4], (adjs(buffer[4],(width(buffer[0])+1)) + adjs(buffer[6],(width(buffer[0])+1))), kernel[4]);
xilinxmult(Register[5], (adjs(buffer[5],(width(buffer[0])+1)) + 0 ), kernel[5]);
}
}
else // antisymmetric kernel
{
// Antisymmetric kernel: tap k is subtracted from its mirror 10-k; the centre
// contribution is forced to zero.
par
{
xilinxmult(Register[0], (adjs(buffer[10],(width(buffer[0])+1)) - adjs(buffer[0],(width(buffer[0])+1))), kernel[0]);
xilinxmult(Register[1], (adjs(buffer[9],(width(buffer[0])+1)) - adjs(buffer[1],(width(buffer[0])+1))), kernel[1]);
xilinxmult(Register[2], (adjs(buffer[8],(width(buffer[0])+1)) - adjs(buffer[2],(width(buffer[0])+1))), kernel[2]);
xilinxmult(Register[3], (adjs(buffer[7],(width(buffer[0])+1)) - adjs(buffer[3],(width(buffer[0])+1))), kernel[3]);
xilinxmult(Register[4], (adjs(buffer[6],(width(buffer[0])+1)) - adjs(buffer[4],(width(buffer[0])+1))), kernel[4]);
Register[5]= 0;
}
} // end symmetry
//aux0= adjs(Register[0],width(aux0)) + adjs(Register[1],width(aux0)) + adjs(Register[2],width(aux0))+ adjs(Register[3],width(aux0))+ adjs(Register[4],width(aux0))+ adjs(Register[5],width(aux0));
// Balanced-tree sum of the six products; each operand is extended to width(aux0),
// so the sum itself gets no extra head-room bits.
aux0= SumMacro(Register, 0, 5, width(aux0));
// CAREFUL, NORM/2 SHOULD NEVER OVERFLOW DATA!!!
// (Disabled alternative below: rounding to nearest instead of plain division.)
/* if (aux0>=0)
aux[0]= ((aux0+norm/2)/norm)<-(width(Out)); //
else
aux[0]= ((aux0-norm/2)/norm)<-(width(Out)); // */
// Normalise by norm and keep the width(Out) least significant bits.
aux[0]= ((aux0)/norm)<-(width(Out)); // */
//aux[0]= (aux0>>11)<-(width(Out));
// Retiming delay line.
par(i=1;i<(Retiming);i++)
{
aux[i]=aux[i-1];
}
Out= aux[Retiming-1];
//Out=adjs((adjs(buffer[2],11)*kernel[0]),width(Out));
} // End main par
}
// ************************************************************************************
// GENERIC X-Y SEPARABLE CONVOLUTION --> TESTED!!!
// ************************************************************************************
// GenericConvolution: separable 2-D filter built from two GenKernel_Gabor stages.
//   Input        - sample entering the X tap line.
//   Output       - result of the Y stage.
//   X_FIR, Y_FIR - half-kernel coefficient sets for the X and the Y stage.
//   NTaps        - number of taps of the X and Y tap arrays.
//   NTapsMinus1  - number of block RAMs; also the index of the Y tap that
//                  receives the X-stage result (expected to equal NTaps-1).
//   ColumnLength - col wraps to 0 once it reaches ColumnLength-1.
//   normx, normy - normalisation divisors for the X and the Y stage.
//   Sx, Sy       - symmetry selectors for the X and the Y stage
//                  (1 = symmetric, otherwise antisymmetric).
// Latency noted by the original author: 2 + 1 + 1 + 4*2 cycles
// (2 from main, 1 input, 1 output, 4*2 kernels) - not verified here.
// NOTE(review): col and colbis are neither static nor initialised - confirm
// that the start-up value is acceptable.
macro proc GenericConvolution(Input, Output, X_FIR, Y_FIR, NTaps, NTapsMinus1, ColumnLength,normx, normy, Sx,Sy)
{
    // NTapsMinus1 dual-port block RAMs, each MAX_RES_X/SCALE entries deep.
    static mpram
    {
        rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)];  // Read port
        wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
    } ColumnsBuffer[NTapsMinus1] with {block = "BlockRAM"};
    macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
    macro proc writeRAM (row,col,data)
    {
        (ColumnsBuffer[row]).Write[col]=data;
    }

    // col is the RAM read address; colbis is a copy taken in the same par step,
    // so it holds the previous value of col and is used as the write address.
    unsigned (log2ceil((MAX_RES_X/SCALE))) col, colbis;
    signed (width(Input)) DataArrayX[NTaps], DataArrayY[NTaps];

    par
    {
        // ---- X stage -------------------------------------------------
        // Newest sample enters at the top of the X tap line ...
        DataArrayX[NTaps-1]=Input;
        // ... while the older samples move one position down.
        par (i = 0; i != (NTaps-1); i++) // NOTE WIDTH(i)=LOG2CEIL(NTaps)
        {
            DataArrayX[i] = DataArrayX[i+1];
        }
        // The X result feeds the last tap of the Y array.
        GenKernel_Gabor(DataArrayX,DataArrayY[NTapsMinus1],X_FIR,normx,Sx);

        // ---- Y stage -------------------------------------------------
        col= col>=(ColumnLength-1) ? 0 : col+1;
        colbis= col;
        // Load Y taps 0..NTapsMinus1-1 from the RAMs at address col.
        par(r1=0;r1!=NTapsMinus1;r1++)
        {
            DataArrayY[r1] = readRAM(r1<-(log2ceil(NTapsMinus1)),col);
        }
        // Write every tap back one RAM lower (tap r2+1 -> RAM r2) at address colbis.
        par(r2=0;r2!=NTapsMinus1;r2++)
        {
            writeRAM(r2<-(log2ceil(NTapsMinus1)),colbis,DataArrayY[r2+1]);
        }
        GenKernel_Gabor(DataArrayY,Output,Y_FIR,normy,Sy) ;
    } // End Global par
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab1
// (Symmetry = 1, divisor NORMY). Note the naming offset: wrapper ..._Gab0 applies Gab1.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab0[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab1,NORMY, 1);
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab2
// (Symmetry = 1, divisor NORMY). Note the naming offset: wrapper ..._Gab1 applies Gab2.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab1[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab2,NORMY, 1);
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab3
// (Symmetry = -1, i.e. antisymmetric path; divisor NORMY).
// Note the naming offset: wrapper ..._Gab2 applies Gab3.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab2[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab3,NORMY, -1);
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab4
// (Symmetry = 1, divisor NORMY). Note the naming offset: wrapper ..._Gab3 applies Gab4.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab3[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab4,NORMY, 1);
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab5
// (Symmetry = -1, i.e. antisymmetric path; divisor NORMY).
// Note the naming offset: wrapper ..._Gab4 applies Gab5.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab4[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab5,NORMY, -1);
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab6
// (Symmetry = 1, divisor NORMY). Note the naming offset: wrapper ..._Gab5 applies Gab6.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab5[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab6,NORMY, 1);
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab7
// (Symmetry = -1, i.e. antisymmetric path; divisor NORMY).
// Note the naming offset: wrapper ..._Gab6 applies Gab7.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab6[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab7,NORMY, -1);
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab8
// (Symmetry = 1, divisor NORMY). Note the naming offset: wrapper ..._Gab7 applies Gab8.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab7[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab8,NORMY, 1);
}
// Function wrapper around the GenKernel_Gabor macro for kernel Gab9
// (Symmetry = -1, i.e. antisymmetric path; divisor NORMY).
// Note the naming offset: wrapper ..._Gab8 applies Gab9.
// The [2] suffix declares an array of two copies of the function.
//   buffer - tap array handed to GenKernel_Gabor.
//   Out    - pointer to the result; written through *Out.
//   index  - not used in the body.
void GenKernel_Gabor_Gab8[2](signed int CONV_BITS *buffer, signed CONV_BITS *Out, unsigned FLOW_INDEX_BITS index)
{
GenKernel_Gabor(buffer,(*Out),Gab9,NORMY, -1);
}
// Y CONVOLUTION FILTERS
// ************************************************************************************
// GaborY: feeds one shared Y tap array to the nine kernels Gab1..Gab9.
//   Input        - sample written into the last Y tap (index NTapsMinus1).
//   FNY          - array of 9 outputs; FNY[k] is the response to Gab(k+1).
//   NTaps        - number of entries of the Y tap array.
//   NTapsMinus1  - number of block RAMs and of RAM-backed taps
//                  (expected to equal NTaps-1).
//   ColumnLength - col wraps to 0 once it reaches ColumnLength-1.
// Latency noted by the original author: 2 + 1 + 1 + 4*2 cycles
// (2 from main, 1 input, 1 output, 4*2 kernels) - not verified here.
macro proc GaborY(Input, FNY, NTaps, NTapsMinus1, ColumnLength)
{
    // NTapsMinus1 dual-port block RAMs, each MAX_RES_X/SCALE entries deep.
    static mpram
    {
        rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)];  // Read port
        wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
    } ColumnsBuffer[NTapsMinus1] with {block = "BlockRAM"};
    macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
    macro proc writeRAM (row,col,data)
    {
        (ColumnsBuffer[row]).Write[col]=data;
    }

    // Read address (col) and write address (colbis) of the RAMs.
    // NOTE(review): the start values presumably compensate upstream pipeline
    // latency - confirm against the caller.
    static unsigned (log2ceil((MAX_RES_X/SCALE))) col=((MAX_RES_X/SCALE)+1 -2-1-4), colbis=((MAX_RES_X/SCALE) -2-1-8);
    signed (width(Input)) DataArrayY[NTaps];

    par
    {
        // Newest sample enters at the last tap.
        DataArrayY[NTapsMinus1]=Input;

        // Address bookkeeping: colbis takes the value col had before this step.
        par
        {
            col= col>=(ColumnLength-1) ? 0 : col+1;
            colbis= col;
        }

        // Load taps 0..NTapsMinus1-1 from the RAMs at address col. The bound is
        // NTapsMinus1 because that is the number of RAMs in ColumnsBuffer.
        par(r1=0;r1!=NTapsMinus1;r1++)
        {
            DataArrayY[r1] = readRAM(r1<-(log2ceil(NTapsMinus1)),col);
        }
        // Write every tap back one RAM lower (tap r2+1 -> RAM r2) at address colbis.
        par(r2=0;r2!=NTapsMinus1;r2++)
        {
            writeRAM(r2<-(log2ceil(NTapsMinus1)),colbis,DataArrayY[r2+1]);
        }

        // Nine kernels share the same taps; 1 = symmetric, -1 = antisymmetric.
        GenKernel_Gabor(DataArrayY,(FNY[0]),Gab1,NORMY, 1);
        GenKernel_Gabor(DataArrayY,(FNY[1]),Gab2,NORMY, 1);
        GenKernel_Gabor(DataArrayY,(FNY[2]),Gab3,NORMY, -1);
        GenKernel_Gabor(DataArrayY,(FNY[3]),Gab4,NORMY, 1);
        GenKernel_Gabor(DataArrayY,(FNY[4]),Gab5,NORMY, -1);
        GenKernel_Gabor(DataArrayY,(FNY[5]),Gab6,NORMY, 1);
        GenKernel_Gabor(DataArrayY,(FNY[6]),Gab7,NORMY, -1);
        GenKernel_Gabor(DataArrayY,(FNY[7]),Gab8,NORMY, 1);
        GenKernel_Gabor(DataArrayY,(FNY[8]),Gab9,NORMY, -1);
    } // End Global par
}
// GABOR FILTERS BASE SET
// ************************************************************************************
// NO SHARING
// ************************************************************************************
// GenericConvolutionX: X-direction FIR stage with its own private tap line.
//   Input  - sample stored in the last tap.
//   Output - result produced by GenKernel_Gabor.
//   X_FIR  - half-kernel coefficient set.
//   NTaps  - length of the tap line.
//   norm   - normalisation divisor.
//   sym    - symmetry selector forwarded to GenKernel_Gabor.
macro proc GenericConvolutionX(Input, Output, X_FIR, NTaps,norm, sym)
{
    signed (width(Input)) TapLine[NTaps];

    // The three actions below are issued together in one par block.
    par
    {
        // Filter the tap line.
        GenKernel_Gabor(TapLine, Output, X_FIR, norm, sym);

        // Every tap takes the value of its upper neighbour ...
        par (k = 0; k != (NTaps-1); k++) // NOTE WIDTH(k)=LOG2CEIL(NTaps)
        {
            TapLine[k] = TapLine[k+1];
        }

        // ... and the top tap takes the new input sample.
        TapLine[NTaps-1] = Input;
    }
}
// GaborBase: complete separable filter bank without resource sharing.
//   DataIn  - input sample for the Y stage.
//   FNYNX   - 16 separable responses, laid out as listed below.
//   Columns - column length forwarded to GaborY.
//
// GaborY yields YResp[k], the response to kernel Gab(k+1) along Y; each entry
// is then filtered along X with the kernel named in the table ("FaYFbX" means
// Gab a along Y followed by Gab b along X):
//   FNYNX[0]  = F1Y2X      FNYNX[8]  = F8YF6X
//   FNYNX[1]  = F1Y3X      FNYNX[9]  = F9YF7X
//   FNYNX[2]  = F2Y1X      FNYNX[10] = F9YF6X
//   FNYNX[3]  = F3Y1X      FNYNX[11] = F8YF7X
//   FNYNX[4]  = F4YF4X     FNYNX[12] = F6YF8X
//   FNYNX[5]  = F5YF5X     FNYNX[13] = F7YF9X
//   FNYNX[6]  = F5YF4X     FNYNX[14] = F7YF8X
//   FNYNX[7]  = F4YF5X     FNYNX[15] = F6YF9X
macro proc GaborBase(DataIn, FNYNX,Columns)
{
    macro expr KernelTaps = 11;
    signed CONV_BITS YResp[9];

    par // TODO (original author's note): remove _INDEX
    {
        // Y stage: one tap array shared by the nine kernels.
        GaborY(DataIn, YResp, KernelTaps, (KernelTaps-1), Columns);

        // X stages fed by YResp[0] and the two stages using Gab1 along X.
        GenericConvolutionX(YResp[0], FNYNX[0],  Gab2, KernelTaps, (NORMX),  1);
        GenericConvolutionX(YResp[0], FNYNX[1],  Gab3, KernelTaps, (NORMX), -1);
        GenericConvolutionX(YResp[1], FNYNX[2],  Gab1, KernelTaps, (NORMX),  1);
        GenericConvolutionX(YResp[2], FNYNX[3],  Gab1, KernelTaps, (NORMX),  1);

        // Gab4 / Gab5 combinations.
        GenericConvolutionX(YResp[3], FNYNX[4],  Gab4, KernelTaps, (NORMX),  1);
        GenericConvolutionX(YResp[4], FNYNX[5],  Gab5, KernelTaps, (NORMX), -1);
        GenericConvolutionX(YResp[4], FNYNX[6],  Gab4, KernelTaps, (NORMX),  1);
        GenericConvolutionX(YResp[3], FNYNX[7],  Gab5, KernelTaps, (NORMX), -1);

        // Gab8 / Gab9 along Y with Gab6 / Gab7 along X.
        GenericConvolutionX(YResp[7], FNYNX[8],  Gab6, KernelTaps, (NORMX),  1);
        GenericConvolutionX(YResp[8], FNYNX[9],  Gab7, KernelTaps, (NORMX), -1);
        GenericConvolutionX(YResp[8], FNYNX[10], Gab6, KernelTaps, (NORMX),  1);
        GenericConvolutionX(YResp[7], FNYNX[11], Gab7, KernelTaps, (NORMX), -1);

        // Gab6 / Gab7 along Y with Gab8 / Gab9 along X.
        GenericConvolutionX(YResp[5], FNYNX[12], Gab8, KernelTaps, (NORMX),  1);
        GenericConvolutionX(YResp[6], FNYNX[13], Gab9, KernelTaps, (NORMX), -1);
        GenericConvolutionX(YResp[6], FNYNX[14], Gab8, KernelTaps, (NORMX),  1);
        GenericConvolutionX(YResp[5], FNYNX[15], Gab9, KernelTaps, (NORMX), -1);
    }
}
// SHARING
// ************************************************************************************
// GenericConvolutionX_index: X-direction FIR stage that keeps one tap line per
// sharer and works on the line selected by index.
//   Input           - sample stored in the last tap of the selected line.
//   Output          - result produced by GenKernel_Gabor.
//   X_FIR           - half-kernel coefficient set.
//   NTaps           - length of each tap line.
//   norm            - normalisation divisor.
//   sym             - symmetry selector forwarded to GenKernel_Gabor.
//   sharerProcesses - number of tap lines that are allocated.
//   index           - selects the tap line used by this invocation.
macro proc GenericConvolutionX_index(Input, Output, X_FIR, NTaps,norm, sym, sharerProcesses, index)
{
    static signed (width(Input)) TapLines[sharerProcesses][NTaps];

    // The three actions below are issued together in one par block.
    par
    {
        // Filter the selected tap line.
        GenKernel_Gabor(TapLines[index], Output, X_FIR, norm, sym);

        // Every tap of the selected line takes the value of its upper neighbour ...
        par (k = 0; k != (NTaps-1); k++) // NOTE WIDTH(k)=LOG2CEIL(NTaps)
        {
            TapLines[index][k] = TapLines[index][k+1];
        }

        // ... and its top tap takes the new input sample.
        TapLines[index][NTaps-1] = Input;
    }
}
// functionGenericConvolutionX_index_stereo: the 16 X-direction stages of the
// filter bank, with MAX_PROC_DISPARITY tap lines per stage selected by index.
//   FNY   - Y-stage responses; FNY[k] is expected to be the Gab(k+1) response.
//   FNYNX - 16 separable responses, laid out as listed below.
//   index - tap-line selector forwarded to GenericConvolutionX_index.
//
// Output layout ("FaYFbX" = Gab a along Y followed by Gab b along X):
//   FNYNX[0]  = F1Y2X      FNYNX[8]  = F8YF6X
//   FNYNX[1]  = F1Y3X      FNYNX[9]  = F9YF7X
//   FNYNX[2]  = F2Y1X      FNYNX[10] = F9YF6X
//   FNYNX[3]  = F3Y1X      FNYNX[11] = F8YF7X
//   FNYNX[4]  = F4YF4X     FNYNX[12] = F6YF8X
//   FNYNX[5]  = F5YF5X     FNYNX[13] = F7YF9X
//   FNYNX[6]  = F5YF4X     FNYNX[14] = F7YF8X
//   FNYNX[7]  = F4YF5X     FNYNX[15] = F6YF9X
void functionGenericConvolutionX_index_stereo(signed CONV_BITS *FNY, signed CONV_BITS *FNYNX, unsigned DISPARITY_INDEX_BITS index)
{
    macro expr KernelTaps = 11;

    par
    {
        // Stages fed by FNY[0] and the two stages using Gab1 along X.
        GenericConvolutionX_index(FNY[0], FNYNX[0],  Gab2, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[0], FNYNX[1],  Gab3, KernelTaps, (NORMX), -1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[1], FNYNX[2],  Gab1, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[2], FNYNX[3],  Gab1, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);

        // Gab4 / Gab5 combinations.
        GenericConvolutionX_index(FNY[3], FNYNX[4],  Gab4, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[4], FNYNX[5],  Gab5, KernelTaps, (NORMX), -1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[4], FNYNX[6],  Gab4, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[3], FNYNX[7],  Gab5, KernelTaps, (NORMX), -1, MAX_PROC_DISPARITY, index);

        // Gab8 / Gab9 along Y with Gab6 / Gab7 along X.
        GenericConvolutionX_index(FNY[7], FNYNX[8],  Gab6, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[8], FNYNX[9],  Gab7, KernelTaps, (NORMX), -1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[8], FNYNX[10], Gab6, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[7], FNYNX[11], Gab7, KernelTaps, (NORMX), -1, MAX_PROC_DISPARITY, index);

        // Gab6 / Gab7 along Y with Gab8 / Gab9 along X.
        GenericConvolutionX_index(FNY[5], FNYNX[12], Gab8, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[6], FNYNX[13], Gab9, KernelTaps, (NORMX), -1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[6], FNYNX[14], Gab8, KernelTaps, (NORMX),  1, MAX_PROC_DISPARITY, index);
        GenericConvolutionX_index(FNY[5], FNYNX[15], Gab9, KernelTaps, (NORMX), -1, MAX_PROC_DISPARITY, index);
    }
}
// functionGenericConvolutionX_index_flow: the 16 X-direction stages of the
// filter bank, with MAX_PROC_FLOW tap lines per stage selected by index.
//   FNY   - Y-stage responses; FNY[k] is expected to be the Gab(k+1) response.
//   FNYNX - 16 separable responses, laid out as listed below.
//   index - tap-line selector forwarded to GenericConvolutionX_index.
//
// Output layout ("FaYFbX" = Gab a along Y followed by Gab b along X):
//   FNYNX[0]  = F1Y2X      FNYNX[8]  = F8YF6X
//   FNYNX[1]  = F1Y3X      FNYNX[9]  = F9YF7X
//   FNYNX[2]  = F2Y1X      FNYNX[10] = F9YF6X
//   FNYNX[3]  = F3Y1X      FNYNX[11] = F8YF7X
//   FNYNX[4]  = F4YF4X     FNYNX[12] = F6YF8X
//   FNYNX[5]  = F5YF5X     FNYNX[13] = F7YF9X
//   FNYNX[6]  = F5YF4X     FNYNX[14] = F7YF8X
//   FNYNX[7]  = F4YF5X     FNYNX[15] = F6YF9X
void functionGenericConvolutionX_index_flow(signed CONV_BITS *FNY, signed CONV_BITS *FNYNX, unsigned FLOW_INDEX_BITS index)
{
    macro expr KernelTaps = 11;

    par
    {
        // Stages fed by FNY[0] and the two stages using Gab1 along X.
        GenericConvolutionX_index(FNY[0], FNYNX[0],  Gab2, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[0], FNYNX[1],  Gab3, KernelTaps, (NORMX), -1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[1], FNYNX[2],  Gab1, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[2], FNYNX[3],  Gab1, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);

        // Gab4 / Gab5 combinations.
        GenericConvolutionX_index(FNY[3], FNYNX[4],  Gab4, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[4], FNYNX[5],  Gab5, KernelTaps, (NORMX), -1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[4], FNYNX[6],  Gab4, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[3], FNYNX[7],  Gab5, KernelTaps, (NORMX), -1, MAX_PROC_FLOW, index);

        // Gab8 / Gab9 along Y with Gab6 / Gab7 along X.
        GenericConvolutionX_index(FNY[7], FNYNX[8],  Gab6, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[8], FNYNX[9],  Gab7, KernelTaps, (NORMX), -1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[8], FNYNX[10], Gab6, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[7], FNYNX[11], Gab7, KernelTaps, (NORMX), -1, MAX_PROC_FLOW, index);

        // Gab6 / Gab7 along Y with Gab8 / Gab9 along X.
        GenericConvolutionX_index(FNY[5], FNYNX[12], Gab8, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[6], FNYNX[13], Gab9, KernelTaps, (NORMX), -1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[6], FNYNX[14], Gab8, KernelTaps, (NORMX),  1, MAX_PROC_FLOW, index);
        GenericConvolutionX_index(FNY[5], FNYNX[15], Gab9, KernelTaps, (NORMX), -1, MAX_PROC_FLOW, index);
    }
}
// BUILDING GABOR FILTERS
// ************************************************************************************
// BuildGabor: combines the 16 separable responses FNYNX[0..15] into an even (fe)
// and an odd (fo) response for 8 orientations. All 16 assignments are issued in
// one par block.
//
// Separable responses ("FaYFbX" = Gab a along Y followed by Gab b along X):
//   FNYNX[0]  = F1Y2X      FNYNX[8]  = F8YF6X
//   FNYNX[1]  = F1Y3X      FNYNX[9]  = F9YF7X
//   FNYNX[2]  = F2Y1X      FNYNX[10] = F9YF6X
//   FNYNX[3]  = F3Y1X      FNYNX[11] = F8YF7X
//   FNYNX[4]  = F4YF4X     FNYNX[12] = F6YF8X
//   FNYNX[5]  = F5YF5X     FNYNX[13] = F7YF9X
//   FNYNX[6]  = F5YF4X     FNYNX[14] = F7YF8X
//   FNYNX[7]  = F4YF5X     FNYNX[15] = F6YF9X
//
// Output index o, with the FNYNX indices that form it:
//   o | fe[o]                        | fo[o]
//   0 | 0        (F1Y2X)             | 1         (F1Y3X)
//   1 | 8 - 9    (F8YF6X - F9YF7X)   | 10 + 11   (F9YF6X + F8YF7X)
//   2 | 4 - 5    (F4YF4X - F5YF5X)   | 6 + 7     (F5YF4X + F4YF5X)
//   3 | 12 - 13  (F6YF8X - F7YF9X)   | 14 + 15   (F7YF8X + F6YF9X)
//   4 | 2        (F2Y1X)             | 3         (F3Y1X)
//   5 | 12 + 13  (F6YF8X + F7YF9X)   | 14 - 15   (F7YF8X - F6YF9X)
//   6 | 4 + 5    (F4YF4X + F5YF5X)   | 6 - 7     (F5YF4X - F4YF5X)
//   7 | 8 + 9    (F8YF6X + F9YF7X)   | 10 - 11   (F9YF6X - F8YF7X)
macro proc BuildGabor(FNYNX,fe,fo)
{
    par
    {
        // Even responses: direct copies first, then difference / sum pairs.
        fe[0] = FNYNX[0];
        fe[4] = FNYNX[2];
        fe[1] = FNYNX[8]  - FNYNX[9];
        fe[7] = FNYNX[8]  + FNYNX[9];
        fe[2] = FNYNX[4]  - FNYNX[5];
        fe[6] = FNYNX[4]  + FNYNX[5];
        fe[3] = FNYNX[12] - FNYNX[13];
        fe[5] = FNYNX[12] + FNYNX[13];

        // Odd responses: direct copies first, then sum / difference pairs.
        fo[0] = FNYNX[1];
        fo[4] = FNYNX[3];
        fo[1] = FNYNX[10] + FNYNX[11];
        fo[7] = FNYNX[10] - FNYNX[11];
        fo[2] = FNYNX[6]  + FNYNX[7];
        fo[6] = FNYNX[6]  - FNYNX[7];
        fo[3] = FNYNX[14] + FNYNX[15];
        fo[5] = FNYNX[14] - FNYNX[15];
    }
}
// Function wrapper around the BuildGabor macro.
// The [2] suffix declares an array of two copies of the function.
//   FNYNX - separable filter responses read by BuildGabor (indices 0..15).
//   fe    - even outputs written by BuildGabor (indices 0..7).
//   fo    - odd outputs written by BuildGabor (indices 0..7).
void BuildGabor_function[2](signed CONV_BITS *FNYNX, signed F_BITS *fe, signed F_BITS *fo)
{
BuildGabor(FNYNX,fe,fo);
}
// CORES FOR ATAN FUNCTION COMPUTATION AND SQRT
// ************************************************************************************
// CoreATAN2CORDICPHI: wrapper around the external atan2 core named ATAN2CORENAME_P.
//   y, x     - core inputs; x is negated before it reaches the core (x_in = -x).
//   ena      - drives the clock-enable input of the core.
//   angle    - receives the negated core phase shifted left by one bit.
//   data_rdy - receives the rdy output of the core.
// When DEBUG is defined no core is instantiated: angle receives the
// concatenation 0@y@x and data_rdy is not driven.
macro proc CoreATAN2CORDICPHI(y, x, ena, angle,data_rdy)
{
macro expr CoreWidthIn = ATAN2COREWIDTHIN_P;
macro expr CoreWidthOut = ATAN2COREWIDTHOUT_P;
// NOTE: CoreLatency is not referenced anywhere else inside this macro.
macro expr CoreLatency = ATAN2CORELATENCY_P;
// aux is a signal (no register): it is written and read within the same par block.
signal aux;
#ifdef DEBUG
angle=0@y@x;
#else
/*
Example VHDL declaration of the core (port widths here are illustrative; the
real widths come from CoreWidthIn / CoreWidthOut):
component atan2cordic24
port (
x_in: IN std_logic_VECTOR(23 downto 0);
y_in: IN std_logic_VECTOR(23 downto 0);
phase_out: OUT std_logic_VECTOR(9 downto 0);
rdy: OUT std_logic;
clk: IN std_logic;
ce: IN std_logic);
end component;
*/
interface ATAN2CORENAME_P(signed CoreWidthOut phase_out, unsigned 1 rdy) atan2(signed CoreWidthIn x_in=-x,
signed CoreWidthIn y_in=y, unsigned 1 clk=__clock, unsigned 1 ce=ena) with {busformat="B<I>"};
par
{
//assert (width(aux)==3, 0, "Width of x is not 3 (it is %d)", width(aux));
// Left shift to utilize the unused bit of the 2QN core output format (range +/- 1, not above)
aux=(-(atan2.phase_out<<1));
data_rdy=atan2.rdy;
angle=aux;
}
#endif
}
// Core replication for ChipScope debugging
// CoreATAN2CORDICORI: wrapper around the external atan2 core named ATAN2CORENAME.
//   y, x     - core inputs; y is negated before it reaches the core (y_in = -y).
//   ena      - drives the clock-enable input of the core.
//   angle    - receives the rescaled phase (see the par block below).
//   data_rdy - receives the rdy output of the core.
// When DEBUG is defined no core is instantiated: angle receives the
// concatenation y@x and data_rdy is not driven.
macro proc CoreATAN2CORDICORI(y, x, ena, angle,data_rdy)
{
macro expr CoreWidthIn = ATAN2COREWIDTHIN;
macro expr CoreWidthOut = ATAN2COREWIDTHOUT;
// NOTE: CoreLatency is not referenced anywhere else inside this macro.
macro expr CoreLatency = ATAN2CORELATENCY;
// aux is a signal (no register): it is written and read within the same par block.
signal aux;
#ifdef DEBUG
angle=y@x;
#else
/*
Example VHDL declaration of the core (port widths here are illustrative; the
real widths come from CoreWidthIn / CoreWidthOut):
component atan2cordic24
port (
x_in: IN std_logic_VECTOR(23 downto 0);
y_in: IN std_logic_VECTOR(23 downto 0);
phase_out: OUT std_logic_VECTOR(9 downto 0);
rdy: OUT std_logic;
clk: IN std_logic;
ce: IN std_logic);
end component;
*/
interface ATAN2CORENAME(signed CoreWidthOut phase_out, unsigned 1 rdy) atan2(signed CoreWidthIn x_in=x,
signed CoreWidthIn y_in=-y, unsigned 1 clk=__clock, unsigned 1 ce=ena) with {busformat="B<I>"};
par
{
// Core phase shifted right by one bit.
aux=(atan2.phase_out)>>1;
data_rdy=atan2.rdy;
// Negative values get the constant 0x200000 added before the rescaling; both
// branches then shift left by 2 and drop the (ATAN2COREWIDTHOUT-PHASE_ORI_BITS)
// least significant bits with the \\ operator.
// NOTE(review): the constant is cast to signed 24, so this presumably requires
// ATAN2COREWIDTHOUT to be 24 - confirm.
if (aux<0) // Left shift 2 bits to utilize the unused bit of the 2QN core output format + the sign bit (range 0,1, not above, nor negative)
angle=((aux + ((signed 24)0x200000))<<2)\\(ATAN2COREWIDTHOUT-PHASE_ORI_BITS); // adding 0.5x2 (2Q9 format) to wrap orientation to [0,pi)
else
angle=(aux<<2)\\(ATAN2COREWIDTHOUT-PHASE_ORI_BITS);
} // Shift left is allowed because, in fact, angle values are positive
#endif
}
// SQRT Core
//---------------------------------------
// CoreSQRT: wrapper around the external square-root core named SQRTCORENAME.
//   input    - value connected to the core input x_in.
//   ena      - drives the clock-enable input of the core.
//   output   - receives the SQRTCOREWIDTHOUT least significant bits of x_out.
//   data_rdy - receives the rdy output of the core.
// When DEBUG is defined no core is instantiated: output is a plain copy of
// input and data_rdy is not driven.
macro proc CoreSQRT(input, ena, output, data_rdy)
{
macro expr SqrtWidthIn = SQRTCOREWIDTHIN;
// The core output port is declared one bit wider than SQRTCOREWIDTHOUT; that
// extra top bit is discarded when output is assigned below.
macro expr SqrtWidthOut = SQRTCOREWIDTHOUT+1;
// NOTE: SqrtLatency is not referenced anywhere else inside this macro.
macro expr SqrtLatency = SQRTCORELATENCY;
#ifndef DEBUG
/*
component sqrtcordic20
port (
x_in: IN std_logic_VECTOR(19 downto 0);
x_out: OUT std_logic_VECTOR(10 downto 0); --> theoretically 11 bits, but this is a very rare case, 10 bits is enough
rdy: OUT std_logic;
clk: IN std_logic;
ce: IN std_logic);
end component; */
interface SQRTCORENAME(unsigned SqrtWidthOut x_out, unsigned 1 rdy) sqrt (unsigned SqrtWidthIn x_in=input,
unsigned 1 ce= ena, unsigned 1 clk=__clock) with {busformat="B<I>"};
par
{
output = sqrt.x_out<-SQRTCOREWIDTHOUT;
data_rdy=sqrt.rdy;
}
#else
output=input;
#endif
}
/* extern "C"
{
int cocosine(int a);
} */
// COSLUT Core
//---------------------------------------
// CoreCosLUT: wrapper around the external cosine look-up core named COSLUTCORENAME.
//   input  - value connected to the THETA port (unsigned, COSLUTCOREWIDTH bits).
//   output - receives the COSINE port (signed, COSLUTCOREWIDTH bits).
// The interface declares no clock or enable port.
// When DEBUG is defined no core is instantiated: output is a plain copy of input.
macro proc CoreCosLUT(input, output)
{
macro expr cosLUTWidth = COSLUTCOREWIDTH;
#ifndef DEBUG
/*
component wrapped_cosLUT
port (
THETA: IN std_logic_VECTOR(9 downto 0);
COSINE: OUT std_logic_VECTOR(9 downto 0));
end component; */
interface COSLUTCORENAME(signed cosLUTWidth COSINE) cosineLUT (unsigned cosLUTWidth THETA=input) with {busformat="B<I>"};
par
{
output = cosineLUT.COSINE;
}
#else
output=input;//cocosine(adjs(input,32));
#endif
}
// COMPUTING PHASE, MAGNITUDE AND ORIENTATION
// ************************************************************************************
// Primitives: derives energy and orientation from the even/odd filter responses.
//   fe, fo      - even and odd responses, one per orientation (NORIENTATIONS entries).
//   Energy      - output: delayed square-root result with its LSB dropped, or 0
//                 when the value does not exceed the threshold.
//   Orientation - output: delayed atan2 result with its LSB dropped, or 0 under
//                 the same threshold condition.
//   TH          - threshold; compared as 0@TH against the delayed energy.
//   Latencies   - bits [11:8] pick the energy delay tap, bits [7:4] the
//                 orientation delay tap. Bits [15:12] and [3:0] appear only in
//                 disabled code.
// The PHASE stage is entirely commented out, so phiangle, cosLUTphiangle,
// AcNumPhi, AcDenPhi, AcNumPhiBIS, AcDenPhiBIS, fecopy, focopy, OriForPhase,
// PhiAngle, Phi, angleLUT and rdy2 are declared but not used by live code.
macro proc Primitives(fe,fo,Energy, Orientation, TH ,Latencies)
{
// Sine and cosine LUTs for orientation computation
// ******************************************************
// Eight entries per table; the listed values match 128*sin and 128*cos at
// 45-degree steps (91 ~ 128*0.707).
const signed TRIG_BITS sin[NORIENTATIONS]={ 0, 91, 128, 91, 0, -91, -128, -91};
const signed TRIG_BITS cos[NORIENTATIONS]={ 128, 91 , 0 ,-91 ,-128 , -91 , 0 ,91};
// WRONG!!! const signed COSLUTCOREWIDTH angleLUT[NORIENTATIONS]={ 0, 50 , 101, 151, 201, 251, 302, 352};
const unsigned COSLUTCOREWIDTH angleLUT[NORIENTATIONS]={0 , 64 , 128 , 192 , 256 , 320 , 384 , 448};
// Pipeline equalization delays.
// With the constants below: PIPEPhase = 80, EQPIPEenergy = 78, EQPIPEOri = 55,
// EQPIPEphase = 16, WAIT_FOR_ORI = 49.
macro expr LATENCIESOFFSET = 15; // + 15 for Software adjustment, 0 for final hardware implementation
macro expr PIPEenergy = 17;
macro expr PIPEOri = 40;
macro expr PIPEPhase = PIPEOri+40;
macro expr EQPIPEenergy = PIPEPhase-PIPEenergy-1+1+LATENCIESOFFSET ; // -1 from Software adjustment
macro expr EQPIPEOri = PIPEPhase-PIPEOri-1+1+LATENCIESOFFSET ; // + 4 from Software adjustment
macro expr EQPIPEphase = 0+1+LATENCIESOFFSET; // longest stage // + 0 from Software adjustment
//macro expr EQPIPEphase = 15+1+LATENCIESOFFSET; // longest stage // + 0 from Software adjustment
macro expr WAIT_FOR_ORI = PIPEOri+2-8 + LATENCIESOFFSET; // + 10 from Software adjustment
// Data
unsigned phiangle[NORIENTATIONS];
signed cosLUTphiangle[NORIENTATIONS];
signed (F_BITS*2) EnergyA[NORIENTATIONS], EnergyB[NORIENTATIONS];
unsigned (F_BITS*2) EnergyC[NORIENTATIONS];
unsigned SQRTCOREWIDTHIN meanEnergy;
signed (ORIENTED_ENERGY_BITS+TRIG_BITS-1) OriA[NORIENTATIONS], OriB[NORIENTATIONS];
signed (ACORI_BITS) AcNumOri[2], AcDenOri[2];
signed ACPHI_BITS AcNumPhi[NORIENTATIONS], AcDenPhi[NORIENTATIONS], AcNumPhiBIS, AcDenPhiBIS;
signed fecopy[WAIT_FOR_ORI][NORIENTATIONS], focopy[WAIT_FOR_ORI][NORIENTATIONS];
//signal <signed (PHASE_ORI_BITS*2)> OriForPhaseSignal;
unsigned COSLUTCOREWIDTH OriForPhase;
// Outputs
// E and OriAngle are delay lines; entry 0 is written by the cores below.
unsigned E[EQPIPEenergy];
signed PHASE_ORI_BITS OriAngle[EQPIPEOri], PhiAngle[NORIENTATIONS];
signed PHASE_ORI_BITS Phi[EQPIPEphase];
// Cores control signals
static signal <unsigned 1> en0=0;
static signal <unsigned 1> en1=0;
static signal <unsigned 1> en2=0;
unsigned int 1 rdy0, rdy1, rdy2;
/*#ifdef DEBUG
int 16 auxOri;
chanin <signed 16> chanori with { infile= "C:/RC2000/RC2000Local_features/DKsimulator/matlab/ori.dk" };
#endif*/
par
{
// Enabling using signals the sqrt and atan2 cores for one clock cycle
// (en2 is driven as well, although its core call is commented out below.)
en0=1;
en1=1;
en2=1;
// ********************************************************************* //
// ENERGY
// ********************************************************************* //
par(o=0;o<NORIENTATIONS;o++)
{
// Pipe 1, Energy
// Squares of the even and odd responses.
xilinxmult(EnergyA[o], fe[o],fe[o]);
xilinxmult(EnergyB[o], fo[o],fo[o]);
// Pipe 2, Energy
EnergyC[o]= (unsigned)EnergyA[o]+(unsigned)EnergyB[o];
}
// Pipe 3, Energy BE CAREFUL WITH OVERFLOW EFFECTS!!!
// Sum of all EnergyC entries (each zero-extended by 3 bits); the \\ operator then
// drops least significant bits so the result fits width(meanEnergy).
meanEnergy=(UnSumMacro2(EnergyC,0,(NORIENTATIONS-1),(width(EnergyC[0])+3))\\(3+width(EnergyC[0])-width(meanEnergy)))<-width(meanEnergy);
// Pipe 4-20, Energy.
CoreSQRT(meanEnergy, en0, E[0], rdy0); // 17 cycles latency
// ********************************************************************* //
// ORIENTATION
// ********************************************************************* //
// Pipe 3, Orientation WARNING: EnergyC has two clock cycles delay to Ori
// Each EnergyC entry is reduced to its ORIENTED_ENERGY_BITS most significant
// bits and weighted with the sin / cos table entry of its orientation.
par(o=0;o<NORIENTATIONS;o++)
{ // 34 to -> 28 bits, fractional part from 14 to 8
xilinxmult(OriA[o], ((signed)adju((EnergyC[o]>>(width(EnergyC[0])-ORIENTED_ENERGY_BITS)),width(EnergyC[0])+2)), sin[o]);
xilinxmult(OriB[o], ((signed)adju((EnergyC[o]>>(width(EnergyC[0])-ORIENTED_ENERGY_BITS)),width(EnergyC[0])+2)), cos[o]);
} // 28+9-1 (sign) = 36 bits for OriAB => +2 required
// Pipe 4, Orientation accumulation lower than 2 (in fact is 2.8)
// Sums over all orientations; least significant bits are dropped so that the
// results fit width(AcNumOri[0]).
AcNumOri[0]=SumMacro(OriA,0,(NORIENTATIONS-1),(width(OriA[0])+1))\\(width(OriA[0])+1-width(AcNumOri[0]));
AcDenOri[0]=SumMacro(OriB,0,(NORIENTATIONS-1),(width(OriB[0])+1))\\(width(OriA[0])+1-width(AcNumOri[0]));
//assert (width(OriA)==3, 0, "Width of Ori is not 3 (it is %d)", width(OriA));
// Pipe 5, Orientation
// The shift amount is 0, so this stage only registers the values.
AcNumOri[1]=AcNumOri[0]>>0; // core needs inputs in -1<=x<=1 format
AcDenOri[1]=AcDenOri[0]>>0; // PERHAPS >>1 is needed in case the whole range be used
//assert (width(OriAngle[1])==3, 0, "Width of x is not 3 (it is %d)", width(OriAngle[1]));
// Pipe 6-40, Orientation (atan2 core latency =35)
CoreATAN2CORDICORI(AcNumOri[1], AcDenOri[1], en1, OriAngle[0], rdy1);
// ********************************************************************* //
// PHASE
// ********************************************************************* //
// NOTE: the whole phase stage below is disabled (block comment).
// Pipe synchronization, waiting orientation data and storing filters outputs
/* par(k=0; k<WAIT_FOR_ORI;k++)
{
ifselect(k==0)
{
par(o=0; o<NORIENTATIONS;o++)
{
fecopy[k][o]=fe[o];
focopy[k][o]=fo[o];
}
}
else
{
par(o=0; o<NORIENTATIONS;o++)
{
fecopy[k][o]=fecopy[k-1][o];
focopy[k][o]=focopy[k-1][o];
}
}
}
// WAIT_FOR_ORI + 1 cycles // 2QN x 2QN => 5QN (duplicated sign bit)
//xilinxmult(OriForPhaseSignal, (signed PHASE_ORI_BITS) PI, ((OriAngle[0])>>2));
// Note that for coherence we go back to the 2QN format for OriAngle[0]
// Core input range 0-> 2pi (core input = 1024*angle(rad)/2pi) => because our max input is pi we have to /2
OriForPhase=(((unsigned)OriAngle[0])>>1)\\(width(OriAngle[0])-width(OriForPhase));
par(o=0;o<NORIENTATIONS;o++)
{ // WAIT_FOR_ORI + 2 cycles
if (OriForPhase>=angleLUT[o])
phiangle[o]=OriForPhase - angleLUT[o];
else
phiangle[o]= angleLUT[o]- OriForPhase;
// WAIT_FOR_ORI + 3 cycles
CoreCosLUT(phiangle[o], cosLUTphiangle[o]);
// WAIT_FOR_ORI + 4 cycles
xilinxmult(AcNumPhi[o], focopy[WAIT_FOR_ORI-1-adju(Latencies[15:12],6)][o], cosLUTphiangle[o] );
xilinxmult(AcDenPhi[o], fecopy[WAIT_FOR_ORI-1-adju(Latencies[15:12],6)][o], abs(cosLUTphiangle[o]) );
} //
// WAIT_FOR_ORI + 5 cycles
AcNumPhiBIS=SumMacro(AcNumPhi,0,(NORIENTATIONS-1),(ACPHI_BITS));
AcDenPhiBIS=SumMacro(AcDenPhi,0,(NORIENTATIONS-1),(ACPHI_BITS));
// WAIT_FOR_ORI + 5-40 cycles
CoreATAN2CORDICPHI(adjs(AcNumPhiBIS,ATAN2COREWIDTHIN), adjs(AcDenPhiBIS,ATAN2COREWIDTHIN), en2, Phi[0], rdy2);
AcNumPhiBIS=SumMacro(fo,0,(NORIENTATIONS-1),(ACPHI_BITS));
AcDenPhiBIS=SumMacro(fe,0,(NORIENTATIONS-1),(ACPHI_BITS));
// WAIT_FOR_ORI + 5-40 cycles
CoreATAN2CORDICPHI(AcNumPhiBIS, AcDenPhiBIS, en2, Phi[0], rdy2); */
// ********************************************************************* //
// Pipe equalization and sending processed data
// ********************************************************************* //
// Pipe 20-??
// Delay lines for the energy and orientation results.
par(i=0; i<(EQPIPEenergy-1);i++)
{
E[i+1]=E[i];
}
par(i=0; i<(EQPIPEOri-1);i++)
{
OriAngle[i+1]=OriAngle[i];
}
/*par(i=0; i<(EQPIPEphase-1);i++)
{
Phi[i+1]=Phi[i];
}*/
// Output selection: the delay tap is the last entry minus the offset taken from
// Latencies; both outputs are forced to 0 unless the selected energy exceeds 0@TH.
Energy=(E[EQPIPEenergy-1-adju(Latencies[11:8],7)]> 0@TH) ? E[EQPIPEenergy-1-adju(Latencies[11:8],7)]\\1 : 0; //SetNAN(E[0]);
// Divide by 2 when reading in software because we use the double angle representation
Orientation=(E[EQPIPEenergy-1-adju(Latencies[11:8],7)]> 0@TH ) ? OriAngle[EQPIPEOri-1-adju(Latencies[7:4],6)]\\1 : 0;//SetNAN(Orientation);
//Phase=(E[EQPIPEenergy-1-adju(Latencies[11:8],7)]> 0@TH ) ? Phi[EQPIPEphase-1-adju(Latencies[3:0],4)]\\1 : SetNAN(Phase);
}
}
// COMPUTING PHASE, MAGNITUDE AND ORIENTATION
// ************************************************************************************
/*
% fe          - Array of NORIENTATIONS filter responses; each entry is squared into EnergyA
% fo          - Array of NORIENTATIONS filter responses; each entry is squared into EnergyB
%               (presumably the even/odd Gabor outputs -- TODO confirm against the caller)
% Energy      - Output: delayed square root of the mean energy (LSB dropped), or 0 when
%               the delayed value does not exceed TH
% Orientation - Output array with NORIENTATIONS entries: delayed per-orientation energy
%               (LSB dropped), gated by the same threshold test as Energy
% TH          - Threshold compared against the delayed square-root energy
% Latencies   - Control word: bits [11:8] select the tap read from the energy delay
%               line, bits [7:4] the tap read from the orientation delay line
%
% DESCRIPTION
% Short variant of the primitives stage. Every statement lives in a single par
% block. The energy of each orientation is fe^2+fo^2, the energies are summed
% over all orientations, scaled to the square-root core input width and fed to
% CoreSQRT. No atan2 core is instantiated here: the "orientation" output carries
% the scaled per-orientation energies instead of an angle.
% NOTE(review): phiangle, cosLUTphiangle, OriA, OriB, AcNumOri, AcDenOri,
% AcNumPhi, AcDenPhi, AcNumPhiBIS, AcDenPhiBIS, fecopy, focopy, OriForPhase, Phi,
% rdy1 and rdy2 are declared but never referenced in this macro, and en1/en2 are
% driven but not connected to any core -- confirm they can be removed.
%
% RETURN
%
*/
macro proc Primitives_short(fe,fo,Energy, Orientation, TH ,Latencies)
{
// Pipeline equalization delays.
macro expr LATENCIESOFFSET = 15; // + 15 for software adjustment, 0 for final hardware implementation
macro expr PIPEenergy = 17;
//macro expr PIPEOri = 40;
macro expr PIPEOri = 40-35-3+1;
macro expr PIPEPhase = PIPEOri+40;
// Depths of the equalization delay lines declared below (E, OriAngle, Phi)
macro expr EQPIPEenergy = PIPEPhase-PIPEenergy-1+1+LATENCIESOFFSET ; // -1 from Software adjustment
macro expr EQPIPEOri = PIPEPhase-PIPEOri-1+1+LATENCIESOFFSET ; // + 4 from Software adjustment
macro expr EQPIPEphase = 0+1+LATENCIESOFFSET; // longest stage // + 0 from Software adjustment
macro expr WAIT_FOR_ORI = PIPEOri+2-8 + LATENCIESOFFSET; // + 10 from Software adjustment
// Data
unsigned phiangle[NORIENTATIONS];
signed cosLUTphiangle[NORIENTATIONS];
// EnergyA/EnergyB hold fe^2 and fo^2, EnergyC their sum, one entry per orientation
signed (F_BITS*2) EnergyA[NORIENTATIONS], EnergyB[NORIENTATIONS];
unsigned (F_BITS*2) EnergyC[NORIENTATIONS];
// Input operand of the square-root core
unsigned SQRTCOREWIDTHIN meanEnergy;
signed (ORIENTED_ENERGY_BITS+TRIG_BITS-1) OriA[NORIENTATIONS], OriB[NORIENTATIONS];
signed (ACORI_BITS) AcNumOri[2], AcDenOri[2];
signed ACPHI_BITS AcNumPhi[NORIENTATIONS], AcDenPhi[NORIENTATIONS], AcNumPhiBIS, AcDenPhiBIS;
signed fecopy[WAIT_FOR_ORI][NORIENTATIONS], focopy[WAIT_FOR_ORI][NORIENTATIONS];
unsigned COSLUTCOREWIDTH OriForPhase;
// Outputs
// E and OriAngle are shift-register delay lines; index 0 is the newest sample
unsigned 10 E[EQPIPEenergy];
unsigned PHASE_ORI_BITS OriAngle[EQPIPEOri][NORIENTATIONS];
signed PHASE_ORI_BITS Phi[EQPIPEphase];
// Cores control signals
static signal <unsigned 1> en0=0;
static signal <unsigned 1> en1=0;
static signal <unsigned 1> en2=0;
unsigned int 1 rdy0, rdy1, rdy2;
par
{
// Enabling the cores through signals for one clock cycle
// (only en0 is actually connected, to the sqrt core below)
en0=1;
en1=1;
en2=1;
// ********************************************************************* //
// ENERGY
// ********************************************************************* //
par(o=0;o<NORIENTATIONS;o++)
{
// Pipe 1, Energy: square both filter responses
xilinxmult(EnergyA[o], fe[o],fe[o]);
xilinxmult(EnergyB[o], fo[o],fo[o]);
// Pipe 2, Energy: oriented energy = fe^2 + fo^2
EnergyC[o]= (unsigned)EnergyA[o]+(unsigned)EnergyB[o];
}
// Pipe 3, Energy. BE CAREFUL WITH OVERFLOW EFFECTS!
// Sum over all orientations with 3 extra bits, drop the LSBs that do not fit
// and keep width(meanEnergy) bits for the square-root core.
meanEnergy=(UnSumMacro2(EnergyC,0,(NORIENTATIONS-1),(width(EnergyC[0])+3))\\(3+width(EnergyC[0])-width(meanEnergy)))<-width(meanEnergy);
// Pipe 4-20, Energy.
// NOTE(review): the comment below states 17 cycles (matching PIPEenergy) --
// confirm against the latency of the generated sqrt core.
CoreSQRT(meanEnergy, en0, E[0], rdy0); // 17 cycles latency
// ********************************************************************* //
// ORIENTATION
// ********************************************************************* //
par(cnt=0; cnt<NORIENTATIONS; cnt++)
{
// Per-orientation energy: drop 4 LSBs, then adjust to the delay-line width.
// NOTE(review): adju narrows the value here -- confirm the upper bits are never set.
OriAngle[0][cnt]= adju(EnergyC[cnt]\\4, width(OriAngle[0][0]));
}
// ********************************************************************* //
// PHASE
// ********************************************************************* //
// (no phase computation in this short variant)
// ********************************************************************* //
// Pipe equalization and sending processed data
// ********************************************************************* //
// Pipe 20-??
// Shift the energy delay line by one position
par(i=0; i<(EQPIPEenergy-1);i++)
{
E[i+1]=E[i];
}
// Shift the per-orientation delay lines by one position
par(i=0; i<(EQPIPEOri-1);i++)
{
par(cnt2=0; cnt2<NORIENTATIONS; cnt2++)
{
OriAngle[i+1][cnt2]=OriAngle[i][cnt2];
}
}
// Read the tap selected by Latencies[11:8]; output it (LSB dropped) only when it exceeds TH
Energy=(E[EQPIPEenergy-1-adju(Latencies[11:8],6)]> 0@TH) ? E[EQPIPEenergy-1-adju(Latencies[11:8],6)]\\1 : 0; //SetNAN(E[0]);
// Divide by 2 when reading in software because we use the double angle representation
// NOTE(review): the comment above appears to be inherited from the full Primitives
// macro; here the values are scaled energies, not angles -- confirm.
par(cnt3=0; cnt3<NORIENTATIONS; cnt3++)
{
// Same threshold test as Energy; the tap is selected by Latencies[7:4]
Orientation[cnt3]=(E[EQPIPEenergy-1-adju(Latencies[11:8],6)]> 0@TH ) ? OriAngle[EQPIPEOri-1-adju(Latencies[7:4],6)][cnt3]\\1 : 0;//SetNAN(Orientation);
}
}
}
/*
% fe      - Array of NORIENTATIONS filter responses, summed into the atan2 denominator
% fo      - Array of NORIENTATIONS filter responses, summed into the atan2 numerator
% phase   - Output: delayed phase value (LSB of the core result dropped)
% latency - Selects the delay-line tap that is read: tap (latency-1)
%
% DESCRIPTION
% Sums fo and fe over all orientations, feeds both sums to the CORDIC atan2
% core and pushes the result through an 80-entry shift register so the caller
% can pick the delay that matches the rest of the pipeline.
%
% RETURN
%
*/
macro proc PhasePrimitive (fe, fo, phase, latency)
{
    macro expr DelayDepth=80;
    signal static unsigned 1 coreEnable=0;
    unsigned 1 coreReady;
    signed ACPHI_BITS sumOdd, sumEven, angle;
    signed 9 angleDelay[DelayDepth];
    par
    {
        // Keep the core enabled while this block is running
        coreEnable=1;
        // Accumulate the responses over every orientation
        sumEven=SumMacro(fe,0,(NORIENTATIONS-1),(ACPHI_BITS));
        sumOdd=SumMacro(fo,0,(NORIENTATIONS-1),(ACPHI_BITS));
        // WAIT_FOR_ORI + 5-40 cycles
        CoreATAN2CORDICPHI(sumOdd, sumEven, coreEnable, angle, coreReady);
        // Delay line: entry 0 takes the newest angle without its LSB
        angleDelay[0]=angle\\1;
        par(tap=1;tap<DelayDepth;tap++)
        {
            angleDelay[tap]=angleDelay[tap-1];
        }
        // Output the tap requested by the caller
        phase = angleDelay[latency-1];
    }
}
// *******************************************************************************
// AUXILIARY MACROS (ONLY BETA VERSIONS, UNDER TEST)
// *******************************************************************************
// *******************************************************************************
// *******************************************************************************
// *******************************************************************************
// Sorting input data
// *******************************************************************************
// Input data must be signed
/*
% bufferIn     - Array of bufferLength signed values to sort
% bufferOut    - Array that receives the values, each stored at its rank
% bufferLength - Number of elements (compile-time constant)
%
% DESCRIPTION
% Rank sort. Every element is compared with all the others in parallel. The
% number of comparisons it wins is its rank, and the element is written to
% bufferOut at that position. Elements with a lower index are compared with
% ">" and elements with a higher index with ">=", so equal values get distinct
% ranks.
% NOTE(review): the statements after the ifselect/else are inside the inner
% par(i2) replicator, so they are replicated (bufferLength-1) times with
% identical targets -- confirm this is intended and accepted by the tools.
% NOTE(review): PipeLatency is declared but not used inside this macro.
%
% RETURN
%
*/
macro proc Sort(bufferIn,bufferOut, bufferLength)
{
macro expr Retiming=1; // Retiming value = Retiming-1
macro expr PipeLatency=3;
macro expr DataWidth=(width(bufferIn[0]));
//macro expr SumMacro(vector,begin,end,Extend)= select(end==begin, adju(vector[begin],Extend),
// adju(vector[end],Extend)+SumMacro(vector,begin,end-1,Extend));
// Local adder tree: sums Array[begin..Index], each term adjusted to Extend bits,
// by recursively splitting the index range in two halves.
macro expr SumMacro(Array, begin, Index,Extend) =
let macro expr RecurseAddAux(Array, Top, Bottom) =
let macro expr Middle = Bottom + (Top-Bottom)/2; in
select (Top == Bottom,adju(Array[Top],Extend),
RecurseAddAux(Array, Top, Middle + 1) + RecurseAddAux(Array, Middle, Bottom));
in
RecurseAddAux(Array, Index, begin);
// Registered copies of the input, kept aligned with the rank computation
signed DataWidth bufferInternal[Retiming+1][bufferLength];
// sum[i1][i2] is 1 when element i1 wins its i2-th comparison
unsigned 1 sum[bufferLength][(bufferLength-0)]; // In fact is -1 but the compiler fails,
// position[.][i1] is the rank of element i1
unsigned (log2ceil(bufferLength)) position[Retiming][bufferLength]; // time to synthizer optimization
par(i1=0;i1<bufferLength;i1++)
{
par(i2=0;i2<(bufferLength-1);i2++)
{
//assert ((MAX_RES_X/SCALE) == 24, 0, "Application requires %d",width(i2));
// Comparisons
// i2 enumerates the other elements: indices below i1 map to i2,
// indices above i1 map to i2+1, so an element is never compared with itself
ifselect(i1>(0@i2)) // left side
{
if(bufferIn[i1]>bufferIn[0@i2])
sum[i1][i2]=1;
else
sum[i1][i2]=0;
}
else //ifselect(i1<=i2) // i1<=i2, right side, except center pixel
{
if(bufferIn[i1]>=bufferIn[0@i2+1])
sum[i1][i2]=1;
else
sum[i1][i2]=0;
}
bufferInternal[0][i1]=bufferIn[i1];
// Positions estimation: rank = number of comparisons won
position[0][i1]=SumMacro(sum[i1],0,(bufferLength-2),width(position[0]));
bufferInternal[1][i1]=bufferInternal[0][i1];
// Retiming
/*par(t=1;t<Retiming;t++)
{
position[t][i1]=position[t-1][i1];
bufferInternal[t+1][i1]=bufferInternal[t][i1];
}*/
// Sorting vector: store the delayed value at its rank
bufferOut[position[Retiming-1][i1]]=bufferInternal[Retiming][i1];
}
}
}
// *******************************************************************************
// Sorting input data with invalid values
// *******************************************************************************
// Input data must be signed
/*
% bufferIn     - Array of bufferLength signed values to sort
% bufferOut    - Array that receives the values, each stored at its rank
% bufferLength - Number of elements (compile-time constant)
% offset       - Output: number of input elements equal to the invalid marker
%
% DESCRIPTION
% Same rank sort as Sort, plus a count of the invalid inputs. An input is
% counted as invalid when it equals 0b100000000000 (only the top bit of a
% 12-bit word set).
% NOTE(review): the marker is hard-coded to 12 bits -- presumably bufferIn must
% be 12 bits wide; confirm against the callers.
% NOTE(review): the statements after the ifselect/else, including the
% assignment to offset, are inside the inner par(i2) replicator and are thus
% replicated with identical targets -- confirm this is intended.
% NOTE(review): NumInvalid and PipeLatency are declared but never used.
%
% RETURN
%
*/
macro proc SortNaN(bufferIn,bufferOut, bufferLength, offset)
{
macro expr Retiming=1; // Retiming value = Retiming-1
macro expr PipeLatency=3;
macro expr DataWidth=(width(bufferIn[0]));
//macro expr SumMacro(vector,begin,end,Extend)= select(end==begin, adju(vector[begin],Extend),
// adju(vector[end],Extend)+SumMacro(vector,begin,end-1,Extend));
// Local adder tree: sums Array[begin..Index], each term adjusted to Extend bits,
// by recursively splitting the index range in two halves.
macro expr SumMacro(Array, begin, Index,Extend) =
let macro expr RecurseAddAux(Array, Top, Bottom) =
let macro expr Middle = Bottom + (Top-Bottom)/2; in
select (Top == Bottom,adju(Array[Top],Extend),
RecurseAddAux(Array, Top, Middle + 1) + RecurseAddAux(Array, Middle, Bottom));
in
RecurseAddAux(Array, Index, begin);
// Registered copies of the input, kept aligned with the rank computation
signed DataWidth bufferInternal[Retiming+1][bufferLength];
// sum[i1][i2] is 1 when element i1 wins its i2-th comparison
unsigned 1 sum[bufferLength][(bufferLength-0)]; // In fact is -1 but the compiler fails,
// position[.][i1] is the rank of element i1
unsigned (log2ceil(bufferLength)) position[Retiming][bufferLength]; // time to synthizer optimization
unsigned (log2ceil(bufferLength)) NumInvalid;
// SumInvalid[i1] is 1 when element i1 equals the invalid marker
unsigned 1 SumInvalid[bufferLength];
par(i1=0;i1<bufferLength;i1++)
{
par(i2=0;i2<(bufferLength-1);i2++)
{
//assert ((MAX_RES_X/SCALE) == 24, 0, "Application requires %d",width(i2));
// Comparisons
// i2 enumerates the other elements: indices below i1 map to i2,
// indices above i1 map to i2+1, so an element is never compared with itself
ifselect(i1>(0@i2)) // left side
{
if(bufferIn[i1]>bufferIn[0@i2])
sum[i1][i2]=1;
else
sum[i1][i2]=0;
}
else //ifselect(i1<=i2) // i1<=i2, right side, except center pixel
{
if(bufferIn[i1]>=bufferIn[0@i2+1])
sum[i1][i2]=1;
else
sum[i1][i2]=0;
}
bufferInternal[0][i1]=bufferIn[i1];
// Positions estimation: rank = number of comparisons won
position[0][i1]=SumMacro(sum[i1],0,(bufferLength-2),width(position[0]));
bufferInternal[1][i1]=bufferInternal[0][i1];
// Retiming
/*par(t=1;t<Retiming;t++)
{
position[t][i1]=position[t-1][i1];
bufferInternal[t+1][i1]=bufferInternal[t][i1];
}*/
// Sorting vector: store the delayed value at its rank
bufferOut[position[Retiming-1][i1]]=bufferInternal[Retiming][i1];
// counting invalid values
if(bufferInternal[0][i1]==0b100000000000)
SumInvalid[i1]=1;
else
SumInvalid[i1]=0;
// Total number of invalid elements
offset = SumMacro(SumInvalid,0,(bufferLength-1),width(offset));
}
}
}
// ************************************************************************************
// MEDIAN FILTERING FOR IMAGE SALT & PEPPER DENOISING
// ************************************************************************************
/*
% Input        - Input value (one sample per call)
% Output       - Median of the current 3x3 neighborhood
% ColumnLength - Number of elements of each column; the column counter wraps at
%                ColumnLength-1
%
% DESCRIPTION
% 3x3 median filter. The two previous lines of data are kept in block RAMs.
% Every cycle a new column of three samples (Input plus one read from each
% RAM) enters a 3x3 register matrix and the older columns shift along. The
% nine values are sorted with Sort and the middle one is output.
% NOTE(review): PipeLatency and Retiming are declared but not used in the body.
%
% RETURN
%
*/
macro proc Median(Input, Output, ColumnLength)
{
macro expr PipeLatency= 1 +1 + 3; // 1 input, 1 output,3 sorting data
macro expr NTaps=3;
macro expr Retiming=1; // Retiming value = Retiming-1
// Declare MPRAM and access macros
// One dual-port RAM per stored line: NTaps-1 RAMs of MAX_RES_X/SCALE words
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[NTaps-1] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
// col is the read address, colbis the write address (it follows col one cycle later)
static unsigned (log2ceil((MAX_RES_X/SCALE))) col=((MAX_RES_X/SCALE)+1 -2-1-4), colbis=((MAX_RES_X/SCALE) -2-1-8);
signed (width(Input)) DataMatrix[NTaps][NTaps], bufferIn[(NTaps*NTaps)], bufferOut[(NTaps*NTaps)];
// Macro Begin
// ----------------------------------------------------
par
{
//assert (1 == 24, 0, "Application requires %d",log2ceil(8));
// Updating matrix. Read data into array every cycle
par (r = 0; r != NTaps; r++)
{
par (c = 0; c != NTaps; c++)
{
ifselect(c==0)
{
ifselect(r==0)
DataMatrix[0][0]=Input; // Read new data
else
{
// Rows 1..NTaps-1 of the first column come from the line buffers
DataMatrix[r][c] = readRAM((r-1)<-log2ceil(NTaps-1),col);
}
}
else // shift data through the matrix
{
DataMatrix[r][c]=DataMatrix[r][c-1];
}
}
}
// Storing previous data: row r1 of the first column goes into line buffer r1
par(r1=0;r1!=(NTaps-1);r1++)
{
writeRAM(r1,colbis,DataMatrix[0@r1][0]);
}
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns: advance the read address, wrapping at ColumnLength-1
col= col==(ColumnLength-1) ? 0 : col+1;
colbis= col;
// Sorting data and median filtering
// Flatten the 3x3 matrix into a 9-entry vector (row-major)
par(r2=0;r2!=NTaps;r2++)
{
par(c2=0;c2!=NTaps;c2++)
{
bufferIn[adju(r2,log2ceil(NTaps*NTaps))*NTaps+adju(c2,log2ceil(NTaps*NTaps))]=DataMatrix[r2][c2];
}
}
Sort(bufferIn,bufferOut, (NTaps*NTaps));
// The median is the middle element of the sorted vector
Output=bufferOut[(NTaps*NTaps)/2];
/*par(i=1;i<(Retiming);i++)
{
aux[i]=aux[i-1];
}
Output= aux[Retiming-1];*/
} // End Global par
}
/*
% Input - Input value for the convolution
% Output - Result of the convolution
% KernelX - Kernel for the X convolution
% KernelY - Kernel for the Y convolution
% ColumnLength - Number of elements of each column
%
% DESCRIPTION
% This function computes the separable 2D convolution of the input.
% It stores 4 columns before performing it; together with the current
% column that makes 5. Then, the convolution is carried out using KernelX
% for the rows and KernelY for the columns.
%
% RETURN
%
*/
// Separable 5x5 convolution: KernelX is applied to a 5-sample shift register
// fed by Input, its result is combined with 4 previously stored values read
// from block RAMs, and KernelY is applied to those 5 values.
// NOTE(review): PipeLatency, Retiming and aux are declared but not used in the
// active code (the retiming stage is commented out).
macro proc SpatialConvolutions_last(Input,Output,KernelX,KernelY, ColumnLength)
{
macro expr PipeLatency=6 + 2;
macro expr Retiming=1; // Retiming value = Retiming-1
//const unsigned int col_size=log2ceil(ColumnLength);
// Declare MPRAM and access macros
// Four dual-port RAMs of MAX_RES_X/SCALE words hold the previous KernelX results
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[4] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
// DataArrayX: 5-tap window for KernelX; DataArrayY: 5-tap window for KernelY
signed (width(Input)) DataArrayX[5], DataArrayY[5] ;
//static unsigned (log2ceil((VIDEOINCOLUMNS/SCALE))) col=1, colbis=0;
//static unsigned (log2ceil((ColumnLength))) col=1, colbis=0;
// col is the read address, colbis the write address (it follows col one cycle later)
static unsigned (log2ceil(MAX_RES_X/SCALE)) col=1, colbis=0;
signed (width(Output)) aux[Retiming];
// Macro Begin
// ----------------------------------------------------
par
{
// Read data into array every cycle
DataArrayX[4]=Input;
// Shift X data through array
par (i = 0; i != 4; i++)
{
DataArrayX[i] = DataArrayX[i+1];
}
// First pass: the result becomes the newest entry of the second window
KernelX(DataArrayX,DataArrayY[4]);
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns: advance the read address, wrapping at ColumnLength-1
col= col>=(ColumnLength-1) ? 0 : col+1;
colbis= col;
// Read data into array every cycle
par(r1=0;r1!=4;r1++)
{
// Fill data through array
DataArrayY[r1] = readRAM(adju(r1,3),col);
}
// Shift array and write data into block RAMs every cycle
par(r2=0;r2!=4;r2++)
{
// Entry r2+1 is stored in RAM r2, so each value moves one slot per pass
writeRAM(adju(r2,3),colbis,DataArrayY[r2+1]);
}
// Second pass
KernelY(DataArrayY,Output);
/*par(i=1;i<(Retiming);i++)
{
aux[i]=aux[i-1];
}
Output= aux[Retiming-1];*/
} // End Global par
}
/*
% buffer - Buffer with the current pixel (center, buffer[2]) and its neighborhood
%          (buffer[0], buffer[1], buffer[3], buffer[4])
% Out    - Output value for the center element
%
% DESCRIPTION
% This function computes the filtered pixel (center) using the symmetric
% kernel k = [2 16 28 16 2]/64. The weighted sum is built with 6 extra bits
% and divided by 64 with a right shift. The Retiming stage is not used for the
% implementation because the performance was good enough.
%
% Rounding is to the nearest integer, with ties away from zero. The right
% shift of a signed value is arithmetic, so it already rounds towards minus
% infinity: non-negative sums therefore get +2^(DivisorShift-1) before the
% shift and negative sums get +2^(DivisorShift-1)-1. (Subtracting half an LSB
% from negative sums, as was done before, made results such as -64/64 come out
% as -2 and could wrap for the most negative sum.)
%
% RETURN
%
*/
macro proc Prefilter5Taps(buffer,Out) // mask=[2 16 28 16 2]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=6; // the kernel gain is 64 = 2^6
    macro expr DataWidth=(width(buffer[0])+6);
    signed DataWidth Register[3], aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Weighted partial sums: outer taps x2, inner taps x16, center x28
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[4],DataWidth))<<1;
        Register[1]=(adjs(buffer[1],DataWidth)+adjs(buffer[3],DataWidth))<<4;
        Register[2]=(adjs(buffer[2],DataWidth))*28;
        //xilinxmult(Register[2], (adjs(buffer[2],DataWidth)) ,((int 18) 28) );
        aux0= Register[0] + Register[1] + Register[2];
        // Rounding (see header): the bias depends on the sign of the sum
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        //par(i=1;i<(Retiming);i++)
        //{
        // aux[i]=aux[i-1];
        //}
        Out= aux[Retiming-1];
        //Out=buffer[0];
    }
}
/*
% Num - Numerator (connected to the dividend port of the core)
% Den - Denominator (sign-adjusted to DIVIDER_INPUT bits, connected to the divisor port)
% Result - Quotient
%
% DESCRIPTION
% This function computes the division of Num by Den, obtaining the
% quotient that is returned in result. It could be done using the
% standard Handel-C implementation, simply as result = Num/Den.
% The problem is that the performance is affected by the required
% logic and resources. This is why we are using a core from
% the core Generator. The interface is divider_18 because we are using
% 18 bits for the division to obtain a better precision.
% NOTE(review): Num is connected without width adjustment, so it presumably
% must already be DIVIDER_INPUT bits wide -- confirm against the callers.
% The remd and rfd outputs of the core are not used.
%
% RETURN
%
*/
macro proc division_core(Num, Den, result)
{
// Enable for Cores
static signal unsigned 1 enable=0;
//signed DIVIDER_INPUT quot2;
// Instance of the generated divider; it is clocked by __clock and gated by enable
interface divider_18(signed DIVIDER_INPUT quot, signed DIVIDER_INPUT remd, unsigned 1 rfd) divider(signed DIVIDER_INPUT dividend = Num,
signed DIVIDER_INPUT divisor = adjs(Den,DIVIDER_INPUT), unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
par
{
// Enabling the division core: the signal is driven only during this clock cycle
enable=1;
// Register the quotient currently presented by the core
result = divider.quot;
//quot2 = Num/Den;
//quot = qout2;
}
}
/*
% Input - Input value to be delayed
% Output - Delayed input value
% ColumnLength - Number of elements of each column
%
% DESCRIPTION
% This function delays the input by as many cycles as the function
% SpatialConvolutions_last takes. It is used for synchronization: the data
% goes through the same kind of shift register and block-RAM column buffers,
% but no kernel is applied.
% NOTE(review): the exact match with the latency of SpatialConvolutions_last
% is taken from the original description, not verified here.
%
% RETURN
%
*/
macro proc Delaying(Input, Output, ColumnLength)
{
// Declare MPRAM and access macros
// Six dual-port RAMs of MAX_RES_X/SCALE words
static mpram
{
rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
} ColumnsBuffer[6] with {block = "BlockRAM"};
macro expr readRAM (row,col) = (ColumnsBuffer[row]).Read[col];
macro proc writeRAM (row,col,data)
{
(ColumnsBuffer[row]).Write[col]=data;
}
signed (width(Input)) DataArrayX[7], DataArrayY[7] ;
// col is the read address, colbis the write address (it follows col one cycle later)
static unsigned (log2ceil(MAX_RES_X/SCALE)) col=1, colbis=0;
// Macro Begin
// ----------------------------------------------------
par
{
// Read data into array every cycle
DataArrayX[6]=Input;
// Shift X data through array
par (i = 0; i != 6; i++)
{
DataArrayX[i] = DataArrayX[i+1];
}
// Tap 4 of the shift register feeds the column stage
DataArrayY[6] = DataArrayX[4];
/* :::::::::::::::::::::::::::::::::::::::::: */
// Operations by columns: advance the read address, wrapping at ColumnLength-1
col= col>=(ColumnLength-1) ? 0 : col+1;
colbis= col;
// Read data into array every cycle
par(r1=0;r1!=6;r1++)
{
// Fill data through array
DataArrayY[r1] = readRAM(adju(r1,3),col);
}
// Shift array and write data into block RAMs every cycle
par(r2=0;r2!=6;r2++)
{
writeRAM(adju(r2,3),colbis,DataArrayY[r2+1]);
}
// The delayed sample is taken from entry 3 of the column stage
Output=DataArrayY[3];
} // End Global par
}
\ No newline at end of file
//********************************************************************
//
// Programmed by Javier Díaz, DRIVSCO project
// Granada, March 2008, version 2.1
//
//********************************************************************
#ifndef __GABORPRIMITIVES__
#define __GABORPRIMITIVES__
#include <stdlib.hch>
#include "generic.hch"
#include "parameters.hch"
// Data bit-widths
//*****************************************
// Widths (in bits) of the fixed-point data used along the pipeline.
#define KERN_BITS 12 //14
#define CONV_BITS 10
#define CONV_FRACT_BITS 0
#define F_BITS (CONV_BITS) // USE CONV_BITS+1 FOR OPTICAL FLOW AND STEREO OR 17 FOR LOCAL FEATURES
#define TRIG_BITS 9
#define ORIENTED_ENERGY_BITS 20
#define ACORI_BITS (24)
#define PHASE_ORI_BITS 10 // 16 for hardware, 64 for debugging
#define ENER_BITS 10 // 16 for hardware, 32 for debugging
#define ACPHI_BITS 10//(COSLUTCOREWIDTH+F_BITS+3) // in fact this is larger than the software simulator
//#define PI 25736 // it uses 16 bits in format 2QN (1 bit sign, 2 bit integer part, 13 bits fractional part)
#define PI 201 // it uses 10 bits ( 6 bits of frac part)
// SQRT CORE (SCALED RADIANS 2Q24 FORMAT (1 for sign, 2 as integer part and the others as fractional part)
// NOTE(review): the format remark above looks copied from the arc tan core -- confirm.
// NOTE(review): SQRTCORELATENCY evaluates to 12, while Primitives_short documents
// a 17-cycle sqrt latency (PIPEenergy = 17) -- confirm which value applies.
#define SQRTCOREWIDTHIN 20
#define SQRTCOREWIDTHOUT 10
#define SQRTCORELATENCY (SQRTCOREWIDTHOUT+2)
#define SQRTCORENAME sqrtcordic
// ARC TAN CORE
#define ATAN2COREWIDTHIN 24
#define ATAN2COREWIDTHOUT 24
#define ATAN2CORELATENCY (ATAN2COREWIDTHOUT+4)
#define ATAN2CORENAME atan2cordic24
// ARC TAN CORE PHASE
#define ATAN2COREWIDTHIN_P 10 //32
#define ATAN2COREWIDTHOUT_P 10 //32
#define ATAN2CORELATENCY_P (ATAN2COREWIDTHOUT_P+4)
#define ATAN2CORENAME_P atan2cordic10//atan2cordic24
// cosLUT CORE
#define COSLUTCOREWIDTH 10
#define COSLUTCORENAME cosLUT
/* BIT CONFIGURATION EXAMPLES
*****************************************
1) #define KERN_BITS 11
#define CONV_BITS 9
#define ATAN2COREWIDTH 20
#define ATAN2CORENAME atan2cordic20
2) #define KERN_BITS 13
#define CONV_BITS 11
#define ATAN2COREWIDTH 24
#define ATAN2CORENAME atan2cordic24
3) #define KERN_BITS 15 / 17 / 17 / 19 / 21
#define CONV_BITS 14 / 16 / 18 / 20 / 22
#define ATAN2COREWIDTH 30 / 34 / 38 / 42 / 46
#define ATAN2CORENAME atan2cordic30 / atan2cordic34 / atan2cordic38 / atan2cordic42 / atan2cordic46
*/
// Extra parameters
#define NORIENTATIONS 8 // number of entries of the per-orientation arrays (fe, fo, ...)
//#define PI 201 // 3.14 (3 bit integer, 6 bit fractional)
//#define NAN 0b100000000000
//#define NSCALES 1
#define MAX_PROC_DISPARITY 2
#define MAX_PROC_FLOW 3
#define FLOW_INDEX_BITS 2
#define DISPARITY_INDEX_BITS 1
#define DIVIDER_INPUT 18 // operand width of the divider core
// Parenthesized so that the macro keeps its value when it is expanded inside a
// larger expression (e.g. DIVIDER_LATENCY*2 or x-DIVIDER_LATENCY).
#define DIVIDER_LATENCY (DIVIDER_INPUT+4) // is +4 if divider has clks/div==1
// Generic Macros
macro expr SumMacro(Array, begin, Index,Extend);
// Computing Macros
/*********************************************************************/
macro proc GenericConvolution(Input, Output, X_FIR, Y_FIR, NTaps, NTapsMinus1, ColumnLength,normx, normy,Sx,Sy);
macro proc GaborY(Input, FNY, NTaps, NTapsMinus1, ColumnLength);
macro proc GaborBase(DataIn, FNYNX,Columns);
macro proc BuildGabor(FNYNX,fe,fo);
macro proc SortNaN(bufferIn,bufferOut, bufferLength, offset);
macro proc PhasePrimitive (fe, fo, phase, latency);
macro proc Primitives(fe,fo,Energy, Orientation, TH ,Latencies);
macro proc Primitives_short(fe,fo,Energy, Orientation, TH ,Latencies);
// Added macros (F Barranco)
/*********************************************************************/
macro proc SpatialConvolutions_last(Input,Output,KernelX,KernelY, ColumnLength);
macro proc Delaying(Input, Output, ColumnLength);
macro proc Prefilter5Taps(buffer,Out);
macro proc division_core(Num, Den, result);
#endif
\ No newline at end of file
/* channels.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "channels.hch"
// ***************************************************************
// Channels implemented using signals
// ***************************************************************
/*
% Channel - Channel struct (unsignedchannel / signedchannel) to send through
% Input - Data to be sent through the channel
%
% DESCRIPTION
% Blocking send. On every cycle the data wires and the SendReady flag of
% Channel are driven and the ReadReady flag is sampled. The loop ends after
% the first cycle in which the receiver was ready.
%
% RETURN
%
*/
macro proc Send(Channel, Input)
{
    // Registered copy of the receiver's ready flag; ends the loop once set
    unsigned 1 accepted;
    // The body always runs at least once
    do
    {
        par
        {
            // Tell the receiver that a value is on offer
            Channel.SendReady = 1;
            // Drive the transfer wires with the value to send
            Channel.DataTransfer = Input;
            // Remember whether the receiver was ready during this cycle
            accepted = Channel.ReadReady;
        }
    }while(!accepted); // repeat until the transfer has completed
}
/*
% Channel - Send signed data through this channel
% Input - Data to be sent through the channel
%
% DESCRIPTION
% This function safely sends Input through Channel: to work, there must be
% a channel reading on the other side. If the receiver is already signalling
% ready, Input is sent directly. Otherwise Input is first copied into a local
% register and that copy is sent; Send then blocks until the receiver is ready.
% NOTE(review): the original description stated that 0 is sent while the
% receiver is not ready; the code below sends the registered copy of Input
% instead -- confirm which behavior is intended.
% Channel structs are declared in channels.hch
%
% RETURN
%
*/
macro proc SignedSecureSend(Channel, Input)
{
// Holds the value of Input sampled when the receiver was not ready
signed auxInput;
if (Read_Ready(Channel))
Send(Channel, Input);
else
{
// Capture Input now; Send may have to wait several cycles
auxInput=Input;
Send(Channel, auxInput);
}
}
/*
% Channel - Send unsigned data through this channel
% Input - Data to be sent through the channel
%
% DESCRIPTION
% This function safely sends Input through Channel: to work, there must be
% a channel reading on the other side. If the receiver is already signalling
% ready, Input is sent directly. Otherwise Input is first copied into a local
% register and that copy is sent; Send then blocks until the receiver is ready.
% NOTE(review): the original description stated that 0 is sent while the
% receiver is not ready; the code below sends the registered copy of Input
% instead -- confirm which behavior is intended.
% Channel structs are declared in channels.hch
%
% RETURN
%
*/
macro proc UnsignedSecureSend(Channel, Input)
{
// Holds the value of Input sampled when the receiver was not ready
unsigned auxInput;
if (Read_Ready(Channel))
Send(Channel, Input);
else
{
// Capture Input now; Send may have to wait several cycles
auxInput=Input;
Send(Channel, auxInput);
}
}
/*
% Channel - Channel struct (unsignedchannel / signedchannel) to receive from
% Output - Destination of the received data
%
% DESCRIPTION
% Blocking receive. On every cycle the ReadReady flag of Channel is driven
% and the SendReady flag is sampled. When the sender is ready, the value on
% the transfer wires is stored in Output. The loop ends after the first cycle
% in which the sender was ready.
%
% RETURN
%
*/
macro proc Receive(Channel, Output)
{
    // Registered copy of the sender's ready flag; ends the loop once set
    unsigned 1 received;
    // The body always runs at least once
    do
    {
        par
        {
            // Tell the sender that this side is ready to take a value
            Channel.ReadReady = 1;
            // Remember whether the sender was ready during this cycle
            received = Channel.SendReady;
            // Take the value from the transfer wires only when the sender offers one
            if (!Channel.SendReady)
                delay;
            else
            {
                Output = Channel.DataTransfer;
            }
        }
    }while(!received); // repeat until the transfer has completed
}
/*
% Channel - Channel struct to query
%
% DESCRIPTION
% Non-blocking query of the sender side of a channel.
%
% RETURN
%
% SendReady - Value of the channel's SendReady signal (driven to 1 by Send
%             while it is offering data)
%
*/
macro expr Send_Ready(Channel) = (Channel.SendReady);
/*
% Channel - Channel struct to query
%
% DESCRIPTION
% Non-blocking query of the receiver side of a channel.
%
% RETURN
%
% ReadReady - Value of the channel's ReadReady signal (driven to 1 by Receive
%             while it is waiting for data)
%
*/
macro expr Read_Ready(Channel) = (Channel.ReadReady);
\ No newline at end of file
/* channels.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#ifndef __CHANNELS__
#define __CHANNELS__
#include "stdlib.hch"
// Channels implemented using signals
// ***************************************************************
// Channel carrying unsigned data. All members are signals.
struct unsignedchannel
{
signal unsigned 1 ReadReady; // driven to 1 by Receive while it is waiting
signal unsigned 1 SendReady; // driven to 1 by Send while it offers data
signal unsigned DataTransfer; // data wires driven by Send (width left to inference)
};
// Channel carrying signed data. All members are signals.
struct signedchannel
{
signal unsigned 1 ReadReady; // driven to 1 by Receive while it is waiting
signal unsigned 1 SendReady; // driven to 1 by Send while it offers data
signal signed DataTransfer; // data wires driven by Send (width left to inference)
};
// Definition of a channel with default values of 0:
// shorthand for declaring a static instance of one of the channel structs
#define UNSIGNED_CHANNEL static struct unsignedchannel
#define SIGNED_CHANNEL static struct signedchannel
// Example channel declaration: declare a variable MyChannel
// as channel structure with default value of zero
// UNSIGNED_CHANNEL MyChannel;
macro proc Send(Channel, Input);
macro proc SignedSecureSend(Channel, Input);
macro proc UnsignedSecureSend(Channel, Input);
macro proc Receive(Channel, Output);
// These expressions allow the user to implement non-blocking channels:
// This channel structure has the readiness of the send and
// receive process exposed as signals, allowing the user to check
// the status of a channel. This can be simply expressed as
// expressions in Handel-C thus:
//Check whether the sender is ready
macro expr Send_Ready(Channel);
//Check whether the receiver is ready
macro expr Read_Ready(Channel);
#endif
/* cores.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "cores.hch"
// Interfaces for the top and core projects
// ***************************************************************
/*
% Input - Input channel for the 3 pixels of 8 bits (from the 3 RGB channels)
% Output - Local descriptor feature maps (27 bits)
% Control - Control word with the different parameters:
% * Control[60:45] - Latencies for the feature estimation (gabor modules)
% * Control[44:36] - Thresholds for the feature estimation
% * Control[35:26] - Number of columns of the input images
% * Control[24:21] - Not used
% * Control[20:13] - Not used
% * Control[12:0] - Latency cycles of the pipeline
% ImSize - Size of the input images
%
% DESCRIPTION
% Interface for a top architecture to interface with the attention estimation core
% RETURN
%
*/
// Instantiates the external core (interface CoreOpticFlow) and wires it to the
// Input/Output channels for as long as the design runs.
macro proc InterfazTopFlowCore_lf_attention(Input, Output, Control, ImSize)
{
macro expr InWidth=24; //192;//24;
//macro expr OutWidth=51;//24;
macro expr OutWidth=27;//18; 9 bits are useless
// Core outputs: CoreOut, OutSendReady, InReadReady.
// Core inputs: clock, image size, input data and handshake, control word.
interface CoreOpticFlow( signal OutWidth CoreOut, signal unsigned 1 OutSendReady, signal unsigned 1 InReadReady)
MyCore( unsigned 1 clk=__clock, unsigned imSize=ImSize, signal InWidth CoreIn=Input.DataTransfer,
signal unsigned 1 InSendReady=Input.SendReady,
signal unsigned 1 OutReadReady=Output.ReadReady, unsigned cmd=Control)with{retime=0};
// Never returns: the core outputs are forwarded to the channel signals every cycle
while(1)
{
par
{
Output.DataTransfer=MyCore.CoreOut;
Output.SendReady=MyCore.OutSendReady;
Input.ReadReady=MyCore.InReadReady;
}
}
}
/*
% Input - Input channel for the 3 pixels of 8 bits (from the 3 RGB channels)
% Output - Local descriptor feature maps (27 bits)
% Control - Control word with the different parameters:
% * Control[60:45] - Latencies for the feature estimation (gabor modules)
% * Control[44:36] - Thresholds for the feature estimation
% * Control[35:26] - Number of columns of the input images
% * Control[24:21] - Not used
% * Control[20:13] - Not used
% * Control[12:0] - Latency cycles of the pipeline
% ImSize - Size of the input images
%
% DESCRIPTION
% Interface for the attention estimation core (used in the main.hcc)
%
% RETURN
%
*/
// Core-side counterpart of InterfazTopFlowCore_lf_attention: declares the
// ports of the core and forwards their values to the channel signals, the
// control word and the image size on every cycle.
// The port names (CoreOut, OutSendReady, OutReadReady, imSize, CoreIn,
// InSendReady, InReadReady, cmd) match those of the CoreOpticFlow interface.
macro proc InterfazCore_lf_attention(Input, Output, Control,ImSize)
{
#if CORE==1
// Built as a core: port_in/port_out interfaces
// Outcoming data
interface port_out() OutData(signal CoreOut = Output.DataTransfer)with{retime=0};
interface port_out() OutSendStatus(signal unsigned 1 OutSendReady = Output.SendReady)with{retime=0};
interface port_in(signal unsigned 1 OutReadReady) OutReadStatus()with{retime=0} ;
// Incoming data
// interface port_in(unsigned 1 clk with {clockport = 1}) ClockPort() ;
interface port_in(unsigned imSize) CimSize()with{retime=0};
interface port_in(signal CoreIn) InData()with{retime=0};
interface port_in(signal unsigned 1 InSendReady) InSendStatus()with{retime=0};
interface port_out() InReadStatus(signal unsigned 1 InReadReady = Input.ReadReady)with{retime=0};
// Control & Commands
interface port_in(unsigned cmd) Control_Commands()with{retime=0};
#else
// Built for the sub-circuit test: bus_in/bus_out interfaces
// Outcoming data
interface bus_out() OutData(signal CoreOut = Output.DataTransfer)with{retime=0};
interface bus_out() OutSendStatus(signal unsigned 1 OutSendReady = Output.SendReady)with{retime=0};
interface bus_in(signal unsigned 1 OutReadReady) OutReadStatus() with{retime=0};
// Incoming data
// interface port_in(unsigned 1 clk with {clockport = 1}) ClockPort() ;
interface bus_in(unsigned imSize) CimSize()with{retime=0};
interface bus_in(signal CoreIn) InData()with{retime=0};
interface bus_in(signal unsigned 1 InSendReady) InSendStatus()with{retime=0};
interface bus_out() InReadStatus(signal unsigned 1 InReadReady = Input.ReadReady)with{retime=0};
// Control & Commands
interface bus_in(unsigned cmd) Control_Commands()with{retime=0};
#endif
// Never returns: the incoming port values are forwarded every cycle
while(1)
{
par
{
Output.ReadReady=OutReadStatus.OutReadReady;
Input.DataTransfer=InData.CoreIn;
Input.SendReady=InSendStatus.InSendReady;
Control=Control_Commands.cmd;
ImSize=CimSize.imSize;
}
}
}
/* cores.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#ifndef __CORES__
#define __CORES__
#include "stdlib.hch"
#include "channels.hch"
//#include "xircav4_lib.hch" //Platform-dependent
// Selects the interface kind in InterfazCore_lf_attention:
// 1 uses port_in/port_out, any other value uses bus_in/bus_out
#define CORE 1 // 0 for sub-circuit test, 1 for core calls
//Attention cores
macro proc InterfazCore_lf_attention(Input, Output, Control,ImSize);
macro proc InterfazTopFlowCore_lf_attention(Input, Output, Control, ImSize);
#endif
\ No newline at end of file
/* generic.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "generic.hch"
// Pipeline synchronization delays
/*
% DelayCycles - Number of cycles to wait (compile-time constant)
%
% DESCRIPTION
% Waits for DelayCycles clock cycles by chaining that many delay statements
% one after another. It can be used for synchronization.
%
% RETURN
%
*/
macro proc PipelineDelay(DelayCycles)
{
    // seq replicates its body sequentially: one delay statement per cycle
    seq(cycle=0; cycle<(DelayCycles); cycle++)
    {
        delay;
    }
}
/*
% input - Expression whose width defines the width of the marker
%
% DESCRIPTION
% Builds the value used to flag invalid data ("NaN"): a 1 in the most
% significant bit position of input, followed by width(input)-1 zeros.
% Only the width of input is used, not its value.
%
% RETURN
% The NaN value for the width of input.
%
*/
macro expr SetNAN(input) = (1 << (width(input) - 1));
/* generic.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#ifndef __GENERIC_HCH__
#define __GENERIC_HCH__
#include "stdlib.hch"
#include "parameters.hch"
#include "cores.hch"
#include "channels.hch"
#include "bilinear_warping_v2.hch"
// Signal bundle for a FIFO with 12-bit data words.
// NOTE(review): the member roles below are inferred from their names only
// (the FIFO macros are defined elsewhere) -- confirm.
static struct SECURE_FIFO_CHANNEL_INTERFACE_12
{
signal unsigned 1 wren; // presumably write enable
signal unsigned 1 rden; // presumably read enable
signal unsigned 12 data_w; // presumably data to write
signal unsigned 12 data_r; // presumably data read back
signal unsigned 1 full; // presumably FIFO-full flag
signal unsigned 1 empty; // presumably FIFO-empty flag
};
// Shorthand for declaring a static instance of the bundle
#define SECURE_FIFO_CHANNEL_12 static struct SECURE_FIFO_CHANNEL_INTERFACE_12
macro proc SecureFifoChannel_12(PtrInterface);
macro proc MyFIFORead_12(PtrInterface, data);
macro proc MyFIFOWrite_12(PtrInterface, data);
macro expr SetNAN(input);
#endif
\ No newline at end of file
/* lklib.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "lklib.hch"
#include "cores.hch"
#include "parameters.hch"
/*
% buffer - Buffer with the current pixel (center, buffer[2]) and its neighborhood
%          (buffer[0], buffer[1], buffer[3], buffer[4])
% Out    - Output value for the center element
%
% DESCRIPTION
% This function computes the filtered pixel (center) using the symmetric
% kernel k = [2 16 28 16 2]/64. The weighted sum is built with 6 extra bits
% and divided by 64 with a right shift. The Retiming stage is not used for the
% implementation because the performance was good enough.
%
% Rounding is to the nearest integer, with ties away from zero. The right
% shift of a signed value is arithmetic, so it already rounds towards minus
% infinity: non-negative sums therefore get +2^(DivisorShift-1) before the
% shift and negative sums get +2^(DivisorShift-1)-1. (Subtracting half an LSB
% from negative sums, as was done before, made results such as -64/64 come out
% as -2 and could wrap for the most negative sum.)
%
% RETURN
%
*/
macro proc Prefilter5Taps(buffer,Out) // mask=[2 16 28 16 2]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=6; // the kernel gain is 64 = 2^6
    macro expr DataWidth=(width(buffer[0])+6);
    signed DataWidth Register[3], aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Weighted partial sums: outer taps x2, inner taps x16, center x28
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[4],DataWidth))<<1;
        Register[1]=(adjs(buffer[1],DataWidth)+adjs(buffer[3],DataWidth))<<4;
        Register[2]=(adjs(buffer[2],DataWidth))*28;
        aux0= Register[0] + Register[1] + Register[2];
        // Rounding (see header): the bias depends on the sign of the sum
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        //par(i=1;i<(Retiming);i++)
        //{
        // aux[i]=aux[i-1];
        //}
        Out= aux[Retiming-1];
    }
}
/*
% buffer - Three-sample window; buffer[1] is the centre sample
% Out    - Filtered value for the centre sample
%
% DESCRIPTION
% Applies the symmetric kernel k = [14 35 14]/64 to the window (note that
% the coefficients add up to 63, not 64). All coefficients are positive, so
% this is a smoothing (low-pass) filter. The sum is rounded to the nearest
% integer (ties away from zero) and divided by 64 with an arithmetic shift.
% Retiming is left at 1 (no extra stages) because performance was sufficient.
%
% RETURN
% Nothing; the result is written to Out.
*/
macro proc Prefilter3Taps(buffer,Out) // mask=[14 35 14]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=6; // 2^6 = 64, the kernel normalisation
    macro expr DataWidth=(width(buffer[0])+7); // head-room for the weighted sum
    signed DataWidth Register[2], aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Weighted partial sums (the two outer taps share one multiplier)
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[2],DataWidth))*14;
        Register[1]=(adjs(buffer[1],DataWidth))*35;
        aux0= Register[0] + Register[1];
        // Rounding. '>>' on a signed operand is an arithmetic shift, i.e. it
        // rounds towards minus infinity. Adding half (positive sums) or
        // half-1 (negative sums) therefore yields round-to-nearest with ties
        // away from zero. Subtracting half for negative sums, as was done
        // before, made exact results one LSB too low (-64 -> -2, not -1).
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        Out= aux[0];
    }
}
/*
% buffer - Five-sample window; buffer[2] is the centre sample
% Out    - Derivative estimate for the centre sample
%
% DESCRIPTION
% Applies the antisymmetric derivative kernel k = [7 18 0 -18 -7]/64.
% The centre tap is zero, so only the two differences buffer[0]-buffer[4]
% and buffer[1]-buffer[3] are formed and weighted. The sum is shifted by 5
% instead of 6, so the result keeps one fractional bit. It is rounded to
% the nearest value with ties away from zero.
% Retiming is left at 1 (no extra stages) because performance was sufficient.
%
% RETURN
% Nothing; the result is written to Out.
*/
macro proc Diff5Taps(buffer,Out) // mask=[7 18 0 -18 -7]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=5; // 2^6=64 but we take 1 decimal bit--> 5.
    macro expr DataWidth=(width(buffer[0])+6); // head-room for the weighted sum
    signed DataWidth Register[2], aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Weighted differences of the mirrored taps
        Register[0]=(adjs(buffer[0],DataWidth)-adjs(buffer[4],DataWidth))*7;
        Register[1]=(adjs(buffer[1],DataWidth)-adjs(buffer[3],DataWidth))*18;
        //xilinxmult(Register[0], (adjs(buffer[0],DataWidth)-adjs(buffer[4],DataWidth)) ,((int 18) 7) );
        //xilinxmult(Register[1], (adjs(buffer[1],DataWidth)-adjs(buffer[3],DataWidth)) ,((int 18) 18) );
        aux0= Register[0] + Register[1];
        // Rounding. '>>' on a signed operand is an arithmetic shift, i.e. it
        // rounds towards minus infinity. Adding half (positive sums) or
        // half-1 (negative sums) therefore yields round-to-nearest with ties
        // away from zero, so that f(-x) == -f(x). Subtracting half for
        // negative sums, as was done before, made negative results one LSB
        // too low (-32 -> -2, not -1).
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        //par(i=1;i<(Retiming);i++)
        //{
        // aux[i]=aux[i-1];
        //}
        Out= aux[Retiming-1];
    }
}
/*
% buffer - Three-sample window; buffer[1] is the centre sample
% Out    - Derivative estimate for the centre sample
%
% DESCRIPTION
% Applies the antisymmetric derivative kernel k = [29 0 -29]/64, i.e.
% (buffer[0]-buffer[2])*29. The product is shifted by 5 instead of 6, so
% the result keeps one fractional bit. It is rounded to the nearest value
% with ties away from zero.
% No retiming stages are used because performance was sufficient.
%
% RETURN
% Nothing; the result is written to Out.
*/
macro proc Diff3Taps(buffer,Out) // mask=[29 0 -29]/64
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=5; // 2^6=64 but we take 1 decimal bit--> 5.
    macro expr DataWidth=(width(buffer[0])+6); // head-room for the product
    signed DataWidth Register, aux0;
    //signed (width(Out)) aux[Retiming];
    signed (width(Out)) aux;
    par
    {
        Register=(adjs(buffer[0],DataWidth)-adjs(buffer[2],DataWidth))*29;
        aux0 = Register;
        // Rounding. '>>' on a signed operand is an arithmetic shift, i.e. it
        // rounds towards minus infinity. Adding half (positive values) or
        // half-1 (negative values) therefore yields round-to-nearest with
        // ties away from zero, so that f(-x) == -f(x). Subtracting half for
        // negative values, as was done before, made negative results one
        // LSB too low (-32 -> -2, not -1).
        if(sign(aux0))
            aux= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        Out = aux;
    }
}
/*
% buffer - Five-sample window; buffer[2] is the centre sample
% Out    - Weighted value for the centre sample
%
% DESCRIPTION
% Applies the binomial (Gaussian-like) smoothing kernel k = [1 4 6 4 1]/16.
% It is a low-pass weighting kernel, not a derivative. The sum is shifted
% by 3 instead of 4, so the result keeps one extra bit, and is rounded to
% the nearest value with ties away from zero.
% Retiming is left at 1 (no extra stages) because performance was sufficient.
%
% RETURN
% Nothing; the result is written to Out.
*/
// ***************************************************************************
macro proc Weighting5(buffer,Out) // mask=[1 4 6 4 1]/16
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=3; // 2^4=16 but the whole derivative range is not used.
                               // --> one bits more is available
    macro expr DataWidth=(width(buffer[0])+5); //--> 5 is more accurate!!!
    signed DataWidth Register[3],aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Weighted partial sums (symmetric taps are added first)
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[4],DataWidth));
        Register[1]=(adjs(buffer[1],DataWidth)+adjs(buffer[3],DataWidth))<<2;
        Register[2]=(adjs(buffer[2],DataWidth))*6;
        aux0= Register[0] + Register[1] + Register[2];
        // Rounding. '>>' on a signed operand is an arithmetic shift, i.e. it
        // rounds towards minus infinity. Adding half (positive sums) or
        // half-1 (negative sums) therefore yields round-to-nearest with ties
        // away from zero. Subtracting half for negative sums, as was done
        // before, made exact results one LSB too low (-8 -> -2, not -1).
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        //par(i=1;i<(Retiming);i++)
        //{
        // aux[i]=aux[i-1];
        //}
        //Out= aux[Retiming-1];
        Out= aux[0];
    }
}
/*
% buffer - Three-sample window; buffer[1] is the centre sample
% Out    - Weighted value for the centre sample
%
% DESCRIPTION
% Applies the binomial (Gaussian-like) smoothing kernel k = [1 2 1]/4.
% It is a low-pass weighting kernel, not a derivative. The sum is shifted
% by 1 instead of 2, so the result keeps one extra bit, and is rounded to
% the nearest value with ties away from zero.
% Retiming is left at 1, so the replicated retiming chain below expands to
% nothing.
%
% RETURN
% Nothing; the result is written to Out.
*/
macro proc Weighting3(buffer,Out) // mask=[1 2 1]/4
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=3+Retiming-1;
    macro expr DivisorShift=1; // 2^2=4 but the whole derivative range is not used.
                               // --> one bits more is available
    macro expr DataWidth=(width(buffer[0])+2); // head-room for the weighted sum
    signed DataWidth Register[2],aux0;
    signed (width(Out)) aux[Retiming];
    par
    {
        // Weighted partial sums (the two outer taps are added first)
        Register[0]=(adjs(buffer[0],DataWidth)+adjs(buffer[2],DataWidth));
        Register[1]=(adjs(buffer[1],DataWidth))<<1;
        aux0= Register[0] + Register[1];
        // Rounding. '>>' on a signed operand is an arithmetic shift, i.e. it
        // rounds towards minus infinity. Adding half (positive sums) or
        // half-1 (negative sums; zero for this shift) therefore yields
        // round-to-nearest with ties away from zero. Subtracting half for
        // negative sums, as was done before, made exact results one LSB too
        // low (-2 -> -2, not -1).
        if(sign(aux0))
            aux[0]= ((aux0+((signed)0@(exp2(DivisorShift-1)-1)))>> DivisorShift)<-(width(Out));
        else
            aux[0]= ((aux0+((signed)0@exp2(DivisorShift-1)))>> DivisorShift)<-(width(Out));
        // Optional retiming registers (none while Retiming == 1)
        par(i=1;i<(Retiming);i++)
        {
            aux[i]=aux[i-1];
        }
        Out= aux[Retiming-1];
    }
}
/*
% Input        - Sample entering the convolution
% Output       - Result of the separable 2D convolution
% KernelX      - Macro proc applied to the window of five consecutive inputs
% KernelY      - Macro proc applied to the window rebuilt from the line buffers
% ColumnLength - Line length; the buffer address wraps after ColumnLength-1
%
% DESCRIPTION
% Separable 2D convolution. Five consecutive input samples are kept in a
% shift register and filtered with KernelX. The KernelX result, together
% with four values read back from four block-RAM line buffers at the
% current address, forms the five-sample window filtered with KernelY.
% The window (minus its oldest entry) is written back to the line buffers
% at the previous address, so each buffer holds data one line older than
% the next.
%
% RETURN
% Nothing; the result is written to Output.
*/
macro proc SpatialConvolutions_optf(Input,Output,KernelX,KernelY, ColumnLength)
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=6 + 2;
    // Four dual-port line buffers: one read port and one write port each
    static mpram
    {
        rom <signed (width(Input))> Read[(MAX_RES_X/SCALE)]; // Read port
        wom <signed (width(Input))> Write[(MAX_RES_X/SCALE)]; // Write port
    } LineStore[4] with {block = "BlockRAM"};
    signed (width(Input)) RowTaps[5], ColTaps[5] ;
    static unsigned (log2ceil(MAX_RES_X/SCALE)) rdAddr=1, wrAddr=0;
    signed (width(Output)) aux[Retiming];
    // Everything below runs in parallel, every clock cycle
    // ----------------------------------------------------
    par
    {
        // Address generation: the write address trails the read address by one
        if(rdAddr>=(ColumnLength-1))
            rdAddr=0;
        else
            rdAddr=rdAddr+1;
        wrAddr= rdAddr;
        // Horizontal window: newest sample enters at the top, the rest shift down
        par (t = 0; t != 4; t++)
        {
            RowTaps[t] = RowTaps[t+1];
        }
        RowTaps[4]=Input;
        // First pass of the separable filter feeds the newest vertical tap
        KernelX(RowTaps,ColTaps[4]);
        // Vertical window: older taps come from the line buffers
        par(rd=0;rd!=4;rd++)
        {
            ColTaps[rd] = (LineStore[adju(rd,3)]).Read[rdAddr];
        }
        // Write the window back, shifted by one line
        par(wr=0;wr!=4;wr++)
        {
            (LineStore[adju(wr,3)]).Write[wrAddr]=ColTaps[wr+1];
        }
        // Second pass of the separable filter produces the output
        KernelY(ColTaps,Output);
    } // End Global par
}
/*
% Input0       - First factor of the product
% Input1       - Second factor of the product
% Output       - Product Input0*Input1 after 2D weighting
% ColumnLength - Line length; the buffer address wraps after ColumnLength-1
%
% DESCRIPTION
% Multiplies Input0 by Input1 at twice the width of Input0 and smooths the
% product with a separable 2D filter that uses Weighting5 in both passes.
% Five consecutive products are kept in a shift register for the first
% pass. Its result, together with four values read back from four
% block-RAM line buffers, forms the window for the second pass. The window
% (minus its oldest entry) is written back to the line buffers at the
% previous address.
%
% RETURN
% Nothing; the result is written to Output.
*/
macro proc WeightingMatrix_optf(Input0, Input1,Output, ColumnLength)
{
    macro expr Retiming=1; // Retiming value = Retiming-1
    macro expr PipeLatency=6 + 2;
    macro expr Weigh=Weighting5; // Weighting5 or Weighting3
    // Four dual-port line buffers: one read port and one write port each
    static mpram
    {
        rom <signed (width(Input0)*2)> Read[(MAX_RES_X/SCALE)]; // Read port
        wom <signed (width(Input0)*2)> Write[(MAX_RES_X/SCALE)]; // Write port
    } LineStore[4] with {block = "BlockRAM"}; // 10, no 4
    signed (width(Input0)*2) RowTaps[5], ColTaps[5] ; // 11, no 5
    static unsigned (log2ceil(MAX_RES_X/SCALE)) rdAddr=1, wrAddr=0;
    signed (width(Output)) aux[Retiming];
    // Everything below runs in parallel, every clock cycle
    // ----------------------------------------------------
    par
    {
        // Address generation: the write address trails the read address by one
        if(rdAddr>=(ColumnLength-1))
            rdAddr=0;
        else
            rdAddr=rdAddr+1;
        wrAddr= rdAddr;
        // Horizontal window: the newest product enters at the top, the rest shift down
        par (t = 0; t != 4; t++)
        {
            RowTaps[t] = RowTaps[t+1];
        }
        RowTaps[4]=adjs(Input0,2*width(Input0))*adjs(Input1,2*width(Input1));
        // First weighting pass feeds the newest vertical tap
        Weigh(RowTaps,ColTaps[4]);
        // Vertical window: older taps come from the line buffers
        par(rd=0;rd!=4;rd++)
        {
            ColTaps[rd] = (LineStore[adju(rd,3)]).Read[rdAddr];
        }
        // Write the window back, shifted by one line
        par(wr=0;wr!=4;wr++)
        {
            (LineStore[adju(wr,3)]).Write[wrAddr]=ColTaps[wr+1];
        }
        // Second weighting pass produces the output
        Weigh(ColTaps,Output);
    } // End Global par
}
/*
% DataIn - Three-sample window (one sample per frame)
% dt     - Output of Diff3Taps applied to DataIn (derivative across the window)
% st     - Output of Prefilter3Taps applied to DataIn (smoothed value)
%
% DESCRIPTION
% Runs the 3-tap derivative filter and the 3-tap smoothing filter on the
% same window at the same time; the two results are independent.
%
% RETURN
% Nothing; the results are written to dt and st.
*/
macro proc TemporalDerivative_optf(DataIn, dt, st)
{
    // Both filters consume the same three samples in parallel
    par
    {
        Diff3Taps(DataIn,dt);      // derivative branch
        Prefilter3Taps(DataIn,st); // smoothing branch
    }
}
/*
% FractionalShift - Number of bits for the precision of the division
%                   (kept for interface compatibility; not referenced in the body)
% detTH - Energy threshold applied to the determinant
% Axx - IxIx*weight
% Axy - IxIy*weight
% Ayy - IyIy*weight
% Axt - IxIt*weight
% Ayt - IyIt*weight
% VxOut - X Optical flow result
% VyOut - Y Optical flow result
%
% DESCRIPTION
% This function solves the 2x2 system (see paper in main.hcc):
%   velx = Axy*Ayt - Ayy*Axt   (products scaled by >>4)
%   vely = Axx*Ayt - Axy*Axt   (products scaled by >>4)
%   detA = Axx*Ayy - Axy*Axy   (products scaled by >>8)
%   Vx = velx/detA,  Vy = -(vely/detA)
% The divisions use a CoreGenerator divider (division_core) with
% DIVIDER_INPUT-bit operands, so the three values are reduced to a sign bit
% plus 24 magnitude bits. If any of them falls outside [-2^24, 2^24-1] the
% reduction would corrupt it, so all three operands are replaced by 1
% instead. Results whose determinant does not exceed detTH are replaced by
% SetNAN().
%
% RETURN
% Nothing; the results are written to VxOut and VyOut.
*/
macro proc FIXPOINTftu_optf(FractionalShift, detTH, Axx, Axy, Ayy, Axt, Ayt, VxOut, VyOut)
{
    macro expr Retiming=7;
    macro expr FRACTBITS=5;
    macro expr FPSIZE=(2*width(Axx)+1);
    macro expr PipeLatency=0;
    macro expr MAX_24b = 16777215; //2^24 - 1
    macro expr MIN_24b = (-MAX_24b - 1); //-2^24, lowest value that survives the 25-bit reduction
    // fix-point data registers
    signed FPSIZE velx, vely, detA, Aux0, Aux1, Aux2, Aux3, Aux4, Aux5;
    signed DIVIDER_INPUT detAbis, velxbis, velybis;
    signed DIVIDER_INPUT Vx_big, Vy_big;
    signed (width(VxOut)) Vx[Retiming], Vy[Retiming];
    unsigned 1 AbovedetTH[DIVIDER_LATENCY];
    // Macro Begin
    // ----------------------------------------------------
    par
    {
        //Computing the values in the determinant
        Aux0=adjs(Axy,FPSIZE)*adjs(Ayt,FPSIZE);
        Aux1=adjs(Ayy,FPSIZE)*adjs(Axt,FPSIZE);
        Aux2=adjs(Axx,FPSIZE)*adjs(Ayt,FPSIZE);
        Aux3=adjs(Axy,FPSIZE)*adjs(Axt,FPSIZE);
        Aux4=adjs(Axx,FPSIZE)*adjs(Ayy,FPSIZE);
        Aux5=adjs(Axy,FPSIZE)*adjs(Axy,FPSIZE);
        velx=(Aux0>>4)-(Aux1>>4);
        vely=(Aux2>>4)-(Aux3>>4);
        detA=(Aux4>>8)-(Aux5>>8);
        // Range check before reducing to sign + 24 bits. Both bounds are
        // tested: a value below -2^24 would otherwise be truncated to an
        // unrelated negative number and fed to the divider.
        if((detA > MAX_24b) || (velx > MAX_24b) || (vely > MAX_24b) ||
           (detA < MIN_24b) || (velx < MIN_24b) || (vely < MIN_24b))//Reducing errors
            par{
                detAbis = 1; //TH is at least 1
                velxbis = 1;
                velybis = 1;
            }
        else
            par{
                detAbis = adjs(detA[FPSIZE-1]@detA[23:0], width(detAbis));
                velxbis = adjs(velx[FPSIZE-1]@velx[23:0], width(velxbis));
                velybis = adjs(vely[FPSIZE-1]@vely[23:0], width(velybis));
            }
        // New pipelined division unit
        par
        {
            division_core(velxbis, detAbis, Vx_big);
            //Vx_big = velxbis;
            division_core(velybis, detAbis, Vy_big);
            //Vy_big = velybis;
        }
        // Control detA > TH
        AbovedetTH[0]=(detAbis) > ((signed)adju(detTH,DIVIDER_INPUT));
        // delays for threshold and div (synchronization)
        par(d=1;d<DIVIDER_LATENCY;d++)
        {
            AbovedetTH[d]=AbovedetTH[d-1];
        }
        //Energy threshold
        if (AbovedetTH[DIVIDER_LATENCY-1]!=0)
            par
            {
                Vx[0]=adjs(Vx_big, width(VxOut));
                Vy[0]=adjs(-Vy_big, width(VyOut));
            }
        else
            par
            {
                //Set to NaN (non valid values)
                Vx[0]=SetNAN(VxOut);
                Vy[0]=SetNAN(VyOut);
            }
        //Retiming stages (improving final performance)
        par(k=1;k<Retiming;k++)
        {
            Vx[k]=Vx[k-1];
            Vy[k]=Vy[k-1];
        }
        //Writing the outputs
        VxOut=Vx[Retiming-1];
        VyOut=Vy[Retiming-1];
    }
}
/*
% Num - Numerator (wired to the core's dividend port)
% Den - Denominator (sign-adjusted to DIVIDER_INPUT bits, wired to the divisor port)
% result - Quotient (taken from the core's quot port)
%
% DESCRIPTION
% This function computes the division of Num by Den and returns the
% quotient in result. It could be done using the standard Handel-C
% operator, simply as result = Num/Den, but the performance would be
% affected by the required logic and resources. This is why we are using
% a core from the core Generator. The interface is divider_25 because we
% are using 25 bits (DIVIDER_INPUT) for the division to obtain a better
% precision. The core's remainder (remd) and ready-for-data (rfd) outputs
% are declared but not used here.
% NOTE(review): callers delay companion data by DIVIDER_LATENCY stages to
% line up with the quotient - confirm against the generated core settings.
%
% RETURN
% Nothing; the quotient is written to result.
*/
macro proc division_core(Num, Den, result)
{
// Clock-enable for the core; as a signal it reads 0 unless assigned in the cycle
static signal unsigned 1 enable=0;
interface divider_25 (signed DIVIDER_INPUT quot, signed DIVIDER_INPUT remd, unsigned 1 rfd) divider(signed DIVIDER_INPUT dividend = Num,
signed DIVIDER_INPUT divisor = adjs(Den,DIVIDER_INPUT), unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
par
{
//Enable the division core during every cycle in which this statement executes
enable=1;
//Register the quotient currently presented by the core
result = divider.quot;
}
}
/* lklib.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#ifndef __LKLIB__
#define __LKLIB__
#include "stdlib.hch"
#include "generic.hch"
// Bit widths shared by the Lucas-Kanade library and its callers
#define XYTDERIVATIVESIZE 9
#define PIXELSIZE 8
#define DIVIDER_INPUT 25 //Input size of the divider core
// Parenthesised so the macro stays a single operand inside larger
// expressions (e.g. 2*DIVIDER_LATENCY or DIVIDER_LATENCY-1).
#define DIVIDER_LATENCY (DIVIDER_INPUT+4+1) //Latency of the divider core
// Smoothing and derivative kernels (defined in lklib.hcc)
macro proc Prefilter5Taps(buffer,Out);
macro proc Prefilter3Taps(buffer,Out);
macro proc Diff5Taps(buffer,Out);
macro proc Diff3Taps(buffer,Out);
// NOTE(review): lklib.hcc defines Weighting5 and Weighting3 but no macro
// named Weighting; the old prototype is kept in case another file uses it.
macro proc Weighting(buffer,Out);
macro proc Weighting5(buffer,Out);
macro proc Weighting3(buffer,Out);
// Separable convolution, weighting, temporal filtering and flow solver
macro proc SpatialConvolutions_optf(Input,Output,KernelX,KernelY, ColumnLength);
macro proc WeightingMatrix_optf(Input0, Input1,Output, ColumnLength);
macro proc TemporalDerivative_optf(DataIn, dt, st);
macro proc FIXPOINTftu_optf(FractionalShift, detTH, Axx, Axy, Ayy, Axt, Ayt, VxOut, VyOut);
macro proc division_core(Num, Den, quot);
#endif
\ No newline at end of file
/* main.hcc
% Pixels - RGB Input from channel
% * Pixels[7:0] - Red color channel
% * Pixels[15:8] - Green color channel
% * Pixels[23:16] - Blue color channel
% Control - Control word with the different parameters:
% * Control[60:45] - Latencies for the feature estimation (gabor modules)
% * Control[44:36] - Thresholds for the feature estimation
% * Control[35:26] - Number of columns of the input images
% * Control[24:21] - Not used
% * Control[20:13] - Not used
% * Control[12:0] - Latency cycles of the pipeline
%
% RETURN
% Output - Energy, 4 orientation maps, and RG and BY color differences
%
% DESCRIPTION
% A Handel-C implementation of the idea of
% L. Itti and C. Koch, Computational modelling of visual attention, Nature Review Neuroscience,
% 2(3), pp. 194 – 203, 2001.
% F. Barranco, J. Diaz, B. Prieto, and E. Ros, Bottom-up visual attention model based on
% FPGA, in Electronics, Circuits and Systems (ICECS), pp. 328 – 331, 2012.
%
% Note that the paper describes most parameters of the algorithm and that it
% also describes a whole architecture for a coarse-to-fine estimation of the saliency.
% This file implements the feature maps that, once combined, allow
% the saliency to be estimated. We also include the normalization operator.
%
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
//Native Handel-C libraries
#include "stdlib.hch"
//Handel-C custom libraries
#include "cores.hch"
#include "channels.hch"
#include "GaborPrimitives.hch"
#include "generic.hch"
//#include "opticflow.hch" //I think we do not need it but it is included in the package
//#include "bilinear_warping_v2.hch" //I think we do not need it but it is included in the package
//Set the clock values here
//interface port_in (unsigned 1 clk with {clockport = 1}) ClockPort (); //clk =__clock) ClockPort() ;
//set clock = internal ClockPort.clk with { rate = 50 };
/****************************************************************
* Function : main *
****************************************************************/
// Top-level pipeline of the attention feature core. It runs the host
// interface in parallel with an endless loop that, for every image, reads
// RGB pixels, computes the Gabor-based energy/orientation features and the
// RG / BY colour-opponency features, and sends them out once the pipeline
// has filled (PipeLatency cycles).
void main(void)
{
macro expr adjust=36;
// Extra register stages appended to the colour path; evaluates to 79-41 = 38
macro expr LATENCY_DIFFERENCE = 79-(35-3+1+8); //After removing atan2 cores from primitives (primitives_short)
UNSIGNED_CHANNEL Output;
UNSIGNED_CHANNEL Input;
unsigned int 24 Pixels;
signed int XYTDERIVATIVESIZE Data[3]; //3 color channels
signed int F_BITS fe[NORIENTATIONS], fo[NORIENTATIONS];
signed int F_BITS fetmp[NORIENTATIONS], fotmp[NORIENTATIONS];
signal <unsigned int 61> Control;
signal <unsigned 1> rst;
static unsigned int 4 nc=4;
unsigned int 10 Columns;
unsigned int 13 PipeLatency;
signed int 12 Threshold;
unsigned 9 Pr_Threshold;
unsigned 1 end, enable;
unsigned int 13 PipeDelay;
unsigned 21 counter;
unsigned 21 ImSize;
// NOTE(review): Latencies is assigned and used below but this local
// declaration is commented out; it must be declared in an included header.
//static unsigned 16 Latencies=22583; //for the gabor modules
static signed XYTDERIVATIVESIZE threshold =25;// approx. 1/10 of max(R,G,B)
//New variables
signed (XYTDERIVATIVESIZE) Data_gray;
signed (XYTDERIVATIVESIZE+6) gray_value;
unsigned 9 energy;
unsigned int 9 orientation[NORIENTATIONS];
signed int CONV_BITS FNYNX[16];
signed (XYTDERIVATIVESIZE) SmoothPixel[NFRAMES];
signed XYTDERIVATIVESIZE R, G, B; //signed for the subsequent stages
unsigned 1 max_RGB_thd[DIVIDER_LATENCY];
signed XYTDERIVATIVESIZE max_RGB, max_RGB_1;
signed DIVIDER_INPUT R_1, G_1, B_1, min_RG, den, RG_num, BY_num, RG_pre, BY_pre;
signed DIVIDER_INPUT R_2, G_2, B_2, min_RG_1; //relative min
signed XYTDERIVATIVESIZE RG, BY, RG_out[LATENCY_DIFFERENCE], BY_out[LATENCY_DIFFERENCE], RG_last, BY_last;
par
{
//Call interface with Core local features for attention
InterfazCore_lf_attention(Input, Output, Control, ImSize);
//Running continuously
while(1)
{
// Per-image reset: clear the counters and latch the control word fields
par
{
enable=0;
end=0;
PipeDelay=0;
counter=0;
Latencies = Control[60:45];
Pr_Threshold = Control[44:36];
Columns = Control[35:26];
nc = Control[24:21]; //Not used
// An all-zero field selects the largest positive 12-bit value
Threshold = (Control[20:13]==0) ? 0b011111111111 : ((signed 12) (0@Control[20:13])); //Not used
PipeLatency = Control[12:0];
}
do
{
// All the instruction being executed at the same time: long pipeline
// There is an initial latency:
par
{
enable=1;
// Reading parameters
Latencies = Control[60:45];
Pr_Threshold = Control[44:36];
Columns = Control[35:26];
nc = Control[24:21]; //Not used
Threshold = (Control[20:13]==0) ? 0b011111111111 : ((signed 12) (0@Control[20:13])); //Not used
PipeLatency = Control[12:0];
//Receive data (three pixels, RGB)
//s1
Receive(Input, Pixels);
//Extracting frame data
//s2
// Each 8-bit colour field is zero-extended and then treated as signed
Data[0]= (signed) adju(Pixels[7:0], XYTDERIVATIVESIZE); //R value
Data[1]= (signed) adju(Pixels[15:8], XYTDERIVATIVESIZE);//G value
Data[2]= (signed) adju(Pixels[23:16], XYTDERIVATIVESIZE);//B value
par
{
//s3
//Computing the Gray value
//Constants multiplied by 64. Original formula: Gray = R*0.299 + G*0.587 + B*0.114
// (19 + 38 + 7 = 64, so the weights sum to one after the division below)
gray_value = adjs(Data[0], width(gray_value))*19 + adjs(Data[1], width(gray_value))*38 + adjs(Data[2], width(gray_value))*7;
//s4
//Adjusting the size
// Dropping the 6 LSBs undoes the x64 scaling of the weights
Data_gray = adjs(gray_value\\6, XYTDERIVATIVESIZE);
//s5
//-----------------------------------------------------------------
//Par for the Energy and Orientation features (based on Gabor filters)
//-----------------------------------------------------------------
GaborBase(Data_gray, FNYNX, Columns);
BuildGabor(FNYNX, fe, fo);
Primitives_short(fe, fo, energy, orientation, Pr_Threshold, Latencies);
}
par{
//Spatial convolutions: Using E. Simoncelli derivative and smoothing filters
// One smoothing convolution per colour channel (R, G, B)
par(f=0;f<NFRAMES;f++) //NFRAMES == 3
{
//Latency == 2*Columns + 11
//SpatialConvolutions(((signed)adju(Data[f], XYTDERIVATIVESIZE)), SmoothPixel[f], Prefilter5Taps, Prefilter5Taps, Columns);
SpatialConvolutions_last(Data[f], SmoothPixel[f], Prefilter5Taps, Prefilter5Taps, Columns);
}
//Splitting frame data
//-----------------------------------------------------------------
R = SmoothPixel[0]; //R value
G = SmoothPixel[1]; //G value
B = SmoothPixel[2]; //B value
//Compute maximum and minimum value for yellow and normalization
//Computing relative RGB maximum and relative RG minimum
//-----------------------------------------------------------------
if ( R > G)
par
{
min_RG = (signed DIVIDER_INPUT)(0@G); //zero-extended to DIVIDER_INPUT bits; shifted left by 9 below to give the division fractional bits
if(R > B)
par
{
max_RGB = R;
}
else
par
{
max_RGB = B;
}
}
else
par
{
min_RG = (signed DIVIDER_INPUT)(0@R); //zero-extended to DIVIDER_INPUT bits; shifted left by 9 below to give the division fractional bits
if(G > B)
par
{
max_RGB = G;
}
else
par
{
max_RGB = B;
}
}
// Widen the smoothed channels to the divider operand width
par
{
R_1 = (signed DIVIDER_INPUT)(0@R);
G_1 = (signed DIVIDER_INPUT)(0@G);
B_1 = (signed DIVIDER_INPUT)(0@B);
}
//adjusting sizes for the division
// The magnitude bits are shifted left by 9 while the top bit is kept in place
par
{
R_2 = (signed)(R_1[DIVIDER_INPUT-1]@(R_1[DIVIDER_INPUT-2:0]<<9));
G_2 = (signed)(G_1[DIVIDER_INPUT-1]@(G_1[DIVIDER_INPUT-2:0]<<9));
B_2 = (signed)(B_1[DIVIDER_INPUT-1]@(B_1[DIVIDER_INPUT-2:0]<<9));
min_RG_1 = (signed)(min_RG[DIVIDER_INPUT-1]@(min_RG[DIVIDER_INPUT-2:0]<<9));
max_RGB_1 = max_RGB;
}
//Max_RGB inversion for normalization
//-----------------------------------------------------------------
// Numerators: RG = R - G, BY = B - min(R,G); denominator: max(R,G,B)
par
{
RG_num = (R_2 - G_2); //DIVIDER_INPUT;
BY_num = (B_2 - min_RG_1);
max_RGB_thd[0]=(max_RGB_1 > threshold); //Discard unreliable values: less than 1/10 of max. intensity of the image
den = (signed DIVIDER_INPUT)(0@max_RGB_1);
}
//Normalize by the relative maximum
//-----------------------------------------------------------------
par
{
division_core(RG_num, den, RG_pre);//operands are DIVIDER_INPUT bits wide
division_core(BY_num, den, BY_pre);//operands are DIVIDER_INPUT bits wide
}
// Delay the reliability flag by the divider latency so it lines up with the quotients
par(d=1;d<DIVIDER_LATENCY;d++)
{
max_RGB_thd[d]=max_RGB_thd[d-1];
}
//Discard the unreliable values and compute the RG and BY ones
//RG and BY: s + 1 + 6 = 8 bits (PSize)
//RG and BY: s + 2 + 6 = 8 bits (XYTDERIVATIVESIZE)
//-----------------------------------------------------------------
if (max_RGB_thd[DIVIDER_LATENCY-1]!=0)
par
{
////RG = adjs(RG_pre\\3, XYTDERIVATIVESIZE);
////BY = adjs(BY_pre\\3, XYTDERIVATIVESIZE);
// Drop 2 LSBs of the quotient, then fit to XYTDERIVATIVESIZE bits
RG = adjs(RG_pre\\2, XYTDERIVATIVESIZE);
BY = adjs(BY_pre\\2, XYTDERIVATIVESIZE);
}
else
par //unreliable values
{
RG=0;
BY=0;
}
//Delaying the result (3*Columns + 5) for synchronization
par{
Delaying(RG, RG_out[0], Columns);
Delaying(BY, BY_out[0], Columns);
}
//LATENCY_DIFFERENCE further stages align the color opponency path with the Energy+Orientation computation
//(the constant started at 79 and was reduced after the atan2 cores were removed)
par(cnt=1;cnt<LATENCY_DIFFERENCE;cnt++)
{
RG_out[cnt]=RG_out[cnt-1];
BY_out[cnt]=BY_out[cnt-1];
}
par
{
RG_last = RG_out[LATENCY_DIFFERENCE-1];
BY_last = BY_out[LATENCY_DIFFERENCE-1];
}
}
// Outputs are only sent once PipeDelay has counted up to PipeLatency
if(PipeDelay==(PipeLatency))
par
{
//Send the output: energy, four orientation maps, RG and BY concatenated into one word
UnsignedSecureSend(Output, ((unsigned)energy)@((unsigned)orientation[0])@((unsigned)orientation[2])@((unsigned)orientation[4])@((unsigned)orientation[6])@((unsigned)RG_last)@((unsigned)BY_last));
//Pass only orientation[0], orientation[2], orientation[4] and orientation[6]: pi, 3*pi/2, 0 and pi/2
// Stop after ImSize words have been sent for this image
end=(counter==ImSize-1);
counter++;
}
else
PipeDelay++;
}
}while(!end);
} // End while(1)
} // end global par
}
#include <stdlib.hch>
#include "opticflow.hch"
// Wrapper around an external atan2 CORDIC core (component name ATAN2NAME).
//   y, x   - operands wired to the core's y_in / x_in ports (ATAN2WIDTH bits)
//   enable - wired to the core's clock-enable (ce) port
//   angle  - receives the core's phase_out (ATAN2OUTWIDTH bits)
// NOTE(review): CoreLatency is defined but not used inside this macro;
// callers presumably account for ATAN2LATENCY themselves - confirm.
macro proc CoreATAN2CORDIC_fl(y, x, enable, angle)
{
macro expr CoreWidth = ATAN2WIDTH;
macro expr CoreOutputWidth = ATAN2OUTWIDTH;
macro expr CoreLatency = ATAN2LATENCY;
/* VHDL declaration of the wrapped component, for reference:
component atan2cordic
port (
x_in: IN std_logic_VECTOR(20 downto 0);
y_in: IN std_logic_VECTOR(20 downto 0);
phase_out: OUT std_logic_VECTOR(20 downto 0);
clk: IN std_logic);
end component; */
interface ATAN2NAME (signed CoreOutputWidth phase_out) atan2(signed CoreWidth x_in=x,
signed CoreWidth y_in=y, unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
// Register the phase currently presented by the core
angle=atan2.phase_out;
}
// Wrapper around an external 21-bit signed divider core (component name
// DIVIDER_NAME).
//   my_dividend, my_divisor - operands wired to the dividend / divisor ports
//   result - receives the FLOW_BITS least-significant bits of the quotient
//   enable - wired to the core's clock-enable (ce) port
// The core's remd and rfd outputs are declared but not used.
// NOTE(review): DividerLatency is 0 and unused here; the real latency of
// the generated core is not visible in this file - confirm.
macro proc CoreDIVIDER(my_dividend, my_divisor, result, enable)
{
macro expr DividerWidth = 21;
macro expr DividerOutputWidth = 21;
macro expr DividerLatency = 0;
interface DIVIDER_NAME (signed DividerOutputWidth quot, signed DividerOutputWidth remd, unsigned 1 rfd) divider(signed DividerWidth dividend = my_dividend,
signed DividerWidth divisor = my_divisor, unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
// Keep only the low FLOW_BITS bits of the quotient
result=(divider.quot)<-FLOW_BITS;
}
// Wrapper around an external 27-bit signed divider core (component name
// DIVIDER_NAME_2); otherwise identical in structure to CoreDIVIDER.
//   my_dividend, my_divisor - operands wired to the dividend / divisor ports
//   result - receives the FLOW_BITS least-significant bits of the quotient
//   enable - wired to the core's clock-enable (ce) port
// The core's remd and rfd outputs are declared but not used.
// NOTE(review): DividerLatency is 0 and unused here; the real latency of
// the generated core is not visible in this file - confirm.
macro proc CoreDIVIDER_2(my_dividend, my_divisor, result, enable)
{
macro expr DividerWidth = 27;
macro expr DividerOutputWidth = 27;
macro expr DividerLatency = 0;
interface DIVIDER_NAME_2 (signed DividerOutputWidth quot, signed DividerOutputWidth remd, unsigned 1 rfd) divider(signed DividerWidth dividend = my_dividend,
signed DividerWidth divisor = my_divisor, unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
// Keep only the low FLOW_BITS bits of the quotient
result=(divider.quot)<-FLOW_BITS;
}
// Compile-time balanced adder tree over unsigned elements.
// Sums Array[begin] .. Array[Index] (both inclusive); every element is
// zero-extended to Extend bits with adju before it is added. The range is
// halved recursively until a single element remains.
//************************************************************************************
macro expr UnSumMacro(Array, begin, Index,Extend) =
    let macro expr TreeSum(Vec, Hi, Lo, Bits) =
        let macro expr Split = Lo + (Hi-Lo)/2; in
        select (Hi != Lo,
            TreeSum(Vec, Hi, Split + 1,Bits) + TreeSum(Vec, Split, Lo,Bits),
            adju(Vec[Hi],Bits));
    in
    TreeSum(Array, Index, begin,Extend);
/*
macro expr SumMacro(Array, begin, Index,Extend) =
let macro expr RecurseAddAux(Array, Top, Bottom,Extend) =
let macro expr Middle = Bottom + (Top-Bottom)/2; in
select (Top == Bottom, adjs(Array[Top],Extend),
RecurseAddAux(Array, Top, Middle + 1,Extend) + RecurseAddAux(Array, Middle, Bottom,Extend));
in
RecurseAddAux(Array, Index, begin,Extend);
//***************************************************
//Macro component_velocity
//
//LATENCY = 4;
//
//
//bits format:
//IN: P[NFRAMES][NORIENT] --> sign-4-5
//
//OUT: FVreal[NFRAMES], FVimag[NFRAMES] --> sign-14-5
// LE[NFRAMES] --> sign-28-5
//
//***************************************************/
macro proc component_velocity(P, FVreal, FVimag, LE){
//***********************************************/
//Constant definitions for 3 frames
//***********************************************
macro expr PSize = 10;
macro expr SXX = 14;
macro expr SX = 6;
macro expr DEN = 6;
const int 3 XX[NFRAMES] = {1, 2, 3}; //XX3 is XX in the third dimension
const int 8 WREAL[NORIENTATIONS] = {-81, -75, -58, -31, 0, 31, 58, 75}; // 25 * {-F0 * cos(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 8 WIMAG[NORIENTATIONS] = {0, -31, -58, -75, -81, -75, -58, -31}; // 25 * {-F0 * sin(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
//***********************************************
//Declarations: Variables
//***********************************************
int (PSize+2) Sxy[NORIENTATIONS];
int (PSize+1) Sxy_0[NORIENTATIONS][NFRAMES];
int (PSize+1) Sy[NORIENTATIONS];
int PSize Sy_0[NORIENTATIONS][NFRAMES];
int (PSize+3) a[NORIENTATIONS];
int (PSize+5) a_0[NORIENTATIONS];
int (PSize+5) a_1[NORIENTATIONS];
int (PSize+12) a_2[NORIENTATIONS];
int (PSize+2) b[NORIENTATIONS];
int (PSize+4) b_0[NORIENTATIONS];
int (PSize+4) b_1[NORIENTATIONS];
int (PSize+11) b_2[NORIENTATIONS];
int (PSize+3) a3_0[NORIENTATIONS];
int (PSize+2) bs3[NORIENTATIONS];
int (PSize+2) bs3_1[NORIENTATIONS];
int (PSize+2) bs4_0[NORIENTATIONS];
int (PSize+2) bs4_1[NORIENTATIONS];
int (PSize+3) Reg[NFRAMES][NORIENTATIONS];
int (PSize+3) Reg_0[NFRAMES][NORIENTATIONS];
//Pipeline auxiliary variables
int PSize Ps0[NFRAMES][NORIENTATIONS];
int PSize Ps1[NFRAMES][NORIENTATIONS];
int PSize Ps2[NFRAMES][NORIENTATIONS];
int PSize Ps2_1[NFRAMES][NORIENTATIONS];
int PSize Ps2_2[NFRAMES][NORIENTATIONS];
int PSize Ps3[NFRAMES][NORIENTATIONS];
int PSize Ps3_1[NFRAMES][NORIENTATIONS];
int (PSize+3) LE_0[NORIENTATIONS][NFRAMES];
int (2*PSize) LE_1[NORIENTATIONS][NFRAMES];
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENTATIONS;orien++)
{
//Pipeline Stage 0
par(f=0;f<NFRAMES;f++)
{
Sxy_0[orien][f] = adjs(P[f][orien],width(Sxy_0))*adjs(XX[f],width(Sxy_0));
Sy_0[orien][f] = P[f][orien];
//Copying P for the next stage
Ps0[f][orien]=P[f][orien];
}
//Pipeline Stage 1
par
{
//Sxy[orien] = (adjs(P[0][orien],width(Sxy))*adjs(XX[0],width(Sxy)) + adjs(P[1][orien],width(Sxy))*adjs(XX[1],width(Sxy)) + adjs(P[2][orien],width(Sxy))*adjs(XX[2],width(Sxy)) + adjs(P[3][orien],width(Sxy))*adjs(XX[3],width(Sxy)) + adjs(P[4][orien],width(Sxy))*adjs(XX[4],width(Sxy)));
Sxy[orien] = SumMacro(Sxy_0[orien], 0, NFRAMES-1,width(Sxy));
//Sy[orien] = adjs(P[0][orien],width(Sy)) + adjs(P[1][orien],width(Sy)) + adjs(P[2][orien],width(Sy)) + adjs(P[3][orien],width(Sy)) + adjs(P[4][orien],width(Sy));
Sy[orien] = SumMacro(Sy_0[orien], 0, NFRAMES-1,width(Sy));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps1[f][orien]=Ps0[f][orien];
}
}
//Pipeline Stage 2_0
par
{
a_0[orien] = SXX*adjs(Sy[orien],width(a_0));
b_0[orien] = NFRAMES*adjs(Sxy[orien],width(b_0));
a_1[orien] = SX*adjs(Sxy[orien],width(a_1));
b_1[orien] = SX*adjs(Sy[orien],width(b_1));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2[f][orien]=Ps1[f][orien];
}
}
//Pipeline Stage 2_1
par
{
//a_2[orien] = (a_1[orien]-a_2[orien])*5; // 5 frames
//b_2[orien] = (b_1[orien]-b_2[orien])*5; // multiplied by 5 for following /50 division that become <<8 : 5/256 ~= 1/50
a_2[orien] = (adjs(a_0[orien],width(a_2))-adjs(a_1[orien],width(a_2))); // 3 frames
b_2[orien] = (adjs(b_0[orien],width(b_2))-adjs(b_1[orien],width(b_2)));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2_1[f][orien]=Ps2[f][orien];
}
}
//Pipeline Stage 2_2
par
{
//Using 5 decimals for a and b (*25)
//a[orien] = (adjs(SXX,PSize+15)*32*adjs(Sy[orien],PSize+15) - adjs(SX,PSize+15)*32*adjs(Sxy[orien],PSize+15))/adjs(DEN,PSize+15);
//a[orien] = a_2[orien]<<8; //for 5 frames
//a[orien] = a_2[orien]<<3; //for 3 frames
//a[orien] = adjs(((a_2[orien])*21)>>7,width(a)); //for 3 frames
a[orien] = adjs((a_2[orien]*43)>>8,width(a)); //for 3 frames
//b[orien] = adjs((NFRAMES*32*adjs(Sxy[orien],PSize+13) - adjs(SX,PSize+13)*32*adjs(Sy[orien],PSize+13))/adjs(DEN,PSize+13), width(b));
//b[orien] = adjs(b_2[orien]<<8,width(b)); // for 5 frames
//b[orien] = adjs(((b_2[orien])*21)>>7,width(b)); // for 3 frames
b[orien] = adjs((b_2[orien]*43)>>8,width(b)); // for 3 frames
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2_2[f][orien]=Ps2_1[f][orien];
}
}
//Pipeline Stage 3_0
par
{
par(f=0;f<NFRAMES;f++)
{
Reg_0[f][orien] = adjs(b[orien],width(Reg_0))*adjs(XX[f],width(Reg_0));
//Copying P for the next stage
Ps3[f][orien]=Ps2_2[f][orien];
}
//Copying b for the next stage
bs3[orien]=b[orien];
a3_0[orien] = a[orien];
}
//Pipeline Stage 3_1
par
{
par(f=0;f<NFRAMES;f++)
{
//Reg[fr][orien] = adjs(a[orien],width(Reg))+ adjs(b[orien],width(Reg))*adjs(XX[fr],width(Reg));
Reg[f][orien] = adjs(a3_0[orien],width(Reg))+ adjs(Reg_0[f][orien],width(Reg));
//Copying P for the next stage
Ps3_1[f][orien]=Ps3[f][orien];
}
//Copying b for the next stage
bs3_1[orien]=bs3[orien];
}
//Pipeline Stage 4_0
par
{
par(f=0;f<NFRAMES;f++)
{
LE_0[orien][f] = adjs(Reg[f][orien],width(LE_0)) - adjs(Ps3_1[f][orien], width(LE_0));
}
//Copying b for the next stage
bs4_0[orien]=bs3_1[orien];
}
//Pipeline Stage 4_1
par
{
par(f=0;f<NFRAMES;f++)
{
LE_1[orien][f] = adjs(LE_0[orien][f],width(LE_1))*adjs(LE_0[orien][f],width(LE_1));
}
//Copying b for the next stage
bs4_1[orien]=bs4_0[orien];
}
//Pipeline Stage 4_2
par
{
//LE[orien] = adjs(((((adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien], 2*PSize+26)*32)*(adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien],2*PSize+26)*32) + (adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32)*(adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32) + (adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32)*(adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32) + (adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32)*(adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32) + (adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32)*(adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32))/NFRAMES)\\15), width(LE));
//LE[orien] = adjs(((SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize)*21)>>6)\\2,width(LE));
LE[orien] = adjs(((SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize+2)*85)>>8)\\2,width(LE));
//LE[orien]=adjs(((LE_1[0][orien]+LE_1[1][orien]+LE_1[2][orien])>>2)\\15,width(LE));
//LE[orien] = adjs(SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize+26)\\15, width(LE));
//Simplifying the equation: FVreal = - (F0*cos(ang)/2*PI)*b[orien] --> FVreal = Wreal[orien]*b[orien] //Wreal is initialised with factor 25
// FVimag = - (F0*sin(ang)/2*PI)*b[orien] --> FVimag = Wimag[orien]*b[orien] //Wreal is initialised with factor 25
//FVreal[orien]= (adjs(bs4_1[orien],PSize+8)*adjs(WREAL[orien],PSize+8))\\6; //final size of FVreal is PSize+18
//FVimag[orien]= (adjs(bs4_1[orien],PSize+8)*adjs(WIMAG[orien],PSize+8))\\6; //final size of FVimag is PSize+18
//FVreal[orien]= ((adjs(bs4_1[orien],PSize+8)*adjs(WREAL[orien],PSize+8))\\2)<-width(FVreal); //final size of FVreal is PSize+18
//FVimag[orien]= ((adjs(bs4_1[orien],PSize+8)*adjs(WIMAG[orien],PSize+8))\\2)<-width(FVimag); //final size of FVimag is PSize+18
FVreal[orien]= ((adjs(bs4_1[orien],PSize+10)*adjs(WREAL[orien],PSize+10))\\4)<-width(FVreal); //final size of FVreal is PSize+18
FVimag[orien]= ((adjs(bs4_1[orien],PSize+10)*adjs(WIMAG[orien],PSize+10))\\4)<-width(FVimag); //final size of FVimag is PSize+18
}
}
}
//***************************************************
//Macro component_velocity_mia (3-frame version)
//
//LATENCY = 4;
//
//
//bits format:
//IN: P[NFRAMES][NORIENT] --> sign-4-5
//
//OUT: FVreal[NORIENT], FVimag[NORIENT] --> sign-14-5
// LE[NORIENT] --> sign-28-5
//
//***************************************************
macro proc component_velocity_mia(P, FVreal, FVimag, LE){
// Purpose: for every orientation, fit a straight line Reg = a + b*XX[frame]
// through the phase samples P[frame][orien] and derive from it
//   - FVreal/FVimag[orien]: the slope b scaled by WREAL/WIMAG[orien]
//   - LE[orien]: the mean squared residual of the fit (linearity error)
//
// Parameters (macro arguments, indexed as used below):
//   P      : in,  P[frame][orien]
//   FVreal : out, FVreal[orien]
//   FVimag : out, FVimag[orien]
//   LE     : out, LE[orien]
//
// The body is four consecutive par blocks (stages 1..4) replicated per
// orientation; values needed by later stages are copied along in the
// Ps1/Ps2/Ps3 and bs3 registers.
// NOTE(review): stages 1 and 4 name frames 0..2 explicitly and XX has three
// initialisers, so this version assumes NFRAMES == 3 -- confirm.
//***********************************************
//Constant definitions
//***********************************************
/* macro expr PSize = 10;
const int 7 SXX = 55;
const int 5 SX = 15;
const int 7 DEN = 50;
macro expr NORIENT = 8;
macro expr NFRAMES = 5;
const int 4 XX[NFRAMES] = {1, 2, 3, 4, 5}; //XX3 is XX in the third dimension
const int 6 WREAL[NORIENT] = {-20, -19, -14, -8, 0, 8, 14, 19}; // 2^5 * {-F0 * cos(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 6 WIMAG[NORIENT] = {0, -8, -14, -19, -20, -19, -14, -8}; // 2^5 * {-F0 * sin(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
*/
//***********************************************/
//Constant definitions for 3 frames
//***********************************************
// With XX = {1,2,3}: SX = sum(x) = 6, SXX = sum(x^2) = 14,
// DEN = NFRAMES*SXX - SX*SX = 3*14 - 36 = 6.
macro expr PSize = 10;
const int 7 SXX = 14;
const int 5 SX = 6;
const int 7 DEN = 6;
const int 4 XX[NFRAMES] = {1, 2, 3}; //XX3 is XX in the third dimension
const int 6 WREAL[NORIENTATIONS] = {-20, -19, -14, -8, 0, 8, 14, 19}; // 2^5 * {-F0 * cos(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 6 WIMAG[NORIENTATIONS] = {0, -8, -14, -19, -20, -19, -14, -8}; // 2^5 * {-F0 * sin(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
//***********************************************
//***********************************************
//Declarations: Variables
//***********************************************
//unsigned int 3 orien;
//unsigned int 3 fr;
//R
int (PSize+3) Sxy[NORIENTATIONS]; // sum over frames of P*XX
int (PSize+4) Sy[NORIENTATIONS]; // sum over frames of P
int (PSize+15) a[NORIENTATIONS]; //(PSize+10)Q5 -- intercept of the fitted line
int (PSize+13) b[NORIENTATIONS]; //(PSize+8)Q5 -- slope of the fitted line
int (PSize+15) Reg[NFRAMES][NORIENTATIONS]; //(PSize+11)Q5 - 5 --> because a, b are divided by DEN (50 in the 5-frame constants above, 6 here)
//\R
//Pipeline auxiliary variables
// Ps1..Ps3 carry P through stages 1..3; bs3 carries b into stage 4.
int PSize Ps1[NFRAMES][NORIENTATIONS];
int PSize Ps2[NFRAMES][NORIENTATIONS];
int PSize Ps3[NFRAMES][NORIENTATIONS];
int (PSize+13) bs3[NORIENTATIONS];
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENTATIONS;orien++)
{
//Pipeline Stage 1
// Accumulate sum(P*x) and sum(P) over the three frames.
par
{
Sxy[orien] = adjs(P[0][orien],width(Sxy))*adjs(XX[0],width(Sxy)) + adjs(P[1][orien],width(Sxy))*adjs(XX[1],width(Sxy)) + adjs(P[2][orien],width(Sxy))*adjs(XX[2],width(Sxy)); //+ adjs(P[3][orien],width(Sxy))*adjs(XX[3],width(Sxy)) + adjs(P[4][orien],width(Sxy))*adjs(XX[4],width(Sxy)));
Sy[orien] = adjs(P[0][orien],width(Sy)) + adjs(P[1][orien],width(Sy)) + adjs(P[2][orien],width(Sy));// + adjs(P[3][orien],width(Sy)) + adjs(P[4][orien],width(Sy));
//Copying P for the next stage
par(fr=0;fr<NFRAMES;fr++)
{
Ps1[fr][orien]=P[fr][orien];
}
}
//Pipeline Stage 2
// Least-squares coefficients:
//   a = (SXX*Sy - SX*Sxy)/DEN,  b = (NFRAMES*Sxy - SX*Sy)/DEN
// both pre-multiplied by 32 to keep 5 fractional bits.
par
{
//Using 5 decimals for a and b (*2^5)
a[orien] = (adjs(SXX,PSize+15)*32*adjs(Sy[orien],PSize+15) - adjs(SX,PSize+15)*32*adjs(Sxy[orien],PSize+15))/adjs(DEN,PSize+15);
b[orien] = adjs((NFRAMES*32*adjs(Sxy[orien],PSize+13) - adjs(SX,PSize+13)*32*adjs(Sy[orien],PSize+13))/adjs(DEN,PSize+13), width(b));
//Copying P for the next stage
par(fr=0;fr<NFRAMES;fr++)
{
Ps2[fr][orien]=Ps1[fr][orien];
}
}
//Pipeline Stage 3
// Evaluate the fitted line at every frame position: Reg = a + b*XX[fr].
par
{
par(fr=0;fr<NFRAMES;fr++)
{
Reg[fr][orien] = adjs(a[orien],width(Reg))+ adjs(b[orien],width(Reg))*adjs(XX[fr],width(Reg));
//Copying P for the next stage
Ps3[fr][orien]=Ps2[fr][orien];
}
//Copying b for the next stage
bs3[orien]=b[orien];
}
//Pipeline Stage 4
// LE: sum over frames of (Reg - P*32)^2, divided by NFRAMES, then the 15
// least significant bits are dropped (\\15) and the result is resized to LE.
// P is multiplied by 32 to bring it to the same 5-fractional-bit scale as Reg.
// FVreal/FVimag: slope times the per-orientation weight, dropping 10 LSBs.
par
{
LE[orien] = adjs(((((adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien], 2*PSize+26)*32)*(adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien],2*PSize+26)*32) + (adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32)*(adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32) + (adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32)*(adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32))/NFRAMES)\\15), width(LE)); //+ (adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32)*(adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32) + (adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32)*(adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32))/NFRAMES)\\15), width(LE));
//Simplifying the equation: FVreal = - (F0*cos(ang)/2*PI)*b[orien] --> FVreal = Wreal[orien]*b[orien] //Wreal is initialised with factor 2^5
// FVimag = - (F0*sin(ang)/2*PI)*b[orien] --> FVimag = Wimag[orien]*b[orien] //Wimag is initialised with factor 2^5
FVreal[orien]= (adjs(bs3[orien],PSize+20)*adjs(WREAL[orien],PSize+20))\\10; //final size of FVreal is PSize+18
FVimag[orien]= (adjs(bs3[orien],PSize+20)*adjs(WIMAG[orien],PSize+20))\\10; //final size of FVimag is PSize+18
}
}
}
//***************************************************
//Macro compute_phase
//
//LATENCY = 14;
//
//bits format:
//IN: Greal, Gimag --> sign-8-1
//
//OUT: P --> sign-2-6
//
//***************************************************
macro proc compute_phase(Greal, Gimag, P){
// Purpose: compute the phase for every frame and orientation by feeding
// (Gimag[fr][orien], Greal[fr][orien]) to the CoreATAN2CORDIC_fl core and
// writing its result, with the least significant bit dropped, to P[fr][orien].
//
// Parameters (macro arguments):
//   Greal, Gimag : in,  indexed [frame][orientation]
//   P            : out, indexed [frame][orientation]
//
// PipeDelay counts up to PipeLatency (= ATAN2LATENCY); P is only written once
// the counter has reached that value.
// NOTE(review): PipeDelay has no explicit initial value here, and every
// replicated (orien, fr) branch increments the same counter -- confirm this
// is intended and accepted by the tool settings in use.
macro expr PipeLatency=ATAN2LATENCY;
unsigned int 5 PipeDelay;
signed 10 aux[NFRAMES][NORIENTATIONS]; //, auxGimag[NFRAMES][NORIENTATIONS], auxGreal[NFRAMES][NORIENTATIONS];
static signal unsigned 1 enable=0;
/*/Interface definition
interface atan(int 9 phase_out)
myatan(int 10 x_in=a, int 10 y_in=b, unsigned 1 clk= __clock) with {busformat="BI"}; */
par(orien=0; orien<NORIENTATIONS;orien++)
{
//Enabling atan2 Core: only for 1 clock cycle
// (enable is declared as a signal, so the value assigned here is not stored)
enable=1;
//atan2 - core generator:
//Inputs have to be in [-1, 1]
//Outputs are in [-PI, PI]
par(fr=0;fr<NFRAMES;fr++)
{
// Disabled variant: normalise both inputs by the larger magnitude before
// handing them to an external atan interface.
/*if(abs(Gimag[orien][fr])> abs(Greal[orien][fr])){
par
{
//Pipeline Stage 1
a=Gimag[orien][fr]/(abs(Gimag[orien][fr])+1);
b=Greal[orien][fr]/(abs(Gimag[orien][fr])+1);
//Pipeline Stage 2
P[orien][fr]=myatan.phase_out; //Latency = 13
}
}
else{
par
{
//Pipeline Stage 1
a=Gimag[orien][fr]/(abs(Greal[orien][fr])+1);
b=Greal[orien][fr]/(abs(Greal[orien][fr])+1);
//Pipeline Stage 2
P[orien][fr]=myatan.phase_out; //Latency = 13
}
} */
// Disabled variant: replace the undetermined input (0,0) by (0,511).
/*/ to remove undet values of atan2(0,0)
if(Gimag[fr][orien]==0 && Greal[fr][orien]==0)
par
{
auxGimag[fr][orien]=0;
auxGreal[fr][orien]=511;
}
else
par
{
auxGimag[fr][orien]=Gimag[fr][orien];
auxGreal[fr][orien]=Greal[fr][orien];
} */
CoreATAN2CORDIC_fl(Gimag[fr][orien], Greal[fr][orien], enable, aux[fr][orien]);
// Hold the output back until the counter has reached the core latency;
// \\1 drops the least significant bit of the 10-bit core result.
if(PipeDelay==PipeLatency)
P[fr][orien] = (aux[fr][orien])\\1;
else
PipeDelay++;
}
}
}
macro proc compute_phase_top(Greal, Gimag, P, index)
{
// Purpose: scheduling wrapper around function_compute_phase for one frame
// slot, selected by the compile-time value `index` (0 .. NFRAMES-1).
//
// Parameters (macro arguments):
//   Greal, Gimag : in,  indexed [orientation]
//   P            : out, indexed [orientation]
//   index        : compile-time constant (used in ifselect and as a
//                  replicator bound)
//
// Three branches run in parallel:
//   1. wait NFRAMES-1 delay statements, then latch Greal/Gimag into
//      auxGreal/auxGimag;
//   2. wait `index` delay statements, call function_compute_phase on the
//      latched inputs, then wait the remaining NFRAMES-1-index delays;
//   3. copy P_Tmp to P.
// NOTE(review): presumably this macro is invoked repeatedly so that the
// three branches overlap across invocations -- confirm against the caller.
signed F_BITS auxGreal[NORIENTATIONS], auxGimag[NORIENTATIONS];
signed 9 P_Tmp[NORIENTATIONS];
par
{
// Branch 1: input latch after NFRAMES-1 delays.
seq
{
seq(i=0; i<NFRAMES-1; i++)
{
delay;
}
par(s=0;s<NORIENTATIONS;s++)
{
auxGreal[s]=Greal[s];
auxGimag[s]=Gimag[s];
}
}
// Branch 2: `index` delays before the call, NFRAMES-1-index delays after it.
seq
{
// ifselect is resolved at compile time, so no delay loop is generated
// for index == 0.
ifselect(index!=0)
{
seq(t=0; t<index; t++)
{
delay;
}
}
function_compute_phase(auxGreal,auxGimag, P_Tmp);
ifselect(index!=NFRAMES-1)
{
seq(k=index; k<NFRAMES-1; k++)
{
delay;
}
}
}//seq
// Branch 3: forward the stored result to the output.
par(o=0;o<NORIENTATIONS;o++)
{
P[o]=P_Tmp[o];
}
} // par
}
// Function wrapper around the compute_phase_index macro procedure.
//   Greal, Gimag : pointers to F_BITS-wide signed inputs (used as arrays
//                  indexed by orientation inside compute_phase_index)
//   P            : pointer to the 9-bit signed phase outputs
void function_compute_phase(signed int F_BITS (*Greal),signed int F_BITS (*Gimag), signed int 9 *P)
{
compute_phase_index(Greal,Gimag,P);
}
//***************************************************
//Macro compute_phase_index (single frame: Greal, Gimag and P are indexed by orientation only)
//
//LATENCY = 14;
//
//bits format:
//IN: Greal, Gimag --> sign-8-1
//
//OUT: P --> sign-2-6
//
//***************************************************
macro proc compute_phase_index(Greal, Gimag, P){
// Phase of one frame: for every orientation, run the CORDIC atan2 core on
// (Gimag[o], Greal[o]) and, once the warm-up counter has reached the core
// latency, store the core output without its least significant bit in P[o].
macro expr WarmupCycles=ATAN2LATENCY;
static signal unsigned 1 coreEnable=0;
signed 10 cordicOut[NORIENTATIONS];
unsigned int 5 warmupCount;
par(o=0; o<NORIENTATIONS; o++)
{
// Drive the core enable (a signal, so it is only valid in this cycle).
coreEnable=1;
CoreATAN2CORDIC_fl(Gimag[o], Greal[o], coreEnable, cordicOut[o]);
// Still filling the core pipeline: count; afterwards: deliver the result.
if(warmupCount!=WarmupCycles)
warmupCount++;
else
P[o] = (cordicOut[o])\\1;
}
}
//***************************************************
//Macro compute_single_phase
//
//LATENCY = 14;
//
//bits format:
//IN: Greal, Gimag --> sign-8-1
//
//OUT: P --> sign-2-6
//
//***************************************************
macro proc compute_single_phase(Greal, Gimag, P)
{
// Single-frame phase computation, one CORDIC atan2 core call per orientation.
// Core contract as stated by the original author: inputs in [-1, 1],
// outputs in [-PI, PI].
macro expr CoreLatency=ATAN2LATENCY;
static signal unsigned 1 atanEnable=0;
signed 10 atanOut[NORIENTATIONS];
unsigned int 5 fillCount;
par(k=0; k<NORIENTATIONS; k++)
{
// The enable is a signal: it is asserted for the current clock cycle only.
atanEnable=1;
CoreATAN2CORDIC_fl(Gimag[k], Greal[k], atanEnable, atanOut[k]);
// Count until the core latency has elapsed, then start writing P
// (core result with its least significant bit dropped).
if(fillCount!=CoreLatency)
fillCount++;
else
P[k] = (atanOut[k])\\1;
}
}
//***************************************************
//Macro unwrap
//
//LATENCY = 12;
//
//bits format:
//IN: Pin [NFRAMES][NORIENT] --> sign-2-6
//
//OUT: Pout[NFRAMES][NORIENT] --> sign-4-5
//
//***************************************************
macro proc unwrap(Pin, Pout){
// Purpose: temporal phase unwrapping over 5 frames, done independently for
// each orientation. For consecutive frames k, k+1 the difference D is
// computed; if |D| > PI, 2*PI (DOUBLE_PI) is removed from (or added to,
// depending on the sign of D) frame k+1 and all later frames.
//
// Parameters (macro arguments):
//   Pin  : in,  Pin[frame][orientation]
//   Pout : out, Pout[frame][orientation]; every output is the (corrected)
//          phase with its least significant bit dropped (\\1)
//
// Structure: 12 consecutive par stages, three per frame transition
// (difference -> threshold test -> correction), plus the final write.
// Registers named <x>sN carry value <x> as it stands after stage N.
// NOTE(review): PI is not defined in this macro (the local definition is
// commented out), so it must come from an enclosing scope; frames 0..4 are
// addressed explicitly, so NFRAMES is assumed to be at least 5 -- confirm.
// NOTE(review): (sign(D)*2+1) is presumably +1/-1 depending on the sign of
// D -- confirm against the definition of sign().
//***********************************************
//Constant definitions
//***********************************************
macro expr DOUBLE_PI = 402;
//macro expr PI = 201;
macro expr PSize = 10;
macro expr NORIENT = 8;
//macro expr NFRAMES = 5;
//***********************************************
//Declarations: Variables
//***********************************************
unsigned 3 fr;
unsigned 3 orien;
//static unsigned int 3 cur_frame= 1;
//unsigned int 3 cur_frame;
// A*: "jump detected" flags, D*: frame-to-frame differences,
// Pin_2/3/4: phases after the 1st/2nd/3rd correction (one bit wider each).
unsigned int 1 A[NORIENT];
int PSize D[NORIENT];
unsigned int 3 cf;
int (PSize+1) Pin_2[NFRAMES][NORIENT];
int (PSize+2) Pin_3[NFRAMES][NORIENT];
int (PSize+3) Pin_4[NFRAMES][NORIENT];
int (PSize+2) D_2[NORIENT];
int (PSize+3) D_3[NORIENT];
int (PSize+4) D_4[NORIENT];
unsigned int 1 A_2[NORIENT];
unsigned int 1 A_3[NORIENT];
unsigned int 1 A_4[NORIENT];
//Pipeline auxiliary variable declarations
int PSize Ds2[NORIENT];
int (PSize+2) D_2s5[NORIENT];
int (PSize+3) D_3s8[NORIENT];
int (PSize+4) D_4s11[NORIENT];
int (PSize-1) Pins1[NFRAMES][NORIENT];
int (PSize-1) Pins2[NFRAMES][NORIENT];
int (PSize+1) Pin_2s4[NFRAMES][NORIENT];
int (PSize+1) Pin_2s5[NFRAMES][NORIENT];
int (PSize+2) Pin_3s7[NFRAMES][NORIENT];
int (PSize+2) Pin_3s8[NFRAMES][NORIENT];
int (PSize+3) Pin_4s10[NFRAMES][NORIENT];
int (PSize+3) Pin_4s11[NFRAMES][NORIENT];
// Delay chains that keep already-finished outputs until stage 12:
// Pouts*  -> Pout[0], Pout_1s* -> Pout[1], Pout_2s* -> Pout[2], Pout_3s* -> Pout[3]
int PSize Pouts1[NORIENT];
int PSize Pouts2[NORIENT];
int PSize Pouts3[NORIENT];
int PSize Pouts4[NORIENT];
int PSize Pouts5[NORIENT];
int PSize Pouts6[NORIENT];
int PSize Pouts7[NORIENT];
int PSize Pouts8[NORIENT];
int PSize Pouts9[NORIENT];
int PSize Pouts10[NORIENT];
int PSize Pouts11[NORIENT];
int PSize Pout_1s4[NORIENT];
int PSize Pout_1s5[NORIENT];
int PSize Pout_1s6[NORIENT];
int PSize Pout_1s7[NORIENT];
int PSize Pout_1s8[NORIENT];
int PSize Pout_1s9[NORIENT];
int PSize Pout_1s10[NORIENT];
int PSize Pout_1s11[NORIENT];
int PSize Pout_2s7[NORIENT];
int PSize Pout_2s8[NORIENT];
int PSize Pout_2s9[NORIENT];
int PSize Pout_2s10[NORIENT];
int PSize Pout_2s11[NORIENT];
int PSize Pout_3s10[NORIENT];
int PSize Pout_3s11[NORIENT];
//Initialisations
//cur_frame=1;
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENT;orien++)
{
//-------------------------------------------
//CURRENT FRAME == 1
//Pipeline Stage 1
// Frame 0 needs no correction: produce its output and the difference
// between frame 1 and frame 0.
par
{
//Writing Pout[0]
Pouts1[orien] = adjs((Pin[0][orien])\\1, width(Pout));
D[orien] = adjs(Pin[1][orien],width(D)) - adjs(Pin[0][orien],width(D));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pins1[fr][orien]=Pin[fr][orien];
}
/////////////////////////////////////////
}
//Pipeline Stage 2
// Flag a phase jump when the difference exceeds PI in magnitude.
par
{
A[orien] = abs(D[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pins2[fr][orien]=Pins1[fr][orien];
}
Ds2[orien]=D[orien];
//Writing Pout[0]
Pouts2[orien] = Pouts1[orien];
////////////////////////////////////////
}
//Pipeline Stage 3
// Apply the correction to frames 1..NFRAMES-1 (A acts as a 0/1 multiplier,
// so nothing changes when no jump was flagged).
par
{
par(cf=1; cf<NFRAMES;cf++)
{
Pin_2[cf][orien]=adjs(Pins2[cf][orien],width(Pin_2)) - DOUBLE_PI*(adjs(sign(Ds2[orien]),width(Pin_2))*2+1) * (signed)adju(A[orien],width(Pin_2));
}
//cur_frame=cur_frame+1;
//Writing Pout[0]
Pouts3[orien] = Pouts2[orien];
}
//-------------------------------------------
//CURRENT FRAME == 2
//Pipeline Stage 4
// Frame 1 is final now; compute the difference between frames 2 and 1.
par
{
//Writing Pout[1]
Pout_1s4[orien] = adjs((Pin_2[1][orien])\\1, width(Pout));
D_2[orien] = adjs(Pin_2[2][orien],width(D_2)) - adjs(Pin_2[1][orien],width(D_2));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_2s4[fr][orien]=Pin_2[fr][orien];
}
/////////////////////////////////////////
Pouts4[orien]=Pouts3[orien];
}
//Pipeline Stage 5
par
{
A_2[orien] = abs(D_2[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_2s5[fr][orien]=Pin_2s4[fr][orien];
}
D_2s5[orien]=D_2[orien];
Pout_1s5[orien]=Pout_1s4[orien];
Pouts5[orien]=Pouts4[orien];
////////////////////////////////////////
}
//Pipeline Stage 6
// Correct frames 2..NFRAMES-1.
par
{
par(cf=2; cf<NFRAMES;cf++)
{
Pin_3[cf][orien]=adjs(Pin_2s5[cf][orien],width(Pin_3)) - DOUBLE_PI*(adjs(sign(D_2s5[orien]),width(Pin_3))*2+1) * (signed)adju(A_2[orien],width(Pin_3));
}
//cur_frame=cur_frame+1;
Pout_1s6[orien]=Pout_1s5[orien];
Pouts6[orien]=Pouts5[orien];
}
//-------------------------------------------
//CURRENT FRAME == 3
//Pipeline Stage 7
// Frame 2 is final now; compute the difference between frames 3 and 2.
par
{
//Writing Pout[2]
Pout_2s7[orien] = adjs((Pin_3[2][orien])\\1, width(Pout));
D_3[orien] = adjs(Pin_3[3][orien],width(D_3)) - adjs(Pin_3[2][orien],width(D_3));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_3s7[fr][orien]=Pin_3[fr][orien];
}
Pout_1s7[orien]=Pout_1s6[orien];
Pouts7[orien]=Pouts6[orien];
/////////////////////////////////////////
}
//Pipeline Stage 8
par
{
A_3[orien] = abs(D_3[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_3s8[fr][orien]=Pin_3s7[fr][orien];
}
D_3s8[orien]=D_3[orien];
Pout_2s8[orien]=Pout_2s7[orien];
Pout_1s8[orien]=Pout_1s7[orien];
Pouts8[orien]=Pouts7[orien];
////////////////////////////////////////
}
//Pipeline Stage 9
// Correct frames 3..NFRAMES-1.
par
{
par(cf=3; cf<NFRAMES;cf++)
{
Pin_4[cf][orien]=adjs(Pin_3s8[cf][orien],width(Pin_4)) - DOUBLE_PI*(adjs(sign(D_3s8[orien]),width(Pin_4))*2+1) * (signed)adju(A_3[orien],width(Pin_4));
}
//cur_frame=cur_frame+1;
Pout_2s9[orien]=Pout_2s8[orien];
Pout_1s9[orien]=Pout_1s8[orien];
Pouts9[orien]=Pouts8[orien];
}
//-------------------------------------------
//CURRENT FRAME == 4
//Pipeline Stage 10
// Frame 3 is final now; compute the difference between frames 4 and 3.
par
{
//Writing Pout[3]
Pout_3s10[orien] = adjs((Pin_4[3][orien])\\1, width(Pout));
D_4[orien] = adjs(Pin_4[4][orien],width(D_4)) - adjs(Pin_4[3][orien],width(D_4));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_4s10[fr][orien]=Pin_4[fr][orien];
}
Pout_2s10[orien]=Pout_2s9[orien];
Pout_1s10[orien]=Pout_1s9[orien];
Pouts10[orien]=Pouts9[orien];
/////////////////////////////////////////
}
//Pipeline Stage 11
par
{
A_4[orien] = abs(D_4[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_4s11[fr][orien]=Pin_4s10[fr][orien];
}
D_4s11[orien]=D_4[orien];
Pout_3s11[orien]=Pout_3s10[orien];
Pout_2s11[orien]=Pout_2s10[orien];
Pout_1s11[orien]=Pout_1s10[orien];
Pouts11[orien]=Pouts10[orien];
////////////////////////////////////////
}
//Pipeline Stage 12
//Writing Pout[0..4]
// Frames 0..3 come from their delay chains; frame 4 gets its last
// correction here, at the width of Pin_4s11, before the LSB is dropped.
par
{
Pout[0][orien]=Pouts11[orien];
Pout[1][orien]=Pout_1s11[orien];
Pout[2][orien]=Pout_2s11[orien];
Pout[3][orien]=Pout_3s11[orien];
Pout[4][orien]=adjs((adjs(Pin_4s11[4][orien],width(Pin_4s11)) - DOUBLE_PI*(adjs(sign(D_4s11[orien]),width(Pin_4s11))*2+1)*(signed)adju(A_4[orien],width(Pin_4s11)))\\1, width(Pout));
}
}//par orient
}
//***************************************************
//Macro unwrap_3: version for 3 frames
//
//LATENCY = 6;
//
//bits format:
//IN: Pin [NFRAMES][NORIENT] --> sign-2-6
//
//OUT: Pout[NFRAMES][NORIENT] --> sign-4-5
//
//***************************************************
macro proc unwrap_3(Pin, Pout){
// Purpose: temporal phase unwrapping over 3 frames, done independently for
// each orientation. For consecutive frames k, k+1 the difference D is
// computed; if |D| > PI, 2*PI (DOUBLE_PI) is removed from (or added to,
// depending on the sign of D) frame k+1 and all later frames.
//
// Parameters (macro arguments):
//   Pin  : in,  Pin[frame][orientation]
//   Pout : out, Pout[frame][orientation]; every output is the (corrected)
//          phase with its least significant bit dropped (\\1)
//
// Structure: 6 consecutive par stages (difference -> threshold test ->
// correction, twice; the second correction is merged with the final write).
// Registers named <x>sN carry value <x> as it stands after stage N.
// NOTE(review): PI is not defined in this macro (the local definition is
// commented out), so it must come from an enclosing scope; frames 0..2 are
// addressed explicitly, so NFRAMES is assumed to be 3 -- confirm.
// NOTE(review): (sign(D)*2+1) is presumably +1/-1 depending on the sign of
// D -- confirm against the definition of sign().
//***********************************************
//Constant definitions
//***********************************************
macro expr DOUBLE_PI = 402;
//macro expr PI = 201;
macro expr PSize = 10;
// Working width of the second correction: the once-corrected phase is
// PSize+1 bits wide and adding/removing another 2*PI can need one more bit
// (same width as Pin_3 in the 5-frame unwrap macro).
macro expr Corr2Width = PSize+2;
//***********************************************
//Declarations: Variables
//***********************************************
unsigned 3 fr;
unsigned 3 orien;
//static unsigned int 3 cur_frame= 1;
//unsigned int 3 cur_frame;
// A*: "jump detected" flags, D*: frame-to-frame differences,
// Pin_2: phases after the first correction.
unsigned int 1 A[NORIENTATIONS];
int PSize D[NORIENTATIONS];
unsigned int 3 cf;
int (PSize+1) Pin_2[NFRAMES][NORIENTATIONS];
int (PSize+2) D_2[NORIENTATIONS];
unsigned int 1 A_2[NORIENTATIONS];
//Pipeline auxiliary variable declarations
int PSize Ds2[NORIENTATIONS];
int (PSize+2) D_2s5[NORIENTATIONS];
int (PSize-1) Pins1[NFRAMES][NORIENTATIONS], Pins2[NFRAMES][NORIENTATIONS];
int (PSize+1) Pin_2s4[NFRAMES][NORIENTATIONS], Pin_2s5[NFRAMES][NORIENTATIONS];
// Delay chains that keep finished outputs until stage 6:
// Pouts* -> Pout[0], Pout_1s* -> Pout[1]
int PSize Pouts1[NORIENTATIONS], Pouts2[NORIENTATIONS], Pouts3[NORIENTATIONS], Pouts4[NORIENTATIONS], Pouts5[NORIENTATIONS];
int PSize Pout_1s4[NORIENTATIONS], Pout_1s5[NORIENTATIONS];
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENTATIONS;orien++)
{
//-------------------------------------------
//CURRENT FRAME == 1
//Pipeline Stage 1
// Frame 0 needs no correction: produce its output and the difference
// between frame 1 and frame 0.
par
{
//Writing Pout[0]
Pouts1[orien] = adjs((Pin[0][orien])\\1, width(Pout));
D[orien] = adjs(Pin[1][orien],width(D)) - adjs(Pin[0][orien],width(D));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pins1[fr][orien]=Pin[fr][orien];
}
/////////////////////////////////////////
}
//Pipeline Stage 2
// Flag a phase jump when the difference exceeds PI in magnitude.
par
{
A[orien] = abs(D[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pins2[fr][orien]=Pins1[fr][orien];
}
Ds2[orien]=D[orien];
//Writing Pout[0]
Pouts2[orien] = Pouts1[orien];
////////////////////////////////////////
}
//Pipeline Stage 3
// Apply the correction to frames 1..NFRAMES-1 (A acts as a 0/1 multiplier,
// so nothing changes when no jump was flagged).
par
{
par(cf=1; cf<NFRAMES;cf++)
{
Pin_2[cf][orien]=adjs(Pins2[cf][orien],width(Pin_2)) - DOUBLE_PI*(adjs(sign(Ds2[orien]),width(Pin_2))*2+1) * (signed)adju(A[orien],width(Pin_2));
}
//cur_frame=cur_frame+1;
//Writing Pout[0]
Pouts3[orien] = Pouts2[orien];
}
//-------------------------------------------
//CURRENT FRAME == 2
//Pipeline Stage 4
// Frame 1 is final now; compute the difference between frames 2 and 1.
par
{
//Writing Pout[1]
Pout_1s4[orien] = adjs((Pin_2[1][orien])\\1, width(Pout));
D_2[orien] = adjs(Pin_2[2][orien],width(D_2)) - adjs(Pin_2[1][orien],width(D_2));
//Copying Pin for the next stage/////////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_2s4[fr][orien]=Pin_2[fr][orien];
}
/////////////////////////////////////////
Pouts4[orien]=Pouts3[orien];
}
//Pipeline Stage 5
par
{
A_2[orien] = abs(D_2[orien])>(PI);
//Copying Pin, D for the next stage/////
par(fr=0;fr<NFRAMES;fr++)
{
Pin_2s5[fr][orien]=Pin_2s4[fr][orien];
}
D_2s5[orien]=D_2[orien];
Pout_1s5[orien]=Pout_1s4[orien];
Pouts5[orien]=Pouts4[orien];
////////////////////////////////////////
}
//Pipeline Stage 6
par
{
//Writing Pout[0..2]
Pout[0][orien]=Pouts5[orien];
Pout[1][orien]=Pout_1s5[orien];
// Second correction of frame 2. It is evaluated at Corr2Width bits and only
// narrowed to the output width after the LSB has been dropped; evaluating
// it directly at width(Pout) would cut the (PSize+1)-bit input before the
// arithmetic and corrupt results whose full-precision value does not fit
// the output width.
Pout[2][orien]=adjs((adjs(Pin_2s5[2][orien],Corr2Width) - DOUBLE_PI*(adjs(sign(D_2s5[orien]),Corr2Width)*2+1)*(signed)adju(A_2[orien],Corr2Width))\\1, width(Pout));
}
}//par orient
}
//*************************************************************************
// macro full_velocity
/*************************************************************************/
macro proc full_velocity(FVx,FVy,LE,thres,nc_min, enable, Ox, Oy)
{
// Purpose: combine the per-orientation component velocities (FVx, FVy)
// into one velocity (Ox, Oy).
//
// Parameters (macro arguments):
//   FVx, FVy : in,  component velocity per orientation
//   LE       : in,  per-orientation error; an orientation is used only if
//                   LE[o] < thres and (FVx[o], FVy[o]) is not (0, 0)
//   thres    : in,  threshold applied to LE
//   nc_min   : in,  minimum number of accepted orientations
//   enable   : in,  forwarded as second argument to invert()
//   Ox, Oy   : out, result, or SetNAN(...) when fewer than nc_min
//                   orientations were accepted
//
// Computation, with sums taken over the accepted orientations and
// L2 = invert(FVx^2 + FVy^2):
//   sumX = sum(FVx),           sumY = sum(FVy)
//   sXX  = sum(FVx*FVx*L2),    sYY = sum(FVy*FVy*L2),   sXY = sum(FVx*FVy*L2)
//   den  = invert(sXY*sXY - sXX*sYY)
//   Ox   = -(sumX*sYY - sumY*sXY) * den
//   Oy   =  (sumX*sXY - sumY*sXX) * den
//
// All statements sit inside one par block, so they are evaluated together;
// the arrays Vxx/Vyy/Vxy, NumX/NumY, sumX/sumY and nc are delay lines that
// keep operands aligned with the latency of invert().
// NOTE(review): invert() is not defined in this part of the file; a
// commented-out version further down takes (Den, Div_thr, quot), while
// `enable` is passed in the second position here -- confirm which
// definition is in use.
macro expr Frac=4;
macro expr DIVLATENCY=DIVIDER_LATENCY; // NOTE(review): full_velocity_small uses DIVIDER_LATENCY+1 "for thresholding in invert function"; no +1 is applied here -- confirm
macro expr THlat=DIVLATENCY+DIVLATENCY+4;
macro expr SUMlat=DIVLATENCY+2;
unsigned int 4 nc[THlat]; // delay line: number of accepted orientations
unsigned int 1 nc_par[NORIENTATIONS]; // 1 where orientation o was accepted
signed int FLOW_BITS Vx[NORIENTATIONS], Vy[NORIENTATIONS], auxYY[NORIENTATIONS],auxXX[NORIENTATIONS], auxXY[NORIENTATIONS];
// NOTE(review): sumX/sumY accumulate NORIENTATIONS values of FLOW_BITS width
// into FLOW_BITS bits; full_velocity_small uses FLOW_BITS+3 here -- check
// for overflow.
signed int (FLOW_BITS) sumX[SUMlat],sumY[SUMlat], sumYYL_2, sumXXL_2, sumXYL_2;
signed int (DIVIDER_INPUT) NumX[DIVLATENCY], NumY[DIVLATENCY];
signed int DIVIDER_INPUT aux_den_0, aux_den_1, aux_NumX_0, aux_NumX_1,aux_NumY_0, aux_NumY_1;
unsigned int 1 cond[NORIENTATIONS]; // only referenced by the disabled "Pipeline 0" below
signed int (DIVIDER_INPUT) Vxx[NORIENTATIONS][DIVLATENCY+1], Vyy[NORIENTATIONS][DIVLATENCY+1],Vxy[NORIENTATIONS][DIVLATENCY+1];
signed int DIVIDER_INPUT SumXX_YY[NORIENTATIONS];
signed int DIVIDER_INPUT L2[NORIENTATIONS], den;
signed FLOW_BITS quotX, quotY;
//assert (NORIENTATIONS==8, 0, "The code function only for 8 orientations");
//------------------------------------------------------------
// Verify bitwidth in operations
// improve division (ex. divider core)
//------------------------------------------------------------
par
{
/*/ Pipeline 0
par(o=0;o<NORIENTATIONS;o++)
{
cond[o]=(LE[o]<thres); // && (FVx[o]!=NAN) && (FVy[o]!=NAN);
auxFVx[o]=FVx[o];
auxFVy[o]=FVy[o];
} */
// Pipeline 1
// Accept or reject each orientation; rejected ones contribute zeros.
par(o=0;o<NORIENTATIONS;o++)
{
//if(cond[o] && (FVxx[o]+FVyy[o])>EPS)
if(LE[o]<thres && (FVx[o]!=0 || FVy[o]!=0) )
par
{
//L2[o]= ((signed)one)/(FVxx[o] + FVyy[o]);
SumXX_YY[o]= (adjs(FVx[o],width(SumXX_YY))*adjs(FVx[o],width(SumXX_YY))) + (adjs(FVy[o],width(SumXX_YY))*adjs(FVy[o],width(SumXX_YY))); //FVxx[o] + FVyy[o];
Vx[o]=FVx[o];
Vy[o]=FVy[o];
Vxx[o][0]=(adjs(FVx[o],width(Vxx))*adjs(FVx[o],width(Vxx))); //\\Frac;
Vyy[o][0]=(adjs(FVy[o],width(Vyy))*adjs(FVy[o],width(Vyy))); //\\Frac;
Vxy[o][0]=(adjs(FVy[o],width(Vxy))*adjs(FVx[o],width(Vxy))); //\\Frac;
nc_par[o]=1;
}
else
par
{
nc_par[o]=0;
//L2[o]=0;
SumXX_YY[o]=0;
Vx[o]=0;
Vy[o]=0;
Vxx[o][0]=0;
Vyy[o][0]=0;
Vxy[o][0]=0;
}
}
// Delay lines for the squared terms: they are consumed at tap DIVLATENCY,
// next to the output of invert().
par(v=1;v<(DIVLATENCY+1);v++)
{
par(o=0;o<NORIENTATIONS;o++)
{
Vxx[o][v]=Vxx[o][v-1];
Vyy[o][v]=Vyy[o][v-1];
Vxy[o][v]=Vxy[o][v-1];
}
}
//Pipeline 2
par{
// Pipeline 2
//nc=adju((L2[0]>0),4)+adju((L2[1]>0),4)+adju((L2[2]>0),4)+adju((L2[3]>0),4)+adju((L2[4]>0),4)+adju((L2[5]>0),4)+adju((L2[6]>0),4)+adju((L2[7]>0),4);
nc[0] = UnSumMacro(nc_par, 0, NORIENTATIONS-1, width(nc));
//sumX = Vx[0]+Vx[1]+Vx[2]+Vx[3]+Vx[4]+Vx[5]+Vx[6]+Vx[7];
sumX[0] = SumMacro(Vx, 0, NORIENTATIONS-1, width(sumX));
//sumY = Vy[0]+Vy[1]+Vy[2]+Vy[3]+Vy[4]+Vy[5]+Vy[6]+Vy[7];
sumY[0] = SumMacro(Vy, 0, NORIENTATIONS-1, width(sumY));
// Pipeline 2
// L2[o] = invert(FVx^2 + FVy^2)
par(o=0;o<NORIENTATIONS;o++)
{
invert(SumXX_YY[o], enable, L2[o]);
}
// Pipeline 6
// Weight the delayed squared terms by L2, rescale by a right shift of
// DIVIDER_INPUT-5 and keep the FLOW_BITS least significant bits.
par(O=0;O<NORIENTATIONS;O++)
{
//if(L2[O]!=0) // && (Vy[O]*Vx[O])!=0)
par
{
//divide12(Vyy[O], L2[O], auxYY[O]);
auxYY[O] = ((Vyy[O][DIVLATENCY]*adjs(L2[O],width(Vyy)))>>(DIVIDER_INPUT-5)) <-FLOW_BITS;
//divide12(Vxx[O], L2[O], auxXX[O]);
auxXX[O] = ((Vxx[O][DIVLATENCY]*adjs(L2[O],width(Vxx)))>>(DIVIDER_INPUT-5)) <- FLOW_BITS;
//divide12(Vxy[O], L2[O], auxXY[O]);
auxXY[O] = ((Vxy[O][DIVLATENCY]*adjs(L2[O],width(Vxy)))>>(DIVIDER_INPUT-5)) <- FLOW_BITS;
}
//else
//NanCond[O]=1;
}
}
// Pipeline 7
sumYYL_2 = SumMacro(auxYY, 0, NORIENTATIONS-1, width(sumYYL_2));
sumXXL_2 = SumMacro(auxXX, 0, NORIENTATIONS-1, width(sumXXL_2));
sumXYL_2 = SumMacro(auxXY, 0, NORIENTATIONS-1, width(sumXYL_2));
// Pipeline 8
// Products are formed 4 bits wider than their destination and then the
// Frac least significant bits are dropped.
aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+4)*adjs(sumXYL_2,width(aux_den_0)+4))\\Frac;
aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+4)*adjs(sumYYL_2,width(aux_den_1)+4))\\Frac;
aux_NumX_0 = (adjs(sumX[SUMlat-1],width(aux_NumX_0)+4)*adjs(sumYYL_2,width(aux_NumX_0)+4))\\Frac;
aux_NumX_1 = (adjs(sumY[SUMlat-1],width(aux_NumX_1)+4)*adjs(sumXYL_2,width(aux_NumX_1)+4))\\Frac;
aux_NumY_0 = (adjs(sumX[SUMlat-1],width(aux_NumY_0)+4)*adjs(sumXYL_2,width(aux_NumY_0)+4))\\Frac;
aux_NumY_1 = (adjs(sumY[SUMlat-1],width(aux_NumY_1)+4)*adjs(sumXXL_2,width(aux_NumY_1)+4))\\Frac;
// Pipeline 9
//den = ((signed)one)/(aux_den_0 - aux_den_1);
invert((aux_den_0 - aux_den_1), enable, den);
NumX[0] = -(aux_NumX_0 - aux_NumX_1);
NumY[0] = aux_NumY_0 - aux_NumY_1;
// Pipeline 10-14
// Numerators are taken from the end of their delay line, next to den.
//quotX=adjs((NumX<<4)/den, FLOW_BITS);
quotX=((NumX[DIVLATENCY-1]*adjs(den,width(NumX)))>>(DIVIDER_INPUT-5)) <- FLOW_BITS;
//quotY=adjs((NumY<<4)/den, FLOW_BITS);
quotY=((NumY[DIVLATENCY-1]*adjs(den,width(NumY)))>>(DIVIDER_INPUT-5)) <- FLOW_BITS;
//divide12(NumX, den, quotX);
//divide12(NumY, den, quotY);
// delay for threshold
par(i=1; i<THlat; i++)
{
nc[i]=nc[i-1];
}
par(n=1; n<DIVLATENCY; n++)
{
NumX[n]=NumX[n-1];
NumY[n]=NumY[n-1];
}
par(s=1; s<SUMlat; s++)
{
sumX[s]=sumX[s-1];
sumY[s]=sumY[s-1];
}
// Pipeline 15
// Output only when enough orientations were accepted.
if (nc[THlat-1]>=nc_min)
par
{
Ox= quotX; // den = (sumXYL_22-sumXXL_2*sumYYL_2)
//Ox= (-NumX>>(lmo(den<-(width(den)-1))))<-FLOW_BITS;
//Ox= (-NumX);
Oy= quotY;
//Oy= ( NumY>>(lmo(den<-(width(den)-1))))<-FLOW_BITS;
//Oy= (NumY);
}
else
par
{
Ox=SetNAN(Ox); //in matlab is NaN
Oy=SetNAN(Oy); //in matlab is NaN
}
}
}
//*************************************************************************
// macro full_velocity_small
/*************************************************************************/
macro proc full_velocity_small(FVx,FVy,LE,thres, Div_thr,nc_min, Ox, Oy)
{
// Purpose: combine the per-orientation component velocities (FVx, FVy)
// into one velocity (Ox, Oy). Same scheme as full_velocity, with extra
// fractional scaling, NAN inputs rejected, a threshold passed to invert()
// and a NaN result when the final reciprocal is 0.
//
// Parameters (macro arguments):
//   FVx, FVy : in,  component velocity per orientation
//   LE       : in,  per-orientation error; an orientation is used only if
//                   LE[o] < thres, (FVx[o], FVy[o]) is not (0, 0) and
//                   neither component equals NAN
//   thres    : in,  threshold applied to LE
//   Div_thr  : in,  forwarded as second argument to invert()
//   nc_min   : in,  minimum number of accepted orientations
//   Ox, Oy   : out, result, or SetNAN(...) when fewer than nc_min
//                   orientations were accepted
//
// Computation, with sums taken over the accepted orientations and
// L2 = invert(FVx^2 + FVy^2):
//   sumX = sum(FVx),           sumY = sum(FVy)
//   sXX  = sum(FVx*FVx*L2),    sYY = sum(FVy*FVy*L2),   sXY = sum(FVx*FVy*L2)
//   den  = invert(sXY*sXY - sXX*sYY)
//   Ox   = -(sumX*sYY - sumY*sXY) * den
//   Oy   =  (sumX*sXY - sumY*sXX) * den
//
// All statements sit inside one par block, so they are evaluated together;
// the arrays Vxx/Vyy/Vxy, NumX/NumY, sumX/sumY and nc are delay lines that
// keep operands aligned with the latency of invert().
// NOTE(review): invert() and NAN are not defined in this part of the file;
// several products are formed at a hard-coded width of 26 bits -- confirm
// this is sufficient for the configured DIVIDER_INPUT.
macro expr Frac=8;
macro expr DIVLATENCY=DIVIDER_LATENCY+1; // added 1 for thresholding in invert function
macro expr THlat=DIVLATENCY+DIVLATENCY+4;
macro expr SUMlat=DIVLATENCY+2;
unsigned int 4 nc[THlat]; // delay line: number of accepted orientations
unsigned int 1 nc_par[NORIENTATIONS]; // 1 where orientation o was accepted
signed int FLOW_BITS Vx[NORIENTATIONS], Vy[NORIENTATIONS];
signed int DIVIDER_INPUT auxYY[NORIENTATIONS], auxXX[NORIENTATIONS], auxXY[NORIENTATIONS];
signed int (FLOW_BITS+3) sumX[SUMlat],sumY[SUMlat]; // 3 extra bits for the sum over the orientations
signed int (DIVIDER_INPUT) sumYYL_2, sumXXL_2, sumXYL_2;
signed int (DIVIDER_INPUT) NumX[DIVLATENCY], NumY[DIVLATENCY];
signed int (DIVIDER_INPUT) aux_den_0, aux_den_1, aux_NumX_0, aux_NumX_1,aux_NumY_0, aux_NumY_1;
unsigned int 1 cond[NORIENTATIONS]; // only referenced by the disabled "Pipeline 0" below
signed int (DIVIDER_INPUT) Vxx[NORIENTATIONS][DIVLATENCY+1], Vyy[NORIENTATIONS][DIVLATENCY+1],Vxy[NORIENTATIONS][DIVLATENCY+1];
signed int DIVIDER_INPUT SumXX_YY[NORIENTATIONS];
signed int DIVIDER_INPUT L2[NORIENTATIONS], den;
signed FLOW_BITS quotX, quotY;
//assert (NORIENTATIONS==8, 0, "The code function only for 8 orientations");
//------------------------------------------------------------
// Verify bitwidth in operations
// improve division (ex. divider core)
//------------------------------------------------------------
par
{
/*/ Pipeline 0
par(o=0;o<NORIENTATIONS;o++)
{
cond[o]=(LE[o]<thres); // && (FVx[o]!=NAN) && (FVy[o]!=NAN);
auxFVx[o]=FVx[o];
auxFVy[o]=FVy[o];
} */
// Pipeline 1
// Accept or reject each orientation; rejected ones contribute zeros.
// Squares are formed 4 bits wider than their destination, then the 4 least
// significant bits are dropped.
par(o=0;o<NORIENTATIONS;o++)
{
//if(cond[o] && (FVxx[o]+FVyy[o])>EPS)
if(LE[o]<thres && ((FVx[o]!=0) || FVy[o]!=0) && (FVx[o]!=NAN) && (FVy[o]!=NAN)) //((FVx[o]*FVx[o])+(FVy[o]*FVy[o]))>EPS)//(FVx[o]!=0 || FVy[o]!=0) )
par
{
//L2[o]= ((signed)one)/(FVxx[o] + FVyy[o]);
SumXX_YY[o]= ( (adjs(FVx[o],width(SumXX_YY)+4)*adjs(FVx[o],width(SumXX_YY)+4)) + (adjs(FVy[o],width(SumXX_YY)+4)*adjs(FVy[o],width(SumXX_YY)+4)))\\4 ; //FVxx[o] + FVyy[o];
Vx[o]=FVx[o];
Vy[o]=FVy[o];
Vxx[o][0]=(adjs(FVx[o],width(Vxx)+4)*adjs(FVx[o],width(Vxx)+4))\\4;
Vyy[o][0]=(adjs(FVy[o],width(Vyy)+4)*adjs(FVy[o],width(Vyy)+4))\\4;
Vxy[o][0]=(adjs(FVy[o],width(Vxy)+4)*adjs(FVx[o],width(Vxy)+4))\\4;
nc_par[o]=1;
}
else
par
{
nc_par[o]=0;
//L2[o]=0;
SumXX_YY[o]=0;
Vx[o]=0;
Vy[o]=0;
Vxx[o][0]=0;
Vyy[o][0]=0;
Vxy[o][0]=0;
}
}
// Delay lines for the squared terms: they are consumed at tap DIVLATENCY,
// next to the output of invert().
par(v=1;v<(DIVLATENCY+1);v++)
{
par(o=0;o<NORIENTATIONS;o++)
{
Vxx[o][v]=Vxx[o][v-1];
Vyy[o][v]=Vyy[o][v-1];
Vxy[o][v]=Vxy[o][v-1];
}
}
//Pipeline 2
par{
// Pipeline 2
//nc=adju((L2[0]>0),4)+adju((L2[1]>0),4)+adju((L2[2]>0),4)+adju((L2[3]>0),4)+adju((L2[4]>0),4)+adju((L2[5]>0),4)+adju((L2[6]>0),4)+adju((L2[7]>0),4);
nc[0] = UnSumMacro(nc_par, 0, NORIENTATIONS-1, width(nc));
//sumX = Vx[0]+Vx[1]+Vx[2]+Vx[3]+Vx[4]+Vx[5]+Vx[6]+Vx[7];
sumX[0] = SumMacro(Vx, 0, NORIENTATIONS-1, width(sumX));
//sumY = Vy[0]+Vy[1]+Vy[2]+Vy[3]+Vy[4]+Vy[5]+Vy[6]+Vy[7];
sumY[0] = SumMacro(Vy, 0, NORIENTATIONS-1, width(sumY));
// Pipeline 2
// L2[o] = invert(FVx^2 + FVy^2)
par(o=0;o<NORIENTATIONS;o++)
{
invert(SumXX_YY[o], Div_thr, L2[o]);
}
// Pipeline 6
// Weight the delayed squared terms by L2 (26-bit product), rescale by a
// right shift of DIVIDER_INPUT-9 and keep the DIVIDER_INPUT least
// significant bits.
par(O=0;O<NORIENTATIONS;O++)
{
//if(L2[O]!=0) // && (Vy[O]*Vx[O])!=0)
par
{
//auxYY[O] = ((adjs(Vyy[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-5)) <-DIVIDER_INPUT;
auxYY[O] = ((adjs(Vyy[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-9)) <-DIVIDER_INPUT;
//auxXX[O] = ((adjs(Vxx[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-5)) <- DIVIDER_INPUT;
auxXX[O] = ((adjs(Vxx[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-9)) <- DIVIDER_INPUT;
//auxXY[O] = ((adjs(Vxy[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-5)) <- DIVIDER_INPUT;
auxXY[O] = ((adjs(Vxy[O][DIVLATENCY],26)*adjs(L2[O],26))>>(DIVIDER_INPUT-9)) <- DIVIDER_INPUT;
}
//else
//NanCond[O]=1;
}
}
// Pipeline 7
sumYYL_2 = SumMacro(auxYY, 0, NORIENTATIONS-1, width(sumYYL_2));
sumXXL_2 = SumMacro(auxXX, 0, NORIENTATIONS-1, width(sumXXL_2));
sumXYL_2 = SumMacro(auxXY, 0, NORIENTATIONS-1, width(sumXYL_2));
// Pipeline 8
// Denominator terms drop Frac (8) bits; numerator terms drop 4 bits.
//aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+4)*adjs(sumXYL_2,width(aux_den_0)+4))\\4;
aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+Frac)*adjs(sumXYL_2,width(aux_den_0)+Frac))\\Frac;
//aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+4)*adjs(sumYYL_2,width(aux_den_1)+4))\\4;
aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+Frac)*adjs(sumYYL_2,width(aux_den_1)+Frac))\\Frac;
aux_NumX_0 = (adjs(sumX[SUMlat-1],width(aux_NumX_0)+4)*adjs(sumYYL_2,width(aux_NumX_0)+4))\\4;
aux_NumX_1 = (adjs(sumY[SUMlat-1],width(aux_NumX_1)+4)*adjs(sumXYL_2,width(aux_NumX_1)+4))\\4;
aux_NumY_0 = (adjs(sumX[SUMlat-1],width(aux_NumY_0)+4)*adjs(sumXYL_2,width(aux_NumY_0)+4))\\4;
aux_NumY_1 = (adjs(sumY[SUMlat-1],width(aux_NumY_1)+4)*adjs(sumXXL_2,width(aux_NumY_1)+4))\\4;
// Pipeline 9
//den = ((signed)one)/(aux_den_0 - aux_den_1);
invert((aux_den_0 - aux_den_1), Div_thr, den);
NumX[0] = -(aux_NumX_0 - aux_NumX_1);
NumY[0] = aux_NumY_0 - aux_NumY_1;
// Pipeline 10-14
// A zero reciprocal yields a NaN result instead of a product.
//quotX=adjs((NumX<<4)/den, FLOW_BITS);
if(den!=0)
par
{
quotX=((adjs(NumX[DIVLATENCY-1],26)*adjs(den,26))>>(DIVIDER_INPUT-1)) <- FLOW_BITS;
quotY=((adjs(NumY[DIVLATENCY-1],26)*adjs(den,26))>>(DIVIDER_INPUT-1)) <- FLOW_BITS;
}
else
par
{
quotX=SetNAN(quotX);
quotY=SetNAN(quotY);
}
//quotY=adjs((NumY<<4)/den, FLOW_BITS);
//divide12(NumX, den, quotX);
//divide12(NumY, den, quotY);
// delay for threshold
par(i=1; i<THlat; i++)
{
nc[i]=nc[i-1];
}
par(n=1; n<DIVLATENCY; n++)
{
NumX[n]=NumX[n-1];
NumY[n]=NumY[n-1];
}
par(s=1; s<SUMlat; s++)
{
sumX[s]=sumX[s-1];
sumY[s]=sumY[s-1];
}
// Pipeline 15
// Output only when enough orientations were accepted.
if (nc[THlat-1]>=nc_min)
par
{
Ox= quotX; // den = (sumXYL_22-sumXXL_2*sumYYL_2)
//Ox= (-NumX>>(lmo(den<-(width(den)-1))))<-FLOW_BITS;
//Ox= (-NumX);
Oy= quotY;
//Oy= ( NumY>>(lmo(den<-(width(den)-1))))<-FLOW_BITS;
//Oy= (NumY);
}
else
par
{
Ox=SetNAN(Ox); //in matlab is NaN
Oy=SetNAN(Oy); //in matlab is NaN
}
}
}
/*
// Invert function : DIVLATENCY cycles
// -----------------------------
macro proc invert(Den, Div_thr, quot)
{
//signed int DIVIDER_INPUT Den_p0;
unsigned int (log2ceil(width(Den))) MSB_Den;
//static signed int 14 one = 0b01000000000000;
static signed int 18 one = 0b010000000000000000;
unsigned 1 cond[DIVIDER_LATENCY];
// Enable for Cores
static signal unsigned 1 enable;
interface divider_18 (signed DIVIDER_INPUT quot, signed DIVIDER_INPUT remd, unsigned 1 rfd) divider(signed DIVIDER_INPUT dividend = one,
signed DIVIDER_INPUT divisor = adjs(Den,DIVIDER_INPUT), unsigned 1 clk=__clock, unsigned 1 ce=enable) with {busformat="B<I>"};
par
{
//Enabling atan2 Core: only for 1 clock cycle
enable=1;
cond[0]=(abs(Den)<((signed)(0@Div_thr)));
par(i=1;i<DIVIDER_LATENCY;i++)
{
cond[i]=cond[i-1];
}
if(cond[DIVIDER_LATENCY-1]==1)
quot = 0;
else
quot = divider.quot;
}
}
*/
// Divide function
// -----------------------------
// divide12(Num, Den, quot)
//
// Signed division quot = Num / Den carried out by the external divider_12 core.
// Both operands are first shifted left by a common amount so that the larger
// one is normalised, then reduced to width(quot) bits before being presented
// to the core.
//
//   Num, Den : signed operands; the internal registers are sized from width(Num).
//   quot     : destination. Receives the core quotient, or SetNAN(quot) when
//              the magnitudes of Num and Den are too far apart to survive the
//              reduction, or when the reduced divisor is zero.
//
// All statements live in one par block, so the "Pipeline n" groups below are
// register stages that are evaluated concurrently.
macro proc divide12(Num, Den, quot)
{
    signed int (width(Num)) Num_p0, Num_p1, Den_p0, Den_p1;
    signed int (width(quot)) Num_p2, Den_p2[DIVIDER_LATENCY];
    unsigned int (log2ceil(width(Num))) shift, MSB_Num, MSB_Den;
    unsigned int 1 NotValid, NotValid_2[DIVIDER_LATENCY];

    // Divider core: fed with the reduced operands, clock enable tied high.
    interface divider_12 (signed FLOW_BITS quot, signed FLOW_BITS remd, unsigned 1 rfd) divider(signed FLOW_BITS dividend = Num_p2,
        signed FLOW_BITS divisor = Den_p2[0], unsigned 1 clk=__clock, unsigned 1 ce=1) with {busformat="B<I>"};

    par
    {
        // Pipeline 0: leading-one position of |Num| and |Den| (0 for a zero operand)
        if(Num>0)
            MSB_Num = lmo(Num);
        else
            if(Num==0)
                MSB_Num = 0;
            else
                MSB_Num = lmo(-Num);
        if(Den>0)
            MSB_Den = lmo(Den);
        else
            if(Den==0)
                MSB_Den = 0;
            else
                MSB_Den = lmo(-Den);
        Den_p0 = Den;
        Num_p0 = Num;

        // Pipeline 1: validity flag and common normalisation shift
        Num_p1 = Num_p0;
        Den_p1 = Den_p0;
        // MSB_Num and MSB_Den are unsigned, so the distance between them is
        // taken as larger-minus-smaller. abs() of an unsigned subtraction
        // would leave the wrapped-around value unchanged and flag every
        // |Num| < |Den| case as invalid.
        if( (MSB_Num > MSB_Den ? MSB_Num-MSB_Den : MSB_Den-MSB_Num) >= (width(quot)-2) )
            NotValid=1;
        else
            NotValid=0;
        // Shift by the headroom of whichever operand has the higher leading one.
        if(MSB_Num > MSB_Den)
            shift = width(Num)-MSB_Num-2 ;
        else
            shift = width(Num)-MSB_Den-2 ;

        // Pipeline 2: align both operands and drop the low bits so they fit width(quot)
        NotValid_2[0] = NotValid;
        //my_dividend = (Num_p1<<shift)\\(width(Num)-width(quot));
        Num_p2 = (Num_p1<<shift)\\(width(Num)-width(quot));
        //my_divisor = (Den_p1<<shift)\\(width(Num)-width(quot));
        Den_p2[0] = (Den_p1<<shift)\\(width(Num)-width(quot));

        // delay for thresholds: carry the flag and the divisor through
        // DIVIDER_LATENCY taps so they can be tested next to the core output
        par(i=1;i<DIVIDER_LATENCY;i++)
        {
            NotValid_2[i] = NotValid_2[i-1];
            Den_p2[i] = Den_p2[i-1];
        }

        // Pipeline 3: select the core quotient or NaN
        if(NotValid_2[DIVIDER_LATENCY-1]==0 && Den_p2[DIVIDER_LATENCY-1]!=0)
            par
            {
                quot = divider.quot;
                //quot = (Num_p2)/(Den_p2);
            }
        else
            quot = SetNAN(quot);
    }
}
//--------------------------------------------
// new_full_velocity(FV, LE, thres, nc_min, Ox, Oy)
//
// Combines the per-orientation component velocities FV[] into one
// two-component velocity (Ox, Oy).
//
//   FV     : component velocity per orientation (NORIENTATIONS entries)
//   LE     : per-orientation value tested against thres; an orientation is
//            accepted only when LE[o] < thres, FV[o] != 0 and FV[o] != NAN
//   thres  : upper bound applied to LE[o]
//   nc_min : minimum number of accepted orientations required for an output
//   Ox, Oy : results; set through SetNAN() when fewer than nc_min orientations
//            were accepted or when the denominator den was zero
//
// Quantities formed below:
//   sumX = sum(FV*WREAL_SUMX), sumY = sum(FV*WIMAG_SUMY) over accepted orientations
//   sumXX, sumXY, sumYY = sums of the SUMxx_L2 table entries of accepted orientations
//   den  =   sumXY*sumXY - sumXX*sumYY
//   NumX = -(sumX*sumYY  - sumY*sumXY)
//   NumY =   sumX*sumXY  - sumY*sumXX
//   Ox, Oy derive from NumX/den and NumY/den (divider cores reached via invert()).
//
// Pay attention!
// Sign on output is not changed
//-----------------------------------------------
macro proc new_full_velocity(FV, LE,thres,nc_min, Ox, Oy)
{
macro expr Frac=4; // NOTE(review): not referenced anywhere in this macro
macro expr DIVLATENCY=DIVIDER_LATENCY; // original note: "added 1 for thresholding in invert function" - NOTE(review): no +1 is applied here, confirm
macro expr THlat=DIVLATENCY+3; // length of the delay line for the accepted-orientation count
//macro expr SUMlat=DIVLATENCY+2;
unsigned int 4 nc[THlat]; // accepted-orientation count and its delayed copies
unsigned int 1 bad_div[DIVLATENCY]; // "den was zero" flag and its delayed copies
unsigned int 1 nc_par[NORIENTATIONS]; // 1 when orientation o was accepted
//unsigned int 1 cond[NORIENTATIONS];
signed int 9 auxYY[NORIENTATIONS], auxXX[NORIENTATIONS], auxXY[NORIENTATIONS];
signed int (FLOW_BITS+4) Vx[NORIENTATIONS], Vy[NORIENTATIONS];
signed int (FLOW_BITS+7) sumX,sumY;
signed int (DIVIDER_INPUT) sumYYL_2, sumXXL_2, sumXYL_2;
signed int (DIVIDER_INPUT) aux_den_0, aux_den_1, aux_NumX_0, aux_NumX_1,aux_NumY_0, aux_NumY_1;
//signed int (DIVIDER_INPUT) NumX[DIVLATENCY], NumY[DIVLATENCY];
signed int (DIVIDER_INPUT) NumX, NumY;
signed int (DIVIDER_INPUT) den, den_1; // NOTE(review): den_1 is not referenced in this macro
//signed int (DIVIDER_INPUT) diff[DIVLATENCY];
// signed int (DIVIDER_INPUT) diff;
signed int (DIVIDER_INPUT) quotX, quotY;
// Per-orientation weight tables (formulas and scale factors as noted by the original author).
const int 9 WREAL_SUMX[NORIENTATIONS] = {-81, -75, -58, -31, 0, 31, 58, 75}; // 2^7 * {-F0 * cos(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 9 WIMAG_SUMY[NORIENTATIONS] = {0, -31, -58, -75, -81, -75, -58, -31}; // 2^7 * {-F0 * sin(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 9 SUMXY_L2[NORIENTATIONS] = {0, 45, 64, 45, 0, -45, -64, -45}; // FRAC: 7 bits
const int 9 SUMXX_L2[NORIENTATIONS] = {128, 109, 64, 19, 0, 19, 64, 109}; // FRAC: 7 bits
const int 9 SUMYY_L2[NORIENTATIONS] = {0, 19, 64, 109, 128, 109, 64, 19}; // FRAC: 7 bits
//assert (NORIENTATIONS==8, 0, "The code function only for 8 orientations");
//------------------------------------------------------------
// Verify bitwidth in operations
// improve division (ex. divider core)
//------------------------------------------------------------
// The trailing "s-I-F" annotations are the original author's bit-format notes
// (presumably sign / integer bits / fractional bits - TODO confirm).
par
{
// Pipeline 1: per orientation, either load the weighted velocity and the
// table entries, or zero every contribution when the orientation is rejected
par(o=0;o<NORIENTATIONS;o++)
{
if(LE[o]<thres && FV[o]!=0 && FV[o]!=NAN)
par
{
Vx[o]=(adjs(FV[o], width(Vx)+4)*adjs(WREAL_SUMX[o],width(Vx)+4))\\4; //s-7-8
Vy[o]=(adjs(FV[o], width(Vy)+4)*adjs(WIMAG_SUMY[o],width(Vy)+4))\\4; //s-7-8
auxXX[o]=adjs(SUMXX_L2[o],width(auxXX)); //s-0-7 bits
auxXY[o]=adjs(SUMXY_L2[o],width(auxXY)); //s-0-7 bits
auxYY[o]=adjs(SUMYY_L2[o],width(auxYY)); //s-0-7 bits
nc_par[o]=1;
}
else
par
{
Vx[o]=0;
Vy[o]=0;
auxXX[o]=0;
auxXY[o]=0;
auxYY[o]=0;
nc_par[o]=0;
}
}
//Pipeline 2: reduce every per-orientation array to a single sum
par
{
nc[0] = UnSumMacro(nc_par, 0, NORIENTATIONS-1, width(nc));
sumX = SumMacro(Vx, 0, NORIENTATIONS-1, width(sumX)); //s-10-8
sumY = SumMacro(Vy, 0, NORIENTATIONS-1, width(sumY)); //s-10-8
sumYYL_2 = SumMacro(auxYY, 0, NORIENTATIONS-1, width(sumYYL_2)); //adjs(s-3-7,DIVIDERINPUT)
sumXXL_2 = SumMacro(auxXX, 0, NORIENTATIONS-1, width(sumXXL_2)); //adjs(s-3-7,DIVIDERINPUT)
sumXYL_2 = SumMacro(auxXY, 0, NORIENTATIONS-1, width(sumXYL_2)); //adjs(s-3-7,DIVIDERINPUT)
}
// Pipeline 3: the six products that make up den, NumX and NumY
par
{
//aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+6)*adjs(sumXYL_2,width(aux_den_0)+6))\\6; //s-9-8
//aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+6)*adjs(sumYYL_2,width(aux_den_1)+6))\\6; //s-9-8
// Denominator products: 8 low bits are dropped after the multiply.
aux_den_0 = (adjs(sumXYL_2,width(aux_den_0)+8)*adjs(sumXYL_2,width(aux_den_0)+8))\\8; //s-11-6
aux_den_1 = (adjs(sumXXL_2,width(aux_den_1)+8)*adjs(sumYYL_2,width(aux_den_1)+8))\\8; //s-11-6
//aux_NumX_0 = ((adjs(sumX,width(aux_NumX_0)+7+4)*adjs(sumYYL_2,width(aux_NumX_0)+7+4))\\7)<-DIVIDER_INPUT; //s-9-8
//aux_NumX_1 = ((adjs(sumY,width(aux_NumX_1)+7+4)*adjs(sumXYL_2,width(aux_NumX_1)+7+4))\\7)<-DIVIDER_INPUT; //s-9-8
//aux_NumY_0 = ((adjs(sumX,width(aux_NumY_0)+7+4)*adjs(sumXYL_2,width(aux_NumY_0)+7+4))\\7)<-DIVIDER_INPUT; //s-9-8
//aux_NumY_1 = ((adjs(sumY,width(aux_NumY_1)+7+4)*adjs(sumXXL_2,width(aux_NumY_1)+7+4))\\7)<-DIVIDER_INPUT; //s-9-8
// Numerator products: 3 low bits are dropped, then the low DIVIDER_INPUT bits are kept.
aux_NumX_0 = ((adjs(sumX,width(aux_NumX_0)+7+4)*adjs(sumYYL_2,width(aux_NumX_0)+7+4))\\3)<-DIVIDER_INPUT; //s-5-12
aux_NumX_1 = ((adjs(sumY,width(aux_NumX_1)+7+4)*adjs(sumXYL_2,width(aux_NumX_1)+7+4))\\3)<-DIVIDER_INPUT; //s-5-12
aux_NumY_0 = ((adjs(sumX,width(aux_NumY_0)+7+4)*adjs(sumXYL_2,width(aux_NumY_0)+7+4))\\3)<-DIVIDER_INPUT; //s-5-12
aux_NumY_1 = ((adjs(sumY,width(aux_NumY_1)+7+4)*adjs(sumXXL_2,width(aux_NumY_1)+7+4))\\3)<-DIVIDER_INPUT; //s-5-12
}
// Pipeline 4: numerators and denominator
par
{
//invert((aux_den_0 - aux_den_1), den);
NumX = -(aux_NumX_0 - aux_NumX_1); //s-9-8
NumY = (aux_NumY_0 - aux_NumY_1);
den = aux_den_0 - aux_den_1;
}
// Pipeline 5+divlatency: one invert() call per component (NumX/den and NumY/den)
invert(NumX, den, quotX);
invert(NumY, den, quotY);
// Record whether the denominator handed to the dividers was zero.
if(den!=0)
par
{
bad_div[0]=0;
}
else
par
{
bad_div[0]=1;
}
// delays for threshold and div: shift the count and the zero-denominator
// flag through their delay lines
par(i=1; i<THlat; i++)
{
nc[i]=nc[i-1];
}
par(d=1; d<DIVLATENCY; d++)
{
bad_div[d]=bad_div[d-1];
}
// Pipeline 6 + divlatency: output only when enough orientations were accepted
// and the denominator was non-zero; otherwise flag NaN
if (nc[THlat-1]>=nc_min && bad_div[DIVLATENCY-1]==0)
par
{
// keep the low FLOW_BITS bits of the quotient, then shift right by 2
Ox= (quotX <- FLOW_BITS)>>2;
Oy= (quotY <- FLOW_BITS)>>2;
}
else
par
{
Ox=SetNAN(Ox); //in matlab is NaN
Oy=SetNAN(Oy); //in matlab is NaN
}
}
}
// invert(Num, Den, quot) : signed division through the divider_18 core
// ---------------------------------------------------------------------
// Despite the name, no reciprocal is formed here: the core's dividend port is
// wired to Num, its divisor port to Den (resized to DIVIDER_INPUT bits), and
// quot is loaded from the core's quotient output.
// No zero-divisor or threshold check is performed inside this macro.
macro proc invert(Num, Den, quot)
{
    // Clock-enable line of the divider core; driven high in the par block below.
    static signal unsigned 1 core_enable;

    interface divider_18 (signed DIVIDER_INPUT quot,
                          signed DIVIDER_INPUT remd,
                          unsigned 1 rfd)
        divider(signed DIVIDER_INPUT dividend = Num,
                signed DIVIDER_INPUT divisor = adjs(Den,DIVIDER_INPUT),
                unsigned 1 clk=__clock,
                unsigned 1 ce=core_enable)
        with {busformat="B<I>"};

    par
    {
        // Latch whatever the core currently presents on its quotient port.
        quot = divider.quot;
        // Keep the divider core enabled.
        core_enable=1;
    }
}
//***************************************************
//Macro new_component_velocity
//
//For every orientation, fits a straight line  P ~ a + b*t  (t = XX[f] = 1, 2, 3)
//through the NFRAMES samples P[f][orien] and outputs
//   FV[orien] : the slope b
//   LE[orien] : scaled sum over frames of the squared residual (a + b*t - P)^2
//
//NOTE(review): the original header stated LATENCY = 4; the body below has ten
//labelled stages (0, 1, 2_0, 2_1, 2_2, 3_0, 3_1, 4_0, 4_1, 4_2) - confirm.
//
//
//bits format (as stated by the original author, not checked here):
//IN: P[NFRAMES][NORIENT] --> sign-4-5
//
//OUT: FV[NORIENT] --> sign-14-5   (the original header said FV[NFRAMES]; the code indexes FV by orientation)
// LE[NORIENT] --> sign-28-5       (likewise indexed by orientation)
//
//***************************************************/
macro proc new_component_velocity(P, FV, LE){
//***********************************************/
//Constant definitions for 3 frames
//***********************************************
macro expr PSize = 10; // base bit width of the internal P copies
macro expr SXX = 14; // equals 1*1 + 2*2 + 3*3, the sum of XX[f]^2
macro expr SX = 6; // equals 1 + 2 + 3, the sum of XX[f]
macro expr DEN = 6; // equals NFRAMES*SXX - SX*SX; only referenced in commented-out code below
const int 3 XX[NFRAMES] = {1, 2, 3}; //XX3 is XX in the third dimension
// NOTE(review): WREAL and WIMAG are only referenced by commented-out statements in this macro.
const int 8 WREAL[NORIENTATIONS] = {-81, -75, -58, -31, 0, 31, 58, 75}; // 25 * {-F0 * cos(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
const int 8 WIMAG[NORIENTATIONS] = {0, -31, -58, -75, -81, -75, -58, -31}; // 25 * {-F0 * sin(PI/(NORIENT*i)) / (2*PI*F0*F0)} i= {0,1 ... NORIENT-1}
//***********************************************
//Declarations: Variables
//***********************************************
int (PSize+2) Sxy[NORIENTATIONS]; // sum over frames of P*XX
int (PSize+1) Sxy_0[NORIENTATIONS][NFRAMES]; // per-frame products P*XX
int (PSize+1) Sy[NORIENTATIONS]; // sum over frames of P
int PSize Sy_0[NORIENTATIONS][NFRAMES]; // per-frame copies of P feeding Sy
int (PSize+3) a[NORIENTATIONS]; // line offset
int (PSize+5) a_0[NORIENTATIONS]; // SXX*Sy
int (PSize+5) a_1[NORIENTATIONS]; // SX*Sxy
int (PSize+12) a_2[NORIENTATIONS]; // a_0 - a_1
int (PSize+2) b[NORIENTATIONS]; // line slope
int (PSize+4) b_0[NORIENTATIONS]; // NFRAMES*Sxy
int (PSize+4) b_1[NORIENTATIONS]; // SX*Sy
int (PSize+11) b_2[NORIENTATIONS]; // b_0 - b_1
int (PSize+3) a3_0[NORIENTATIONS]; // delayed copy of a
int (PSize+2) bs3[NORIENTATIONS]; // delayed copies of b, one per later stage
int (PSize+2) bs3_1[NORIENTATIONS];
int (PSize+2) bs4_0[NORIENTATIONS];
int (PSize+2) bs4_1[NORIENTATIONS];
int (PSize+3) Reg[NFRAMES][NORIENTATIONS]; // fitted value a + b*XX[f]
int (PSize+3) Reg_0[NFRAMES][NORIENTATIONS]; // b*XX[f]
//Pipeline auxiliary variables: copies of P carried from stage to stage
int PSize Ps0[NFRAMES][NORIENTATIONS];
int PSize Ps1[NFRAMES][NORIENTATIONS];
int PSize Ps2[NFRAMES][NORIENTATIONS];
int PSize Ps2_1[NFRAMES][NORIENTATIONS];
int PSize Ps2_2[NFRAMES][NORIENTATIONS];
int PSize Ps3[NFRAMES][NORIENTATIONS];
int PSize Ps3_1[NFRAMES][NORIENTATIONS];
int (PSize+3) LE_0[NORIENTATIONS][NFRAMES]; // residual: fitted value minus P
int (2*PSize) LE_1[NORIENTATIONS][NFRAMES]; // squared residual
//***********************************************
//Body of the function
//***********************************************
par(orien=0;orien<NORIENTATIONS;orien++)
{
//Pipeline Stage 0: per-frame products P*XX and copies of P
par(f=0;f<NFRAMES;f++)
{
Sxy_0[orien][f] = adjs(P[f][orien],width(Sxy_0))*adjs(XX[f],width(Sxy_0));
Sy_0[orien][f] = P[f][orien];
//Copying P for the next stage
Ps0[f][orien]=P[f][orien];
}
//Pipeline Stage 1: sums over the frames
par
{
//Sxy[orien] = (adjs(P[0][orien],width(Sxy))*adjs(XX[0],width(Sxy)) + adjs(P[1][orien],width(Sxy))*adjs(XX[1],width(Sxy)) + adjs(P[2][orien],width(Sxy))*adjs(XX[2],width(Sxy)) + adjs(P[3][orien],width(Sxy))*adjs(XX[3],width(Sxy)) + adjs(P[4][orien],width(Sxy))*adjs(XX[4],width(Sxy)));
Sxy[orien] = SumMacro(Sxy_0[orien], 0, NFRAMES-1,width(Sxy));
//Sy[orien] = adjs(P[0][orien],width(Sy)) + adjs(P[1][orien],width(Sy)) + adjs(P[2][orien],width(Sy)) + adjs(P[3][orien],width(Sy)) + adjs(P[4][orien],width(Sy));
Sy[orien] = SumMacro(Sy_0[orien], 0, NFRAMES-1,width(Sy));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps1[f][orien]=Ps0[f][orien];
}
}
//Pipeline Stage 2_0: the four products of the line-fit numerators
par
{
a_0[orien] = SXX*adjs(Sy[orien],width(a_0));
b_0[orien] = NFRAMES*adjs(Sxy[orien],width(b_0));
a_1[orien] = SX*adjs(Sxy[orien],width(a_1));
b_1[orien] = SX*adjs(Sy[orien],width(b_1));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2[f][orien]=Ps1[f][orien];
}
}
//Pipeline Stage 2_1: numerators a_2 = SXX*Sy - SX*Sxy and b_2 = NFRAMES*Sxy - SX*Sy
par
{
//a_2[orien] = (a_1[orien]-a_2[orien])*5; // 5 frames
//b_2[orien] = (b_1[orien]-b_2[orien])*5; // multiplied by 5 for following /50 division that become <<8 : 5/256 ~= 1/50
a_2[orien] = (adjs(a_0[orien],width(a_2))-adjs(a_1[orien],width(a_2))); // 3 frames
b_2[orien] = (adjs(b_0[orien],width(b_2))-adjs(b_1[orien],width(b_2)));
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2_1[f][orien]=Ps2[f][orien];
}
}
//Pipeline Stage 2_2: scale the numerators by 43/256 (about 1/6, presumably
//standing in for the division by DEN = 6 - confirm)
par
{
//Using 5 decimals for a and b (*25)
//a[orien] = (adjs(SXX,PSize+15)*32*adjs(Sy[orien],PSize+15) - adjs(SX,PSize+15)*32*adjs(Sxy[orien],PSize+15))/adjs(DEN,PSize+15);
//a[orien] = a_2[orien]<<8; //for 5 frames
//a[orien] = a_2[orien]<<3; //for 3 frames
//a[orien] = adjs(((a_2[orien])*21)>>7,width(a)); //for 3 frames
a[orien] = adjs((a_2[orien]*43)>>8,width(a)); //for 3 frames
//b[orien] = adjs((NFRAMES*32*adjs(Sxy[orien],PSize+13) - adjs(SX,PSize+13)*32*adjs(Sy[orien],PSize+13))/adjs(DEN,PSize+13), width(b));
//b[orien] = adjs(b_2[orien]<<8,width(b)); // for 5 frames
//b[orien] = adjs(((b_2[orien])*21)>>7,width(b)); // for 3 frames
b[orien] = adjs((b_2[orien]*43)>>8,width(b)); // for 3 frames
//Copying P for the next stage
par(f=0;f<NFRAMES;f++)
{
Ps2_2[f][orien]=Ps2_1[f][orien];
}
}
//Pipeline Stage 3_0: slope term b*XX[f] for each frame
par
{
par(f=0;f<NFRAMES;f++)
{
Reg_0[f][orien] = adjs(b[orien],width(Reg_0))*adjs(XX[f],width(Reg_0));
//Copying P for the next stage
Ps3[f][orien]=Ps2_2[f][orien];
}
//Copying b for the next stage
bs3[orien]=b[orien];
a3_0[orien] = a[orien];
}
//Pipeline Stage 3_1: fitted value a + b*XX[f]
par
{
par(f=0;f<NFRAMES;f++)
{
//Reg[fr][orien] = adjs(a[orien],width(Reg))+ adjs(b[orien],width(Reg))*adjs(XX[fr],width(Reg));
Reg[f][orien] = adjs(a3_0[orien],width(Reg))+ adjs(Reg_0[f][orien],width(Reg));
//Copying P for the next stage
Ps3_1[f][orien]=Ps3[f][orien];
}
//Copying b for the next stage
bs3_1[orien]=bs3[orien];
}
//Pipeline Stage 4_0: residual = fitted value minus the delayed P
par
{
par(f=0;f<NFRAMES;f++)
{
LE_0[orien][f] = adjs(Reg[f][orien],width(LE_0)) - adjs(Ps3_1[f][orien], width(LE_0));
}
//Copying b for the next stage
bs4_0[orien]=bs3_1[orien];
}
//Pipeline Stage 4_1: squared residual
par
{
par(f=0;f<NFRAMES;f++)
{
LE_1[orien][f] = adjs(LE_0[orien][f],width(LE_1))*adjs(LE_0[orien][f],width(LE_1));
}
//Copying b for the next stage
bs4_1[orien]=bs4_0[orien];
}
//Pipeline Stage 4_2: outputs
par
{
//LE[orien] = adjs(((((adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien], 2*PSize+26)*32)*(adjs(Reg[0][orien],2*PSize+26)- adjs(Ps3[0][orien],2*PSize+26)*32) + (adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32)*(adjs(Reg[1][orien], 2*PSize+26)- adjs(Ps3[1][orien],2*PSize+26)*32) + (adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32)*(adjs(Reg[2][orien],2*PSize+26)- adjs(Ps3[2][orien],2*PSize+26)*32) + (adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32)*(adjs(Reg[3][orien],2*PSize+26)- adjs(Ps3[3][orien],2*PSize+26)*32) + (adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32)*(adjs(Reg[4][orien],2*PSize+26)- adjs(Ps3[4][orien],2*PSize+26)*32))/NFRAMES)\\15), width(LE));
//LE[orien] = adjs(((SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize)*21)>>6)\\2,width(LE));
// Sum of the squared residuals scaled by 85/256 (about 1/3, presumably the
// division by NFRAMES - confirm), then two low bits are dropped.
LE[orien] = adjs(((SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize+2)*85)>>8)\\2,width(LE));
//LE[orien]=adjs(((LE_1[0][orien]+LE_1[1][orien]+LE_1[2][orien])>>2)\\15,width(LE));
//LE[orien] = adjs(SumMacro(LE_1[orien],0, NFRAMES-1,2*PSize+26)\\15, width(LE));
//Simplifying the equation: FVreal = - (F0*cos(ang)/2*PI)*b[orien] --> FVreal = Wreal[orien]*b[orien] //Wreal is initialised with factor 25
// FVimag = - (F0*sin(ang)/2*PI)*b[orien] --> FVimag = Wimag[orien]*b[orien] //Wreal is initialised with factor 25
//FVreal[orien]= (adjs(bs4_1[orien],PSize+8)*adjs(WREAL[orien],PSize+8))\\6; //final size of FVreal is PSize+18
//FVimag[orien]= (adjs(bs4_1[orien],PSize+8)*adjs(WIMAG[orien],PSize+8))\\6; //final size of FVimag is PSize+18
//FVreal[orien]= ((adjs(bs4_1[orien],PSize+8)*adjs(WREAL[orien],PSize+8))\\2)<-width(FVreal); //final size of FVreal is PSize+18
//FVimag[orien]= ((adjs(bs4_1[orien],PSize+8)*adjs(WIMAG[orien],PSize+8))\\2)<-width(FVimag); //final size of FVimag is PSize+18
//FVreal[orien]= ((adjs(bs4_1[orien],PSize+10)*adjs(WREAL[orien],PSize+10))\\4)<-width(FVreal); //final size of FVreal is PSize+18
//FVimag[orien]= ((adjs(bs4_1[orien],PSize+10)*adjs(WIMAG[orien],PSize+10))\\4)<-width(FVimag); //final size of FVimag is PSize+18
// The output is the delayed slope b, resized to the width of FV.
FV[orien]=adjs(bs4_1[orien], width(FV)); //Only 5 bits for Frac !!!!
}
}
}
// Optic-flow core: shared compile-time constants and macro-procedure prototypes.
#ifndef __OPTICFLOW__
#define __OPTICFLOW__
#include "GaborPrimitives.hch"

// ---- Problem size ----
#define NFRAMES 3
#define NORIENTATIONS 8

// ---- atan2 core ----
#define ATAN2WIDTH 10 //19 //24
#define ATAN2OUTWIDTH 10 //9 //19 //24
#define ATAN2LATENCY (ATAN2OUTWIDTH+4)
#define ATAN2NAME atan2_10bit //atan2_19bit

// ---- Divider cores ----
#define DIVIDER_NAME divider_21
#define DIVIDER_NAME_2 divider_27
#define DIVIDER_INPUT 18
// Parenthesised (like ATAN2LATENCY above) so the macro expands as one value in
// any expression, e.g. x - DIVIDER_LATENCY or 2*DIVIDER_LATENCY.
#define DIVIDER_LATENCY (DIVIDER_INPUT+4) // is +4 if divider has clks/div==1

// ---- Flow output format and thresholds ----
#define FLOW_BITS 12
#define THRESHOLD 16
#define NC_MIN 4
#define EPS 0
// 12-bit pattern with only the most significant bit set (same width as FLOW_BITS).
#define NAN 0b100000000000

// CORES
macro proc CoreATAN2CORDIC_fl(y, x, enable, angle);
macro proc CoreDIVIDER(my_dividend, my_divisor, result, enable);
macro proc CoreDIVIDER_2(my_dividend, my_divisor, result, enable);
//***************************************************
//Macro component_velocity
//***************************************************
macro proc component_velocity(P, FVreal, FVimag, LE);
macro proc component_velocity_mia(P, FVreal, FVimag, LE);
macro proc new_component_velocity(P, FV, LE);
//***************************************************
//Macro compute_phase
//***************************************************
macro proc compute_phase(Greal, Gimag, P);
macro proc compute_single_phase(Greal, Gimag, P);
//***************************************************
//Macro unwrap
//***************************************************
macro proc unwrap(Pin, Pout);
macro proc unwrap_3(Pin, Pout);
//***************************************************
//Macro full_velocity
//***************************************************
macro proc full_velocity(FVx,FVy,LE,thres,nc_min, enable, Ox, Oy);
macro proc full_velocity_small(FVx,FVy,LE,thres, Div_thr, nc_min, Ox, Oy);
macro proc new_full_velocity(FV, LE,thres, nc_min, Ox, Oy);
macro proc divide12(Num, Den, quot);
macro proc invert(Num, Den, quot);
//***************************************************
//Resource sharing functions
//***************************************************
macro proc compute_phase_top(Greal,Gimag, P, index);
void function_compute_phase(signed int F_BITS (*Greal),signed int F_BITS (*Gimag), signed int 9 *P);
macro proc compute_phase_index(Greal, Gimag, P);
#endif
\ No newline at end of file
/* parameters.hch
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
%
% Global compile-time parameters: maximum image resolution and number of frames.
*/
#ifndef __PARAMETERS__
#define __PARAMETERS__
// Number of cameras (1 for a single camera, 2 for a stereo system); currently disabled
//#define NCAMERAS 2
// Maximum image resolution in each dimension
#define MAX_RES_X 1024
#define MAX_RES_Y 1024
// Number of pixels of a maximum-resolution image
#define MAX_IMSIZE (MAX_RES_X*MAX_RES_Y)
// Number of frames used.
// NOTE(review): the header guarded by __OPTICFLOW__ also defines NFRAMES as 3 - keep both in sync.
#define NFRAMES 3
#endif
\ No newline at end of file
......@@ -20,7 +20,7 @@
% ImSize - Size of the input images
%
% DESCRIPTION
% Interface for a top architecture to interface with the disparity estimation core
% Interface for a top architecture to interface with the optic flow estimation core
% RETURN
%
*/
......
......@@ -8,7 +8,7 @@
#include "stdlib.hch"
#include "channels.hch"
//#include "xircav4_lib.hch" Platform-dependent
//#include "xircav4_lib.hch" //Platform-dependent
#define CORE 1 // 0 for sub-circuit test, 1 for core calls
......
/* lklib.hcc
% Copyright (C) 2014 Francisco Barranco, 09/02/2014, University of Granada-University of Maryland.
% License, GNU GPL, free software, without any warranty.
*/
#include "lklib.hch"
#include "cores.hch"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment