/* Number of elements in one block, as a preprocessor constant. */
#define BLOCK 128

/* Element type of the vectors handled in this file. */
typedef int32_t MyData;

/*
 * Same value as BLOCK, but as a typed object. This is the name the
 * pragma clauses of acc_vadd refer to, while its loop bound uses BLOCK.
 */
const unsigned BSIZE = BLOCK;

/*
 * acc_vadd - element-wise sum of two vectors of BLOCK elements.
 *
 * For every index i in [0, BLOCK) stores in1[i] + in2[i] into out[i].
 *
 * in1, in2: input vectors; read only, at least BLOCK elements each.
 * out:      output vector; at least BLOCK elements are written.
 *
 * The pointers are restrict-qualified, so the three buffers must not
 * overlap.
 *
 * The omp pragmas mark the function as a task targeted at an fpga device,
 * with in1/in2 declared as BSIZE-element inputs and out as a BSIZE-element
 * output.
 *
 * NOTE(review): the addition is done in MyData (int32_t); signed overflow
 * is undefined behavior in C -- confirm callers keep the operands in range.
 */
#pragma omp target device(fpga)
#pragma omp task in([BSIZE]in1, [BSIZE]in2) out([BSIZE]out)
void acc_vadd(const MyData *const restrict in1,
              const MyData *const restrict in2,
                    MyData *const restrict out)
{
  /* Request complete partitioning of all three arrays from the HLS tool. */
#pragma HLS array_partition variable=in1 complete
#pragma HLS array_partition variable=in2 complete
#pragma HLS array_partition variable=out complete

  /*
   * uint16_t is the standard <stdint.h> spelling (the previous u_int16_t
   * is a non-standard BSD/glibc typedef) and matches the int32_t already
   * used for MyData.
   */
 loop_vector_add_unroll:
  for (uint16_t i = 0; i < BLOCK; i++)
    {
      /* Unroll request sized to the block, i.e. the whole loop. */
#pragma HLS unroll factor=BSIZE

      out[i] = (in1[i] + in2[i]);
    }
}

/********************** VIVADO HLS REPORT ********************/
/* Target Board: ZedBoard                                    */
/*************************************************************/
/* DSP48E      0 used |    220 available - 0.0%  utilization */
/* BRAM_18K    8 used |    280 available - 2.86% utilization */
/* LUT     15125 used |  53200 available -28.43% utilization */
/* FF      17696 used | 106400 available -16.63% utilization */
/*************************************************************/
