/* Element type of the in/out buffers processed by accumulation(). */
typedef int32_t MyData;
/* Number of elements handled by one call of accumulation() (its loop bound). */
#define BLOCK 128
/* Same value as BLOCK; used as the array-section length in the task's
   in/out dependence clauses. */
const unsigned int BSIZE = BLOCK;

/*
 * Inclusive prefix sum over one block of BLOCK elements.
 *
 *   out[i] = carry + in[0] + in[1] + ... + in[i]      for 0 <= i < BLOCK
 *
 * where carry is 0 when el == 0 and otherwise the last value produced by the
 * previous call (kept in a function-local static).
 *
 * in  : BLOCK input values, read only.
 * out : BLOCK output values, written only.
 * el  : 0 restarts the running sum from in[0]; any other value continues
 *       from the total left by the preceding call.
 *
 * Because the carry lives in a static variable, the result of a call with
 * el != 0 depends on the call that ran before it.
 * NOTE(review): callers presumably pass consecutive blocks in order, with el
 * being the block index -- confirm against the call site.
 */
#pragma omp target device(fpga)
#pragma omp task in([BSIZE]in) out([BSIZE]out)
void accumulation(const MyData    *const restrict in,
	                MyData    *const restrict out,
		  const u_int32_t                 el)

{
#pragma HLS inline off

  /* Total of the previously processed block, carried across calls. */
  static MyData sum;

  /* Seed the running total: fresh start on el == 0, else continue from carry. */
  MyData A = ((el == 0) ? in[0] : (sum + in[0]));

  out[0] = A;

 loop_acc:
  for (u_int16_t i=1 ; i<BLOCK ; i++)
    {
#    pragma HLS pipeline II=1

      A += in[i];
      out[i] = A;
    }

  /* A already holds the value just stored in out[BLOCK - 1]; save the carry
     from the local so the output-only buffer is never read back. */
  sum = A;
}

/********************** VIVADO HLS REPORT ********************/
/* Target Board: ZedBoard                                    */
/*************************************************************/
/* DSP48E      0 used |    220 available -  0.0% utilization */
/* BRAM_18K    8 used |    280 available - 2.86% utilization */
/* LUT      8107 used |  53200 available -15.29% utilization */
/* FF       5115 used | 106400 available - 4.84% utilization */
/*************************************************************/
