/* Element type of the kernel's input and output buffers. */
typedef int32_t MyData;
/* Number of elements processed by one call of the kernel. */
#define BLOCK 128
/* BLOCK as a const object; used as the array-section length in the
 * kernel's task pragma. */
const unsigned int BSIZE = BLOCK;

/*
 * Inclusive running sum over one chunk of BLOCK elements.
 *
 *   in   BLOCK input values; only read.
 *   out  BLOCK results; out[i] = carry + in[0] + ... + in[i]. Only written,
 *        matching the out() clause of the task pragma below.
 *   el   When 0, no carry is added and the scan starts from in[0]. For any
 *        other value the carry stored by the previous call is added first
 *        (presumably el is the chunk index -- confirm against the caller).
 *
 * The carry is the last running total of the previous call and is kept in
 * a function-local static, so the result of a call with el != 0 depends on
 * the call that preceded it.
 */
#pragma omp target device(fpga)
#pragma omp task in([BSIZE]in) out([BSIZE]out)
void accumulation(const MyData *const restrict in,
                  MyData *const restrict out,
                  const u_int32_t el)
{
#pragma HLS inline off

  /* Carry between calls; statics start at zero. */
  static MyData sum;

  /* Keep the running total in a scalar so that out is never read back:
   * the task clause declares it output-only, and the loop-carried
   * dependence then goes through this scalar instead of the array. */
  MyData acc = (el == 0) ? in[0] : (sum + in[0]);
  out[0] = acc;

 loop_acc:
  for (u_int16_t i = 1; i < BLOCK; i++)
    {
      acc += in[i];
      out[i] = acc;
    }

  /* Remember the final total as the carry for the next call. */
  sum = acc;
}

/********************** VIVADO HLS REPORT ********************/
/* Target Board: ZedBoard                                    */
/*************************************************************/
/* DSP48E      0 used |    220 available -  0.0% utilization */
/* BRAM_18K    8 used |    280 available - 2.86% utilization */
/* LUT      8107 used |  53200 available -15.24% utilization */
/* FF       5115 used | 106400 available - 4.81% utilization */
/*************************************************************/
