/* Element type shared by the input samples and the exported histogram. */
typedef int32_t MyData;

/* Number of samples consumed per call, and number of histogram bins. */
#define BLOCK 128
/* Same value as BLOCK, as an object usable in the task dependence clauses. */
const unsigned int BSIZE = BLOCK;

/*
 * histogram - accumulate one block of samples into a persistent histogram.
 *
 *   in        BLOCK samples; each sample value is used as a bin index.
 *             Samples outside [0, BLOCK) are ignored.
 *   histog    BLOCK-entry output; written only when copy_out is non-zero.
 *   copy_out  non-zero: copy the accumulated bins to histog after counting.
 *   reset     non-zero: clear the accumulated bins before counting.
 *
 * The bins live in a function-local static array, so counts carry over
 * from one call to the next until a reset is requested.
 *
 * NOTE(review): histog is declared as an `out` dependence but is left
 * untouched when copy_out is zero -- confirm callers never read it then.
 */
#pragma omp target device(fpga)
#pragma omp task in([BSIZE]in) out([BSIZE]histog)
void histogram(const MyData  *const restrict in,
                     MyData  *const restrict histog,
               const u_int8_t                copy_out,
               const u_int8_t                reset)

{
#pragma HLS inline off

  /* Running bin counts; static so they persist across invocations. */
  static u_int32_t local_histog[BLOCK];

  if (reset)
    {
    loop_reset:
      for (u_int16_t i=0 ; i<BLOCK ; i++)
        {
          local_histog[i] = 0;
        }
    }

 loop_histogram:
  for (u_int16_t i=0 ; i<BLOCK ; i++)
    {
      const MyData bin = in[i];

      /* A sample is a raw signed 32-bit value: reject anything that is not
         a valid bin so it can never index outside local_histog. */
      if (bin >= 0 && bin < BLOCK)
        {
          local_histog[bin]++;
        }
    }

  /* copy local_histog to histog */
  if (copy_out)
    {
    loop_copy:
      for (u_int16_t i=0 ; i<BLOCK ; i++)
        {
          histog[i] = local_histog[i];
        }
    }
}


/********************** VIVADO HLS REPORT *********************/
/* Target Board: ZedBoard                                     */
/**************************************************************/
/* DSP48E      0 used |    220 available -   0.0% utilization */
/* BRAM_18K    9 used |    280 available -  3.21% utilization */
/* LUT      8372 used |  53200 available - 15.74% utilization */
/* FF       5237 used | 106400 available -  4.92% utilization */
/**************************************************************/
