Commit fd09fa13 authored by Giovanni Lacopo's avatar Giovanni Lacopo
Browse files

BRIGGS weighting Hermitian break fixed

parent ea97cf42
Loading
Loading
Loading
Loading
+148 −27
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <numbers>
#include <string>
#include <algorithm>
#include <vector>
#include <cctype>
#include <iostream>
#include "../../ricklib.h"
@@ -248,6 +249,13 @@ void wstack(
      if (grid_w >= num_w_planes) grid_w = num_w_planes - 1;  // H4: clamp max-w vis to top plane

      // Skip if the vis center cell is outside this rank's local grid.
      // This is CORRECT (not a bug): the bucket-sort redistribution sends a
      // boundary visibility to EVERY rank whose ghost-extended slab overlaps
      // it, so halo copies are duplicates. Only the OWNING rank (center cell
      // inside its slab) must deposit density, otherwise boundary cells would
      // be double-counted. The owner always holds ALL visibilities whose
      // center falls in its cells (ghost band is a superset of the owned
      // band), so the local density at owned cells is complete.
      if (grid_u < x_start || grid_u >= x_start + xaxis) continue;
      if (grid_v < y_start || grid_v >= y_start + yaxis) continue;
      if (grid_w < 0 || grid_w >= num_w_planes) continue;
@@ -301,43 +309,156 @@ void wstack(
      }
    }

    // Compute per-vis effective weight ONCE per visibility, using the cell
    // at the vis center (NOT inside the kernel-cell inner loop).
    #pragma omp parallel for num_threads(num_threads)
    // =========================================================================
    // Pass 2: per-vis effective weight computation.
    //
    // Owned vis (center in this rank's slab): look up weight_uv[cell] directly.
    //
    // Halo vis (center in a neighbouring rank's slab but kernel footprint
    // overlapping this slab): they must use the SAME Briggs effective weight
    // that the owning rank would assign — otherwise the halo and its conjugate
    // baseline get mismatched weights, breaking Hermitian symmetry.
    //
    // Protocol: two MPI_Alltoallv rounds.
    //   Round 1: each rank sends (u_local, v_local, w) int-triples to the
    //            rank that owns the density cell (the "owning rank").
    //   Round 2: each owning rank looks up weight_uv and sends the density
    //            double back to the requester.
    // After the exchange every vis — owned or halo — has its correct density,
    // and the per-vis weight formula is applied uniformly.
    // =========================================================================

    // -- D0: Gather all ranks' slab extents. ----------------------------------
    struct SlabExt { int x0, xs, y0, ys; };
    std::vector<SlabExt> all_ext(size);
    {
      SlabExt me = { x_start, xaxis, y_start, yaxis };
      MPI_Allgather(&me, 4, MPI_INT,
                    all_ext.data(), 4, MPI_INT, MPI_COMM_WORLD);
    }

    // Return the rank that owns cell (gu, gv), or -1 for out-of-range cells.
    auto cell_owner = [&](int gu, int gv) -> int {
      for (int r = 0; r < size; r++) {
        if (gu >= all_ext[r].x0 && gu < all_ext[r].x0 + all_ext[r].xs &&
            gv >= all_ext[r].y0 && gv < all_ext[r].y0 + all_ext[r].ys)
          return r;
      }
      return -1;
    };

    // -- D1: Single pass: owned vis get weights now; halo vis get queued. -----
    // halo_vis_idx[r]  — local vis indices of halos whose owner is rank r
    // halo_queries[r]  — matching (u_local, v_local, w) triples
    std::vector<std::vector<myull>> halo_vis_idx(size);
    // Flattened int-triple storage per destination rank (packed separately)
    std::vector<std::vector<int>> halo_uvw(size);  // 3 ints per entry

    for (myull ii = 0; ii < num_points; ii++) {
      // H4 FIX: sentinel is > 1.0 (1.0 is a valid max-w vis)
      if (ww[ii] > 1.0) { out_vis_weight[ii] = 0.0; continue; }

      double pos_u = uu[ii] / dx;
      double pos_v = vv[ii] / dx;
      double ww_i  = ww[ii] / dw;
      const double pos_u = uu[ii] / dx;
      const double pos_v = vv[ii] / dx;
      const double ww_i  = ww[ii] / dw;
      int grid_u = (int)pos_u;
      int grid_v = (int)pos_v;
      int grid_w = (int)ww_i;
      if (grid_w >= num_w_planes) grid_w = num_w_planes - 1;  // H4: clamp max-w vis to top plane

      if (grid_u < x_start || grid_u >= x_start + xaxis ||
          grid_v < y_start || grid_v >= y_start + yaxis ||
          grid_w < 0 || grid_w >= num_w_planes) {
        out_vis_weight[ii] = 0.0;
        continue;
      if (grid_w >= num_w_planes) grid_w = num_w_planes - 1;
      if (grid_w < 0 || grid_w >= num_w_planes) {
        out_vis_weight[ii] = 0.0; continue;
      }

      myull cell = (myull)(grid_u - x_start)
      if (grid_u >= x_start && grid_u < x_start + xaxis &&
          grid_v >= y_start && grid_v < y_start + yaxis) {
        // Owned vis — compute weight directly.
        const myull cell = (myull)(grid_u - x_start)
                         + (myull)(grid_v - y_start) * (myull)xaxis
                         + (myull)grid_w * (myull)xaxis * (myull)yaxis;

        const double n_cell = weight_uv[cell];
        const double w_vis  = (double)weight[ii];

        if (n_cell <= 0.0) { out_vis_weight[ii] = 0.0; continue; }

      if (wmode == WeightingMode::Uniform) {
        // WSClean Uniform: w_eff = w_vis / Σw_cell.
        if (wmode == WeightingMode::Uniform)
          out_vis_weight[ii] = w_vis / n_cell;
      } else { // Briggs
        // WSClean Briggs: w_eff = w_vis / (1 + Σw_cell · f²).
        else
          out_vis_weight[ii] = w_vis / (1.0 + n_cell * briggs_f2);
      } else {
        // Halo vis — queue a density request to the owning rank.
        const int owner = cell_owner(grid_u, grid_v);
        if (owner < 0) { out_vis_weight[ii] = 0.0; continue; }
        halo_vis_idx[owner].push_back(ii);
        halo_uvw[owner].push_back(grid_u - all_ext[owner].x0);
        halo_uvw[owner].push_back(grid_v - all_ext[owner].y0);
        halo_uvw[owner].push_back(grid_w);
      }
    }

    // -- D2: Build Alltoallv send buffer for Round 1 (int triples). -----------
    std::vector<int> sq_cnt(size), sq_dsp(size, 0);
    for (int r = 0; r < size; r++) sq_cnt[r] = (int)halo_uvw[r].size();
    for (int r = 1; r < size; r++) sq_dsp[r] = sq_dsp[r-1] + sq_cnt[r-1];
    const int sq_total = sq_dsp[size-1] + sq_cnt[size-1];

    std::vector<int> send_q(sq_total);
    for (int r = 0; r < size; r++) {
      std::copy(halo_uvw[r].begin(), halo_uvw[r].end(),
                send_q.begin() + sq_dsp[r]);
    }
    halo_uvw.clear();  // free early

    // Exchange counts (Alltoall of single ints), then the query triples.
    std::vector<int> rq_cnt(size), rq_dsp(size, 0);
    MPI_Alltoall(sq_cnt.data(), 1, MPI_INT,
                 rq_cnt.data(), 1, MPI_INT, MPI_COMM_WORLD);
    for (int r = 1; r < size; r++) rq_dsp[r] = rq_dsp[r-1] + rq_cnt[r-1];
    const int rq_total = rq_dsp[size-1] + rq_cnt[size-1];

    std::vector<int> recv_q(rq_total);
    MPI_Alltoallv(send_q.data(), sq_cnt.data(), sq_dsp.data(), MPI_INT,
                  recv_q.data(), rq_cnt.data(), rq_dsp.data(), MPI_INT,
                  MPI_COMM_WORLD);
    send_q.clear();  // free early

    // -- D3: Service received queries: look up weight_uv for each triple. -----
    const int n_recv_queries = rq_total / 3;
    std::vector<double> send_dens(n_recv_queries);
    for (int j = 0; j < n_recv_queries; j++) {
      const int u_l = recv_q[j*3 + 0];
      const int v_l = recv_q[j*3 + 1];
      const int w   = recv_q[j*3 + 2];
      const myull cell = (myull)u_l
                       + (myull)v_l * (myull)xaxis
                       + (myull)w   * (myull)xaxis * (myull)yaxis;
      send_dens[j] = (cell < dens_size) ? weight_uv[cell] : 0.0;
    }
    recv_q.clear();

    // -- D4: Alltoallv Round 2 — send densities back to requesters. -----------
    // For round 2 the roles flip: what we received becomes what we send.
    std::vector<int> sd_cnt(size), sd_dsp(size, 0);
    std::vector<int> rd_cnt(size), rd_dsp(size, 0);
    for (int r = 0; r < size; r++) sd_cnt[r] = rq_cnt[r] / 3;  // queries from r → densities to r
    for (int r = 0; r < size; r++) rd_cnt[r] = sq_cnt[r] / 3;  // queries to r → densities from r
    for (int r = 1; r < size; r++) sd_dsp[r] = sd_dsp[r-1] + sd_cnt[r-1];
    for (int r = 1; r < size; r++) rd_dsp[r] = rd_dsp[r-1] + rd_cnt[r-1];
    const int rd_total = rd_dsp[size-1] + rd_cnt[size-1];

    std::vector<double> recv_dens(rd_total);
    MPI_Alltoallv(send_dens.data(), sd_cnt.data(), sd_dsp.data(), MPI_DOUBLE,
                  recv_dens.data(), rd_cnt.data(), rd_dsp.data(), MPI_DOUBLE,
                  MPI_COMM_WORLD);
    send_dens.clear();

    // -- D5: Apply received densities to halo vis. ----------------------------
    for (int r = 0; r < size; r++) {
      for (int j = 0; j < rd_cnt[r]; j++) {
        const myull vis_ii   = halo_vis_idx[r][j];
        const double n_cell  = recv_dens[rd_dsp[r] + j];
        const double w_vis   = (double)weight[vis_ii];
        if (n_cell <= 0.0) { out_vis_weight[vis_ii] = 0.0; continue; }
        if (wmode == WeightingMode::Uniform)
          out_vis_weight[vis_ii] = w_vis / n_cell;
        else
          out_vis_weight[vis_ii] = w_vis / (1.0 + n_cell * briggs_f2);
      }
    }
  }
+14 −0
Original line number Diff line number Diff line
@@ -43,6 +43,20 @@
 *     weight is then precomputed once:
 *       - Uniform: w_eff = w_vis / Σw_cell
 *       - Briggs:  w_eff = w_vis / (1 + Σw_cell · f²)
 *     HALO DENSITY EXCHANGE: visibilities whose center cell lies in a
 *     NEIGHBOURING rank's slab (sent here by the ghost-region bucket sort
 *     because their kernel footprint overlaps this slab) cannot address a
 *     local density cell. Their density Σw_cell is fetched from the OWNING
 *     rank via a two-round MPI_Alltoallv exchange: round 1 sends
 *     (u_local, v_local, w) query triples to the cell owner; round 2 returns
 *     the looked-up weight_uv density. Every vis — owned or halo — then gets
 *     the identical Briggs/Uniform effective weight it would receive on its
 *     owning rank, so conjugate baselines split across slabs stay matched
 *     and Hermitian symmetry of the gridded UV plane is preserved exactly.
 *     (Zeroing halo vis truncated footprints at slab boundaries, ~5%
 *     imaginary dirty image; a raw-Natural fallback mismatched conjugate
 *     pair weights catastrophically, up to ~48% imaginary. Both superseded.)
 *     This makes wstack() COLLECTIVE over MPI_COMM_WORLD for Uniform/Briggs.
 *  4. Main pass: convolve each visibility (and its frequency samples) onto
 *     the grid using w_eff (Uniform/Briggs) or the raw weight (Natural). The
 *     weighting branch is hoisted to the per-visibility level, out of the hot