Commit 4d34bc09 authored by Giovanni Lacopo's avatar Giovanni Lacopo
Browse files

First RICK C++ release

parent 761bf9d4
Loading
Loading
Loading
Loading

CMakeLists.txt

0 → 100755
+154 −0
Original line number Diff line number Diff line
cmake_minimum_required(VERSION 3.16)

# Project name and languages
project(RickMPI LANGUAGES CXX)

# Set C++20 standard
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# --- 1. Project Definitions ---
# NOTE: the gridding kernel (Gauss vs Kaiser-Bessel) is now selected at
# RUNTIME from config.kernel_type (parsed from the YAML). The old
# KAISERBESSEL compile definition has been removed — a single binary now
# supports both kernels. Do NOT re-add a kernel-selection macro here.
add_compile_definitions(
    USE_MPI
    STOKESI
    PHASE_ON
)

# Debug mode
if(DEBUG)
    add_compile_definitions(DEBUG)
    set(CMAKE_BUILD_TYPE Debug)
endif()

option(ENABLE_FITSIO "Enable FITSIO support" OFF)

# --- 2. Find Dependencies ---

# A. MPI (Required)
find_package(MPI REQUIRED)

# B. OpenMP (Required)
find_package(OpenMP REQUIRED)

# C. FFTW3 + FFTW3_OMP + FFTW3F (Required for fft_library.cpp)
find_package(PkgConfig)
pkg_check_modules(FFTW3 REQUIRED fftw3)
find_library(FFTW3_OMP_LIB NAMES fftw3_omp HINTS ${FFTW3_LIBRARY_DIRS})
find_library(FFTW3F_LIB NAMES fftw3f HINTS ${FFTW3_LIBRARY_DIRS})

# D. HeFFTe
find_path(HEFFTE_INCLUDE_DIR NAMES heffte.h
          HINTS ${HEFFTE_ROOT} $ENV{HEFFTE_ROOT}
          PATH_SUFFIXES include)

find_library(HEFFTE_LIBRARY NAMES heffte
             HINTS ${HEFFTE_ROOT} $ENV{HEFFTE_ROOT}
             PATH_SUFFIXES lib lib64)

if(NOT HEFFTE_INCLUDE_DIR OR NOT HEFFTE_LIBRARY)
    message(FATAL_ERROR "HeFFTe not found! Please set HEFFTE_ROOT.")
endif()

# E. CFITSIO (Optional)
if(ENABLE_FITSIO)
    find_package(CFITSIO REQUIRED)
    add_compile_definitions(FITSIO)
endif()

# --- 3. Source Files Configuration ---
set(SOURCES
    test_clib.cpp
    src/config/config_parser.cpp
    src/gridding/convolution.cpp
    src/gridding/weighting.cpp
    src/gridding/stokes.cpp
    src/gridding/sector.cpp
    src/gridding/wstack.cpp
    src/gridding/gridding_data.cpp
    src/gridding/gridding_main.cpp
    src/phase/phase_correction_library.cpp
    src/mpi/mpi_functions.cpp
    src/fft/fft_library.cpp
    src/io/io_functions.cpp
    src/communication/communication.cpp
    src/decomposition/decomposition.cpp
    src/decomposition/decomposition_2d.cpp
)

# --- 4. Target Definition ---
add_executable(rick ${SOURCES})

# Add a symlink or copy target for convenience (optional)
# add_custom_target(test_rick
#     COMMAND ${CMAKE_COMMAND} -E echo "Test target ready"
#     DEPENDS rick
# )

# --- 5. Compiler Flags & Optimizations ---
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|Intel")
    
    # Initialize a list with common optimization and warning flags
    # -O3: High-level optimization
    # -Wno-unused-result: Silence warnings for ignored return values (e.g., fscanf)
    set(RICK_FLAGS "-O3" "-Wno-unused-result")

    # Architecture-specific optimizations
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
        # Flags for Intel/AMD architectures
        # -march=native: Optimize for the host CPU instructions
        # -mavx -mavx2: Enable Advanced Vector Extensions
        list(APPEND RICK_FLAGS "-march=native" "-mtune=native" "-mavx" "-mavx2")
        message(STATUS "Architecture detected as x86_64. Enabling AVX/AVX2 flags.")
        
    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
        # Flags for ARM architectures (Apple Silicon, AWS Graviton, etc.)
        # AVX flags are removed as they are incompatible with ARM
        list(APPEND RICK_FLAGS "-march=native" "-mtune=native")
        message(STATUS "Architecture detected as ARM64. Using NEON/Native optimizations.")
        
    else()
        # Fallback for other architectures
        message(STATUS "Unknown architecture (${CMAKE_SYSTEM_PROCESSOR}). Using generic O3.")
    endif()

    # Apply the gathered flags to the 'rick' target
    # We also include OpenMP specific flags detected by find_package(OpenMP)
    target_compile_options(rick PRIVATE ${RICK_FLAGS} ${OpenMP_CXX_FLAGS})

endif()

# --- 6. Include Directories and Linking ---
target_include_directories(rick PRIVATE
    ${MPI_CXX_INCLUDE_DIRS}
    ${HEFFTE_INCLUDE_DIR}
    ${FFTW3_INCLUDE_DIRS}
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/src
    ${CMAKE_CURRENT_SOURCE_DIR}/src/config
    ${CMAKE_CURRENT_SOURCE_DIR}/src/gridding
    ${CMAKE_CURRENT_SOURCE_DIR}/src/communication
    ${CMAKE_CURRENT_SOURCE_DIR}/src/io
    ${CMAKE_CURRENT_SOURCE_DIR}/src/decomposition
)

target_link_libraries(rick PRIVATE
    MPI::MPI_CXX
    OpenMP::OpenMP_CXX
    ${HEFFTE_LIBRARY}
    ${FFTW3_LIBRARIES}
    ${FFTW3_OMP_LIB}
    ${FFTW3F_LIB}
    m
)

if(ENABLE_FITSIO)
    target_link_libraries(rick PRIVATE CFITSIO::CFITSIO)
endif()

# --- 7. Installation ---
install(TARGETS rick DESTINATION bin)
+554 −2

File changed.

Preview size limit exceeded, changes collapsed.

+1 −0
Original line number Diff line number Diff line
- [RICK codebase notes](rick_codebase.md) — key conventions and recurring issues in the RICK radio interferometry imaging codebase
+48 −0
Original line number Diff line number Diff line
---
name: RICK codebase notes
description: Recurring patterns, conventions, and known bug classes in the RICK (Radio Imaging Code Kernels) C++/MPI+OpenMP codebase
type: project
---

RICK is a distributed radio interferometry imaging code (C++/MPI+OpenMP) doing W-projection gridding.

**Decomposition model:**
- 1D mode: full X (x_start=0, xaxis=grid_size_x), Y strip per rank (Gaussian-weighted strip sizes possible).
- 2D mode: sub-rectangle of both axes; requires ≥4 ranks; "2d" or "2d_gaussian" modes via config.
- Visibilities distributed via ghost-region bucket sort: any vis whose kernel footprint (±KernelLen cells) overlaps a rank's domain is sent to that rank. Each rank then clips kernel to its local domain in `wstack.cpp`.

**Why:** Allows each rank to grid disjoint pieces of overlapping kernel footprints — no double-counting and no halo exchange needed. The grid is *output*-decomposed, not input-decomposed.

**How to apply:** Any code touching grid coordinates must convert local↔global. `j_global = j + x_start`, `k_global = k + y_start`. Don't forget x_start (only y_start was historically threaded through).

**Recurring bug classes:**
- Forgetting to clip kmax/jmax to local `[0, yaxis-1]`/`[0, xaxis-1]` after global→local conversion → writes into adjacent w-plane memory or OOB.
- Forgetting `x_start` in coordinate computations (only y_start was originally plumbed through).
- Halo-exchange utilities (`exchange_halos_x/y/2d` in `src/communication/communication.cpp`) have stride bugs (write at `grid + y*xaxis + size_x` which assumes a halo-extended row that isn't allocated). Treat these as broken — use the ghost-region clipping design instead.
- MPI-IO write in `phase_correction` uses `starts[2] = {y_start, 0}` — correct for 1D, broken for 2D (should be `{y_start, x_start}`).
- FITSIO write uses `rank * yaxis` — only valid when all ranks have equal yaxis (not true under Gaussian-weighted decomposition).
- Bucket-sort `v_norm = vvt[i] / grid_size_y` while `vvt` is already in `[0,1]` (normalized by `create_binMS.py`) — dimensional mismatch with `min_vals[r]` that's also in `[0,1]`. Pre-existing in `test_clib.cpp`.
- Kernel index `kKer = increaseprecision * fabs(v_dist + KernelLen)` relies on kernel symmetry; correct for Gauss/KaiserBessel but would be wrong for asymmetric kernels.
- **Layout-mismatch class** (e.g., the 2026 `weight_grid` work): a buffer is allocated as a flat real array `[iw*xaxis*yaxis + i]` at the call site, but written with the interleaved complex stride `2*(j+k*xaxis+grid_w*xaxis*yaxis)` inside `wstack.cpp`. Anything sized like the visibility grid must use either the interleaved index everywhere or the flat index everywhere — never mix.

**WSClean-style sum-of-weights normalization (added 2026):**
- New `weight_grid` plumbed through `gridding → wstack → fftw_data → phase_correction(weight_grid_fft)` to divide image by per-pixel Σweights (summed over w-planes).
- Conceptually correct compared to WSClean's `I = FFT[G]/Σw`; FFT-ing the weight grid with the same backward HeFFTe transform and then summing in image space gives the per-pixel sum-of-gridded-weights, which matches what WSClean computes.
- Distributed correctness requires **per-pixel sum across ranks** for the same (iu,iv,iw), but RICK's grid is output-decomposed (each pixel lives on exactly one rank, weights for that pixel are accumulated locally) — so the post-bucket-sort weight_grid is already complete per pixel; no Allreduce needed for normalization. Halo cells from kernel overlap are still the correctness concern.
- Common mistakes when implementing the chain: (1) the `2*` factor in init/accumulation when the buffer is allocated flat, (2) initialization gated behind `WEIGHTING_UNIFORM/BRIGGS` so NATURAL gets all-zeros and the divide zeros the image, (3) `ww[i]==1.0` skip in the visibility pass but not in the weight pass causing weight/data asymmetry, (4) missing `dwnorm` factor on the weight_sum side leaves a constant residual factor of `num_w_planes`.

**Conventions:**
- `myuint = unsigned int` (32-bit), `myull = unsigned long long`. `iKer` is sometimes `myull` and sometimes `unsigned int` across files — silent truncation risk for very large grids.
- Kernel: `KernelLen = (w_support - 1)/2`; `convkernel` allocated `increaseprecision * w_support` doubles; symmetric.
- Visibility coords (`vv[i]`, `uu[i]`) read from disk are in `[0,1]`. Cell-unit coords obtained via `pos_v = vv[i] / dx` where `dx = 1/grid_size_x`.
- `ww[i] == 1.0` is treated as flagged/skipped in `wstack`, but is also the legitimate maximum w after normalization — suspect logic.
- OpenMP main pass uses `#pragma omp atomic` on grid accumulation. Weighting pre-pass is serial (no parallel-for).
- MPI thread level: `MPI_THREAD_FUNNELED` (only main thread does MPI).

**Weighting refactor (mid-2026):**
- The `WEIGHTING_UNIFORM` / `WEIGHTING_BRIGGS` compile-time switches were eliminated; weighting mode is now selected at runtime from `config.weighting_type` ("NATURAL" / "UNIFORM" / "BRIGGS") with `config.robust` carrying R.
- WSClean parity formulas (must match): Uniform `w_eff = w_vis / Σw_cell`; Briggs `w_eff = w_vis / (1 + Σw_cell · f²)` with `f² = (5·10^{-R})² / (Σ(Σw_cell)² / Σ(Σw_cell))` — note Σ is GLOBAL across the whole density grid (and across MPI ranks → one Allreduce).
- Density grid `weight_uv` is **flat** real-only (no `2*` interleave stride); the prior code allocated 2× and indexed with `2*(j+k*xaxis+grid_w*xaxis*yaxis)` which is a bug pattern from the same family as the `weight_grid` flat/interleaved mismatch.
- Per-vis effective weight must be computed ONCE per visibility at the vis-center cell, NOT inside the per-kernel-cell inner loop (the prior code did the latter and overwrote `out_weight_uniform[visindex]` per cell).
- Pre-pass must be parallel and must skip `ww==1.0` flagged points to stay symmetric with the main gridding pass.
- Use `double` (not `float`) for Σw and Σw² accumulations — float overflows quickly on real datasets and corrupts Briggs `f²`.
+4 −0
Original line number Diff line number Diff line
- [User role and context](user_role.md) — Giovanni Lacopo (INAF), C/C++/MPI/HeFFTe HPC radio-imaging engineer
- [RICK imaging pipeline](project_rick_pipeline.md) — RICK FFT structure, phase-leakage bug, fftshift checkerboard fix
- [Do not compile after edits in RICK](feedback_no_autocompile.md) — never run cmake/make in RICK; user compiles himself
- [RICK Doxygen & conventions](project_rick_doxygen.md) — src/ module map, WSClean ground-truth conventions, fabricated-doc failure mode, known unfixed bugs
Loading