First RICK C++ release (4d34bc09) · Commits · Claudio Gheller / HPC_Imaging

CMakeLists.txt

0 → 100755

+154 −0

Original line number	Diff line number	Diff line
		cmake_minimum_required(VERSION 3.16)

		# Project name and languages
		project(RickMPI LANGUAGES CXX)

		# Set C++20 standard
		set(CMAKE_CXX_STANDARD 20)
		set(CMAKE_CXX_STANDARD_REQUIRED ON)
		set(CMAKE_CXX_EXTENSIONS OFF)

		# --- 1. Project Definitions ---
		# NOTE: the gridding kernel (Gauss vs Kaiser-Bessel) is now selected at
		# RUNTIME from config.kernel_type (parsed from the YAML). The old
		# KAISERBESSEL compile definition has been removed — a single binary now
		# supports both kernels. Do NOT re-add a kernel-selection macro here.
		add_compile_definitions(
		USE_MPI
		STOKESI
		PHASE_ON
		)

		# Debug mode
		if(DEBUG)
		add_compile_definitions(DEBUG)
		set(CMAKE_BUILD_TYPE Debug)
		endif()

		option(ENABLE_FITSIO "Enable FITSIO support" OFF)

		# --- 2. Find Dependencies ---

		# A. MPI (Required)
		find_package(MPI REQUIRED)

		# B. OpenMP (Required)
		find_package(OpenMP REQUIRED)

		# C. FFTW3 + FFTW3_OMP + FFTW3F (Required for fft_library.cpp)
		find_package(PkgConfig)
		pkg_check_modules(FFTW3 REQUIRED fftw3)
		find_library(FFTW3_OMP_LIB NAMES fftw3_omp HINTS ${FFTW3_LIBRARY_DIRS})
		find_library(FFTW3F_LIB NAMES fftw3f HINTS ${FFTW3_LIBRARY_DIRS})

		# D. HeFFTe
		find_path(HEFFTE_INCLUDE_DIR NAMES heffte.h
		HINTS ${HEFFTE_ROOT} $ENV{HEFFTE_ROOT}
		PATH_SUFFIXES include)

		find_library(HEFFTE_LIBRARY NAMES heffte
		HINTS ${HEFFTE_ROOT} $ENV{HEFFTE_ROOT}
		PATH_SUFFIXES lib lib64)

		if(NOT HEFFTE_INCLUDE_DIR OR NOT HEFFTE_LIBRARY)
		message(FATAL_ERROR "HeFFTe not found! Please set HEFFTE_ROOT.")
		endif()

		# E. CFITSIO (Optional)
		if(ENABLE_FITSIO)
		find_package(CFITSIO REQUIRED)
		add_compile_definitions(FITSIO)
		endif()

		# --- 3. Source Files Configuration ---
		set(SOURCES
		test_clib.cpp
		src/config/config_parser.cpp
		src/gridding/convolution.cpp
		src/gridding/weighting.cpp
		src/gridding/stokes.cpp
		src/gridding/sector.cpp
		src/gridding/wstack.cpp
		src/gridding/gridding_data.cpp
		src/gridding/gridding_main.cpp
		src/phase/phase_correction_library.cpp
		src/mpi/mpi_functions.cpp
		src/fft/fft_library.cpp
		src/io/io_functions.cpp
		src/communication/communication.cpp
		src/decomposition/decomposition.cpp
		src/decomposition/decomposition_2d.cpp
		)

		# --- 4. Target Definition ---
		add_executable(rick ${SOURCES})

		# Add a symlink or copy target for convenience (optional)
		# add_custom_target(test_rick
		# COMMAND ${CMAKE_COMMAND} -E echo "Test target ready"
		# DEPENDS rick
		# )

		# --- 5. Compiler Flags & Optimizations ---
		if(CMAKE_CXX_COMPILER_ID MATCHES "GNU\|Clang\|Intel")

		# Initialize a list with common optimization and warning flags
		# -O3: High-level optimization
		# -Wno-unused-result: Silence warnings for ignored return values (e.g., fscanf)
		set(RICK_FLAGS "-O3" "-Wno-unused-result")

		# Architecture-specific optimizations
		if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64\|amd64")
		# Flags for Intel/AMD architectures
		# -march=native: Optimize for the host CPU instructions
		# -mavx -mavx2: Enable Advanced Vector Extensions
		list(APPEND RICK_FLAGS "-march=native" "-mtune=native" "-mavx" "-mavx2")
		message(STATUS "Architecture detected as x86_64. Enabling AVX/AVX2 flags.")

		elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64\|arm64")
		# Flags for ARM architectures (Apple Silicon, AWS Graviton, etc.)
		# AVX flags are removed as they are incompatible with ARM
		list(APPEND RICK_FLAGS "-march=native" "-mtune=native")
		message(STATUS "Architecture detected as ARM64. Using NEON/Native optimizations.")

		else()
		# Fallback for other architectures
		message(STATUS "Unknown architecture (${CMAKE_SYSTEM_PROCESSOR}). Using generic O3.")
		endif()

		# Apply the gathered flags to the 'rick' target
		# We also include OpenMP specific flags detected by find_package(OpenMP)
		target_compile_options(rick PRIVATE ${RICK_FLAGS} ${OpenMP_CXX_FLAGS})

		endif()

		# --- 6. Include Directories and Linking ---
		target_include_directories(rick PRIVATE
		${MPI_CXX_INCLUDE_DIRS}
		${HEFFTE_INCLUDE_DIR}
		${FFTW3_INCLUDE_DIRS}
		${CMAKE_CURRENT_SOURCE_DIR}
		${CMAKE_CURRENT_SOURCE_DIR}/src
		${CMAKE_CURRENT_SOURCE_DIR}/src/config
		${CMAKE_CURRENT_SOURCE_DIR}/src/gridding
		${CMAKE_CURRENT_SOURCE_DIR}/src/communication
		${CMAKE_CURRENT_SOURCE_DIR}/src/io
		${CMAKE_CURRENT_SOURCE_DIR}/src/decomposition
		)

		target_link_libraries(rick PRIVATE
		MPI::MPI_CXX
		OpenMP::OpenMP_CXX
		${HEFFTE_LIBRARY}
		${FFTW3_LIBRARIES}
		${FFTW3_OMP_LIB}
		${FFTW3F_LIB}
		m
		)

		if(ENABLE_FITSIO)
		target_link_libraries(rick PRIVATE CFITSIO::CFITSIO)
		endif()

		# --- 7. Installation ---
		install(TARGETS rick DESTINATION bin)

README.md

+554 −2

File changed.

Preview size limit exceeded, changes collapsed.

agents/claude_agents/agent-memory/cpp-hpc-reviewer/MEMORY.md

0 → 100644

+1 −0

Original line number	Diff line number	Diff line
		- [RICK codebase notes](rick_codebase.md) — key conventions and recurring issues in the RICK radio interferometry imaging codebase

agents/claude_agents/agent-memory/cpp-hpc-reviewer/rick_codebase.md

0 → 100644

+48 −0

Original line number	Diff line number	Diff line
		---
		name: RICK codebase notes
		description: Recurring patterns, conventions, and known bug classes in the RICK (Radio Imaging Code Kernels) C++/MPI+OpenMP codebase
		type: project
		---

		RICK is a distributed radio interferometry imaging code (C++/MPI+OpenMP) doing W-projection gridding.

		Decomposition model:
		- 1D mode: full X (x_start=0, xaxis=grid_size_x), Y strip per rank (Gaussian-weighted strip sizes possible).
		- 2D mode: sub-rectangle of both axes; requires ≥4 ranks; "2d" or "2d_gaussian" modes via config.
		- Visibilities distributed via ghost-region bucket sort: any vis whose kernel footprint (±KernelLen cells) overlaps a rank's domain is sent to that rank. Each rank then clips kernel to its local domain in `wstack.cpp`.

		Why: Allows each rank to grid disjoint pieces of overlapping kernel footprints — no double-counting and no halo exchange needed. The grid is output-decomposed, not input-decomposed.

		How to apply: Any code touching grid coordinates must convert local↔global. `j_global = j + x_start`, `k_global = k + y_start`. Don't forget x_start (only y_start was historically threaded through).

		Recurring bug classes:
		- Forgetting to clip kmax/jmax to local `[0, yaxis-1]`/`[0, xaxis-1]` after global→local conversion → writes into adjacent w-plane memory or OOB.
		- Forgetting `x_start` in coordinate computations (only y_start was originally plumbed through).
		- Halo-exchange utilities (`exchange_halos_x/y/2d` in `src/communication/communication.cpp`) have stride bugs (write at `grid + y*xaxis + size_x` which assumes a halo-extended row that isn't allocated). Treat these as broken — use the ghost-region clipping design instead.
		- MPI-IO write in `phase_correction` uses `starts[2] = {y_start, 0}` — correct for 1D, broken for 2D (should be `{y_start, x_start}`).
		- FITSIO write uses `rank * yaxis` — only valid when all ranks have equal yaxis (not true under Gaussian-weighted decomposition).
		- Bucket-sort `v_norm = vvt[i] / grid_size_y` while `vvt` is already in `[0,1]` (normalized by `create_binMS.py`) — dimensional mismatch with `min_vals[r]` that's also in `[0,1]`. Pre-existing in `test_clib.cpp`.
		- Kernel index `kKer = increaseprecision * fabs(v_dist + KernelLen)` relies on kernel symmetry; correct for Gauss/KaiserBessel but would be wrong for asymmetric kernels.
		- Layout-mismatch class (e.g., the 2026 `weight_grid` work): a buffer is allocated as a flat real array `[iwxaxisyaxis + i]` at the call site, but written with the interleaved complex stride `2(j+kxaxis+grid_wxaxisyaxis)` inside `wstack.cpp`. Anything sized like the visibility grid must use either the interleaved index everywhere or the flat index everywhere — never mix.

		WSClean-style sum-of-weights normalization (added 2026):
		- New `weight_grid` plumbed through `gridding → wstack → fftw_data → phase_correction(weight_grid_fft)` to divide image by per-pixel Σweights (summed over w-planes).
		- Conceptually correct compared to WSClean's `I = FFT[G]/Σw`; FFT-ing the weight grid with the same backward HeFFTe transform and then summing in image space gives the per-pixel sum-of-gridded-weights, which matches what WSClean computes.
		- Distributed correctness requires per-pixel sum across ranks for the same (iu,iv,iw), but RICK's grid is output-decomposed (each pixel lives on exactly one rank, weights for that pixel are accumulated locally) — so the post-bucket-sort weight_grid is already complete per pixel; no Allreduce needed for normalization. Halo cells from kernel overlap are still the correctness concern.
		- Common mistakes when implementing the chain: (1) the `2*` factor in init/accumulation when the buffer is allocated flat, (2) initialization gated behind `WEIGHTING_UNIFORM/BRIGGS` so NATURAL gets all-zeros and the divide zeros the image, (3) `ww[i]==1.0` skip in the visibility pass but not in the weight pass causing weight/data asymmetry, (4) missing `dwnorm` factor on the weight_sum side leaves a constant residual factor of `num_w_planes`.

		Conventions:
		- `myuint = unsigned int` (32-bit), `myull = unsigned long long`. `iKer` is sometimes `myull` and sometimes `unsigned int` across files — silent truncation risk for very large grids.
		- Kernel: `KernelLen = (w_support - 1)/2`; `convkernel` allocated `increaseprecision * w_support` doubles; symmetric.
		- Visibility coords (`vv[i]`, `uu[i]`) read from disk are in `[0,1]`. Cell-unit coords obtained via `pos_v = vv[i] / dx` where `dx = 1/grid_size_x`.
		- `ww[i] == 1.0` is treated as flagged/skipped in `wstack`, but is also the legitimate maximum w after normalization — suspect logic.
		- OpenMP main pass uses `#pragma omp atomic` on grid accumulation. Weighting pre-pass is serial (no parallel-for).
		- MPI thread level: `MPI_THREAD_FUNNELED` (only main thread does MPI).

		Weighting refactor (mid-2026):
		- The `WEIGHTING_UNIFORM` / `WEIGHTING_BRIGGS` compile-time switches were eliminated; weighting mode is now selected at runtime from `config.weighting_type` ("NATURAL" / "UNIFORM" / "BRIGGS") with `config.robust` carrying R.
		- WSClean parity formulas (must match): Uniform `w_eff = w_vis / Σw_cell`; Briggs `w_eff = w_vis / (1 + Σw_cell · f²)` with `f² = (5·10^{-R})² / (Σ(Σw_cell)² / Σ(Σw_cell))` — note Σ is GLOBAL across the whole density grid (and across MPI ranks → one Allreduce).
		- Density grid `weight_uv` is flat real-only (no `2` interleave stride); the prior code allocated 2× and indexed with `2(j+kxaxis+grid_wxaxis*yaxis)` which is a bug pattern from the same family as the `weight_grid` flat/interleaved mismatch.
		- Per-vis effective weight must be computed ONCE per visibility at the vis-center cell, NOT inside the per-kernel-cell inner loop (the prior code did the latter and overwrote `out_weight_uniform[visindex]` per cell).
		- Pre-pass must be parallel and must skip `ww==1.0` flagged points to stay symmetric with the main gridding pass.
		- Use `double` (not `float`) for Σw and Σw² accumulations — float overflows quickly on real datasets and corrupts Briggs `f²`.

agents/claude_agents/agent-memory/radioastronomy-expert/MEMORY.md

0 → 100644

+4 −0

Original line number	Diff line number	Diff line
		- [User role and context](user_role.md) — Giovanni Lacopo (INAF), C/C++/MPI/HeFFTe HPC radio-imaging engineer
		- [RICK imaging pipeline](project_rick_pipeline.md) — RICK FFT structure, phase-leakage bug, fftshift checkerboard fix
		- [Do not compile after edits in RICK](feedback_no_autocompile.md) — never run cmake/make in RICK; user compiles himself
		- [RICK Doxygen & conventions](project_rick_doxygen.md) — src/ module map, WSClean ground-truth conventions, fabricated-doc failure mode, known unfixed bugs