Commit 19cd20e6 authored by Giovanni Lacopo's avatar Giovanni Lacopo
Browse files

casacore + pixel size fixed

parent fd09fa13
Loading
Loading
Loading
Loading
+74 −1
Original line number Diff line number Diff line
@@ -3,11 +3,25 @@ cmake_minimum_required(VERSION 3.16)
# Project name and languages
project(RickMPI LANGUAGES CXX)

# Set C++20 standard
# Set C++20 standard (required project-wide: RICK uses std::numbers, a C++20
# feature, in test_clib.cpp and src/gridding/{gridding_data,wstack}.cpp and
# src/deconvolution/clean_solver.cpp).
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# casacore at ${CASACORE_ROOT} was built for C++14: its Containers/Allocator.h
# relies on std::allocator<T>::pointer / const_pointer / reference /
# const_reference, which C++17 removed (deprecated since C++11). Compiling the
# casacore-including TU under C++20 therefore fails. ms_parallel_reader.cpp is
# the ONLY translation unit that includes casacore headers, and it itself uses
# no C++17/20 features (no std::numbers, structured bindings, if constexpr,
# optional/variant/string_view/span/format). The only types it exposes across
# the TU boundary (in ms_parallel_reader.h) are std::vector, std::string and
# PODs, which are ABI-compatible between C++14 and C++20 on a single GCC, so
# this per-file downgrade is safe. The per-file override is applied below,
# after the target is defined.

# --- 1. Project Definitions ---
# NOTE: the gridding kernel (Gauss vs Kaiser-Bessel) is now selected at
# RUNTIME from config.kernel_type (parsed from the YAML). The old
@@ -60,6 +74,50 @@ if(ENABLE_FITSIO)
    add_compile_definitions(FITSIO)
endif()

# F. casacore (MeasurementSet C++ I/O for the native MS reader)
# casacore does NOT ship a CMake config package; use pkg-config with a manual
# find_library fallback that honours CASACORE_ROOT / CASACORE_HOME.
option(ENABLE_CASACORE "Enable casacore MS reader" ON)
if(ENABLE_CASACORE)
    find_package(PkgConfig)
    if(PkgConfig_FOUND)
        pkg_check_modules(CASACORE casacore)
    endif()
    if(NOT CASACORE_FOUND)
        # Manual fallback: search standard prefixes.
        find_path(CASACORE_INCLUDE_DIR
            NAMES casacore/ms/MeasurementSets/MeasurementSet.h
            HINTS /usr/include /usr/local/include
                  $ENV{CASACORE_ROOT}/include
                  $ENV{CASACORE_HOME}/include)
        find_library(CASACORE_MS_LIB       NAMES casa_ms
            HINTS $ENV{CASACORE_ROOT}/lib $ENV{CASACORE_HOME}/lib)
        find_library(CASACORE_TABLES_LIB   NAMES casa_tables
            HINTS $ENV{CASACORE_ROOT}/lib $ENV{CASACORE_HOME}/lib)
        find_library(CASACORE_MEASURES_LIB NAMES casa_measures
            HINTS $ENV{CASACORE_ROOT}/lib $ENV{CASACORE_HOME}/lib)
        find_library(CASACORE_SCIMATH_LIB  NAMES casa_scimath
            HINTS $ENV{CASACORE_ROOT}/lib $ENV{CASACORE_HOME}/lib)
        find_library(CASACORE_CASA_LIB     NAMES casa_casa
            HINTS $ENV{CASACORE_ROOT}/lib $ENV{CASACORE_HOME}/lib)
        if(CASACORE_INCLUDE_DIR AND CASACORE_MS_LIB AND CASACORE_TABLES_LIB
                                AND CASACORE_CASA_LIB)
            set(CASACORE_FOUND TRUE)
            set(CASACORE_INCLUDE_DIRS ${CASACORE_INCLUDE_DIR})
            set(CASACORE_LIBRARIES
                ${CASACORE_MS_LIB} ${CASACORE_TABLES_LIB}
                ${CASACORE_MEASURES_LIB} ${CASACORE_SCIMATH_LIB}
                ${CASACORE_CASA_LIB})
        endif()
    endif()
    if(CASACORE_FOUND)
        add_compile_definitions(HAVE_CASACORE)
        message(STATUS "casacore found: ${CASACORE_LIBRARIES}")
    else()
        message(FATAL_ERROR "casacore not found. Set CASACORE_ROOT or install it.")
    endif()
endif()

# --- 3. Source Files Configuration ---
set(SOURCES
    test_clib.cpp
@@ -75,6 +133,8 @@ set(SOURCES
    src/mpi/mpi_functions.cpp
    src/fft/fft_library.cpp
    src/io/io_functions.cpp
    # Native zero-dependency MS ingestion (replaces Python create_binMS.py)
    src/io/ms_parallel_reader.cpp
    src/communication/communication.cpp
    src/decomposition/decomposition.cpp
    src/decomposition/decomposition_2d.cpp
@@ -86,6 +146,17 @@ set(SOURCES
# --- 4. Target Definition ---
add_executable(rick ${SOURCES})

# Per-file C++14 override for the casacore-including translation unit.
# See the note at the top of this file: casacore's headers do not compile under
# C++17/20 because they use std::allocator<T>::{pointer,const_pointer,reference,
# const_reference}, removed in C++17. ms_parallel_reader.cpp uses no C++17/20
# features and exposes only C++11-era types across its header boundary, so
# compiling just this TU at -std=c++14 is correct and ABI-safe while the rest of
# RICK stays at C++20. -std=c++14 (not gnu++14) keeps extensions off, matching
# CMAKE_CXX_EXTENSIONS OFF for the rest of the project.
set_source_files_properties(src/io/ms_parallel_reader.cpp
    PROPERTIES COMPILE_FLAGS "-std=c++14")

# Add a symlink or copy target for convenience (optional)
# add_custom_target(test_rick
#     COMMAND ${CMAKE_COMMAND} -E echo "Test target ready"
@@ -138,6 +209,7 @@ target_include_directories(rick PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/io
    ${CMAKE_CURRENT_SOURCE_DIR}/src/decomposition
    ${CMAKE_CURRENT_SOURCE_DIR}/src/deconvolution
    ${CASACORE_INCLUDE_DIRS}
)

target_link_libraries(rick PRIVATE
@@ -147,6 +219,7 @@ target_link_libraries(rick PRIVATE
    ${FFTW3_LIBRARIES}
    ${FFTW3_OMP_LIB}
    ${FFTW3F_LIB}
    ${CASACORE_LIBRARIES}
    m
)

MS_DATA_CONTRACT.md

0 → 100644
+88 −0
Original line number Diff line number Diff line
# MS Data Contract: Measurement Set to RICK Binary Format

This document defines the data contract for extracting data from a CASA Measurement Set (MS) and converting it into the RICK binary format used by the imager.

## 1. Column Mapping Table

The following table maps MS columns to their Python types and the corresponding C++ casacore types for implementation in a parallel reader.

| MS Column | Python Type | C++ Casacore Type | Dimensions | Notes |
| :--- | :--- | :--- | :--- | :--- |
| `UVW` | `np.float64` | `ROArrayColumn<double>` | `[N_rows, 3]` | Coordinates in meters |
| `DATA` | `complex128` | `ROArrayColumn<std::complex<double>>` | `[N_rows, N_chan, N_pol]` | Visibilities |
| `FLAG` | `bool` | `ROArrayColumn<bool>` | `[N_rows, N_chan, N_pol]` | Data quality flags |
| `WEIGHT_SPECTRUM`| `np.float64` | `ROArrayColumn<double>` | `[N_rows, N_chan, N_pol]` | Per-channel weights (preferred) |
| `WEIGHT` | `np.float64` | `ROScalarColumn<double>` | `[N_rows]` | Per-row weights (fallback) |
| `ANTENNA1` | `int` | `ROScalarColumn<int>` | `[N_rows]` | Antenna 1 ID |
| `ANTENNA2` | `int` | `ROScalarColumn<int>` | `[N_rows]` | Antenna 2 ID |
| `DATA_DESC_ID` | `int` | `ROScalarColumn<int>` | `[N_rows]` | Spectral Window ID |
| `CHAN_FREQ` | `np.float64` | `ROArrayColumn<double>` | `[N_spw, N_chan]` | (Sub-table: `SPECTRAL_WINDOW`) Frequencies in Hz |
| `INTERVAL` | `float` | `ROScalarColumn<double>` | `[1]` | (Sub-table: `TIME`) Time per sample in seconds |

## 2. Binary File Layout (RICK Format)

The converter produces a directory containing the following binary files. All coordinates are normalized and visibilities are cast to single precision.

| File Name | Data Type | Array Shape | Comment |
| :--- | :--- | :--- | :--- |
| `ucoord.bin` | `float64` | `[2 * N_rows * N_chan]` | Normalized $u$ coordinate $\in [0, 1]$ |
| `vcoord.bin` | `float64` | `[2 * N_rows * N_chan]` | Normalized $v$ coordinate $\in [0, 1]$ |
| `wcoord.bin` | `float64` | `[2 * N_rows * N_chan]` | Normalized $w$ coordinate $\in [0, 0.9999]$ |
| `visibilities_real.bin`| `float32` | `[2 * N_rows * N_chan * N_pol]` | Real part of visibilities (C-order) |
| `visibilities_img.bin` | `float32` | `[2 * N_rows * N_chan * N_pol]` | Imaginary part of visibilities (C-order) |
| `weights.bin` | `float32` | `[2 * N_rows * N_chan * N_pol]` | Weights (broadcast if per-row) |
| `meta.txt` | `string` | N/A | Metadata and scaling parameters |

### Coordinate Normalization
- $u, v$ are normalized using a symmetric max range: $u_{norm} = (u_{wave} + maxg) / (2 \cdot maxg)$.
- $w$ is normalized: $w_{norm} = (w_{wave} - w_{min}) / (w_{max} - w_{min})$.
- Values are clamped at $0.9999$ to avoid sentinel conflicts in the RICK gridder.

### Hermitian Symmetry
The binary files contain double the original MS rows to satisfy the FFT requirement $V(-u, -v, -w) = V^*(u, v, w)$.
- For every original row $(u, v, w, V)$, a conjugate row $(-u, -v, -w, V^*)$ is appended.

## 3. Metadata Format (`meta.txt`)

The metadata file is a line-separated text file containing the following fields:

| Line | Field | Type | Meaning |
| :--- | :--- | :--- | :--- |
| 1 | `Nmeasures` | `int` | Total rows in binary files ($2 \times N_{rows} \times N_{chan}$) |
| 2 | `Nvis` | `int` | Total visibility samples ($N_{measures} \times N_{pol}$) |
| 3 | `freq_per_chan` | `int` | Always `1` (due to UVW expansion) |
| 4 | `polarisations` | `int` | Number of polarizations per sample |
| 5 | `Ntime` | `int` | Number of timesteps |
| 6 | `timepersample` | `float` | Time interval between samples (seconds) |
| 7 | `total_obs_time` | `float` | Total observation time (hours) |
| 8 | `Nbaselines` | `int` | Number of unique antenna baselines |
| 9 | `uvmin` | `float` | $-maxg$ (lower bound of normalized UV in wavelengths) |
| 10 | `uvmax` | `float` | $maxg$ (upper bound of normalized UV in wavelengths) |
| 11 | `wmin` | `float` | Minimum $w$ value in meters |
| 12 | `wmax` | `float` | Maximum $w$ value in meters |

## 4. C++ API Mapping

To implement the parallel reader in C++, the following `casacore` API mappings should be used.

### Header Requirements
```cpp
#include <cascore/tables/Table.h>
#include <cascore/tables/ROScalarColumn.h>
#include <cascore/tables/ROArrayColumn.h>
#include <cascore/tables/TableQuery.h>
```

### Function Mapping

| Python-Casacore Call | C++ Casacore Equivalent | Notes |
| :--- | :--- | :--- |
| `pt.table(msfile, readonly=True)` | `Table(msfile, Table::ReadOnly)` | Opens the MS in read-only mode. |
| `table.getcol('COL')` (Scalar) | `table.getCol("COL")` $\rightarrow$ `ROScalarColumn<T>` | Extract as `ROScalarColumn<double>` or `int`. |
| `table.getcol('COL')` (Array) | `table.getCol("COL")` $\rightarrow$ `ROArrayColumn<T>` | Extract as `ROArrayColumn<double>` etc. |
| `table.nrows()` | `table.nrow()` | Returns the number of rows in the table. |
| `table.query(expr)` | `TableQuery query(table, expr)` | Used for SPW filtering (`DATA_DESC_ID == X`). |
| `ms.getcol('COL', start, len)` | `col.get(start, len, buffer)` | `buffer` is a pointer to the destination array. |

### Concurrency Note
`Table::ReadOnly` mode is designed for concurrent read-only access across multiple threads/processes. For high-performance parallel reading, use `TableQuery` to split the `nrow()` range across MPI ranks.
+52 −0
Original line number Diff line number Diff line
@@ -1072,3 +1072,55 @@ i.e. circular polarisation maps to the **imaginary** part of the (XY − YX) cro
---

*Documentation version: 2.1 | Last updated: June 2026*

---

## Native Binary MS Ingestion Layer

`NativeMSReader` (`src/io/ms_parallel_reader.h/.cpp`) replaces both
`create_binMS.py` and the casacore-based draft.  It reads CASA
Measurement Sets directly from their on-disk binary format with
**zero external library dependencies** beyond standard C++ and MPI.

### CASA MS On-Disk Structure

Array columns (UVW, DATA, FLAG, WEIGHT_SPECTRUM) are stored by
`TiledShapeStMan` in `table.f<n>_TSM<i>` files — back-to-back
fixed-size tiles of raw Fortran-order binary data, with no per-tile
header.

### Byte-Offset Arithmetic

Let $T_r$ = tile rows, $P$ = polarisations, $C$ = channels,
$s$ = `sizeof(element)`.

Tile index for MS row $r$:
$$t = \left\lfloor \frac{r}{T_r} \right\rfloor$$

Byte offset of tile $t$ in the TSM data file:
$$\delta_{\text{tile}}(t) = \delta_0 + t \cdot T_r \cdot P \cdot C \cdot s$$

Intra-tile byte offset of element $(p,\,c,\,r \bmod T_r)$ (Fortran order):
$$\delta_{\text{elem}} = \bigl((r \bmod T_r)\cdot C\cdot P + c\cdot P + p\bigr)\cdot s$$

### MPI Row Distribution

Rank $k$ owns rows $[r_k,\; r_k + N_k)$ where:
$$N_k = \left\lfloor \frac{N_{\text{rows}}}{P} \right\rfloor + \bigl[k < N_{\text{rows}} \bmod P\bigr]$$
$$r_k = k\left\lfloor \frac{N_{\text{rows}}}{P} \right\rfloor + \min(k,\; N_{\text{rows}} \bmod P)$$

### Processing Pipeline

```
[TSM tile read (chunked)] → [flag zeroing] → [per-channel λ expansion]
   → [MPI_Allreduce maxg/wmin/wmax] → [Hermitian extension]
   → [UV→[0,1], W→[0,0.9999] normalisation] → [MPI-IO collective write]
```

### Usage

```cpp
rick::io::NativeMSReader reader("/path/to/obs.ms", rank, size);
reader.readAndProcess();
reader.writeBinaryFiles("/path/to/output.binMS");
```

SUMMARY.md

0 → 100644
+393 −0

File added.

Preview size limit exceeded, changes collapsed.

+17.3 KiB

File added.

No diff preview for this file type.

Loading