# Source code for rfmix_reader.io.chunk

"""
Adapted from the `_chunk.py` script in the `pandas-plink` package.
Source: https://github.com/limix/pandas-plink/blob/main/pandas_plink/_chunk.py
"""
from typing import Optional
from dataclasses import dataclass

__all__ = ["Chunk"]



# [docs]
@dataclass
class Chunk:
    """
    Chunk specification for a contiguous submatrix of the haplotype matrix.

    Parameters
    ----------
    nsamples : Optional[int], default=1024
        Number of samples in a single chunk, limited by the total number of
        samples. Set to `None` to include all samples.
    nloci : Optional[int], default=1024
        Number of loci in a single chunk, limited by the total number of
        loci. Set to `None` to include all loci.

    Raises
    ------
    ValueError
        If `nsamples` or `nloci` is not `None` and is smaller than 1.

    Notes
    -----
    - Small chunks may increase computational time, while large chunks may increase
      memory usage.
    - For small datasets, try setting both `nsamples` and `nloci` to `None`.
    - For large datasets where you need to use every sample, try setting `nsamples=None`
      and choose a small value for `nloci`.
    """
    nsamples: Optional[int] = 1024
    nloci: Optional[int] = 1024

    def __post_init__(self) -> None:
        # A chunk dimension is a count of samples/loci, so zero or a negative
        # number can never describe a valid submatrix. Reject it at
        # construction time so the mistake is reported where it was made.
        for name in ("nsamples", "nloci"):
            size = getattr(self, name)
            # `None` is the documented "use everything" sentinel; leave it be.
            if size is not None and size < 1:
                raise ValueError(
                    f"`{name}` must be a positive integer or None, got {size!r}."
                )