Skip to content

Command Line Interface

cli

Rekx is a command line interface to Kerchunk

constants

hardcodings

Hardcodings

typer_parameters

OrderCommands

Bases: TyperGroup

list_commands

list_commands(ctx: Context)

Return list of commands in the order they appear.

See also
  • https://github.com/tiangolo/typer/issues/428#issuecomment-1238866548
Source code in rekx/typer_parameters.py
def list_commands(self, ctx: Context):
    """List the command names in the order they are stored in ``self.commands``.

    The ``ctx`` argument is accepted but not used by this implementation.

    See also
    --------
    - https://github.com/tiangolo/typer/issues/428#issuecomment-1238866548

    """
    return [*self.commands]

callback_input_path

callback_input_path(input_path: Path)
Source code in rekx/typer_parameters.py
def callback_input_path(input_path: Path):
    """Validate that ``input_path`` points to an existing file or directory.

    Parameters
    ----------
    input_path: Path
        The path to validate.

    Returns
    -------
    Path
        The unchanged ``input_path`` when it exists and is a file or a
        directory.

    Raises
    ------
    typer.Exit
        With exit code 1, after printing an error message, when the path does
        not exist or is neither a file nor a directory.
    """
    if not input_path.exists():
        problem = f"[red]The path [code]{input_path}[/code] does not exist[/red]."
    elif not (input_path.is_file() or input_path.is_dir()):
        problem = f"[red]The path: [code]{input_path}[/code] is not valid[/red]."
    else:
        return input_path

    # Imported lazily: rich is only needed to render the error message.
    from rich import print

    print(problem)
    # Exit with a non-zero status so that shells and scripts can detect the
    # failure; a bare `typer.Exit()` terminates with status 0.
    raise typer.Exit(code=1)

callback_source_directory

callback_source_directory(directory: Path)
Source code in rekx/typer_parameters.py
def callback_source_directory(directory: Path):
    """Report a missing or empty source directory and hand the path back.

    A message is printed when ``directory`` does not exist or contains no
    entries. In every case the same ``directory`` object is returned.
    """
    is_populated = directory.exists() and any(directory.iterdir())
    if not is_populated:
        print(
            f"[red]The directory [code]{directory}[/code] does not exist or is empty[/red]."
        )
    return directory

rich_help_panel_names

models

get_file_format

get_file_format(file_path: Path) -> FileFormat

Get the format from the filename extension.

Source code in rekx/models.py
def get_file_format(file_path: Path) -> FileFormat:
    """
    Build a ``FileFormat`` from the lower-cased filename suffix of
    ``file_path``.
    """
    return FileFormat(file_path.suffix.lower())

select_netcdf_variable_set_from_dataset

select_netcdf_variable_set_from_dataset(
    netcdf4_variable_set: Type[Enum],
    variable_set: List[Enum],
    dataset: Dataset,
)

The same Enum model is used for both netcdf4_variable_set and xarray_variable_set.

Source code in rekx/models.py
def select_netcdf_variable_set_from_dataset(
    netcdf4_variable_set: Type[enum.Enum],
    variable_set: List[enum.Enum],
    dataset: netCDF4.Dataset,
):
    """
    Select the requested set of names from a netCDF4 dataset.

    The same Enum model is used for both netcdf4_variable_set and
    xarray_variable_set.

    Parameters
    ----------
    netcdf4_variable_set: Type[enum.Enum]
        The Enum model whose members ``all``, ``coordinates``,
        ``coordinates_without_data``, ``data``, ``metadata`` and ``time`` are
        compared against `variable_set`

    variable_set: List[enum.Enum]
        The requested selection; it is compared for equality with the members
        of the Enum model

    dataset: netCDF4.Dataset
        The dataset whose ``dimensions`` and ``variables`` names are inspected

    Returns
    -------
    set
        The names that belong to the requested selection

    Raises
    ------
    ValueError
        If `variable_set` equals none of the members listed above
    """
    # Hardcoded names of bounds and metadata variables
    bounds_names = {"lat_bnds", "lon_bnds"}
    metadata_names = {"record_status", "bnds"}
    dimension_names = set(dataset.dimensions)  # no `coordinates` via netCDF4
    variable_names = set(dataset.variables)

    if variable_set == netcdf4_variable_set.all:
        return variable_names

    if variable_set == netcdf4_variable_set.coordinates:
        return dimension_names  # Same as next one ?

    if variable_set == netcdf4_variable_set.coordinates_without_data:
        return dimension_names

    if variable_set == netcdf4_variable_set.data:
        # Everything that is neither a dimension, a bounds nor a metadata name
        return variable_names - (dimension_names | bounds_names | metadata_names)

    if variable_set == netcdf4_variable_set.metadata:
        return metadata_names & variable_names

    if variable_set == netcdf4_variable_set.time:
        return {"time"}

    raise ValueError("Invalid category")

select_xarray_variable_set_from_dataset

select_xarray_variable_set_from_dataset(
    xarray_variable_set: Type[Enum],
    variable_set: List[Enum],
    dataset: Dataset,
)

Select user-requested set of variables from an Xarray dataset.

Parameters:

Name Type Description Default
xarray_variable_set Type[Enum]

The Enum model to use for selection

required
variable_set List[Enum]

The user-requested sets of variables to select based on the Enum model

required
dataset Dataset

The input Xarray dataset from which to extract the user-requested variables

required

Returns:

Type Description
Examples
--------
Notes

This function is nearly identical to select_netcdf_variable_set_from_dataset(); the two differ only in the names of the dataset attributes they read. See also the docstring of that function.

Source code in rekx/models.py
def select_xarray_variable_set_from_dataset(
    xarray_variable_set: Type[enum.Enum],
    variable_set: List[enum.Enum],
    dataset: xr.Dataset,
):
    """
    Select user-requested set of variables from an Xarray dataset.

    Parameters
    ----------
    xarray_variable_set: enum.Enum
        The Enum model to use for selection. Its members ``all``,
        ``coordinates``, ``coordinates_without_data``, ``data``, ``metadata``
        and ``time`` are compared against `variable_set`

    variable_set: List[enum.Enum]
        The user-requested sets of variables to select based on the Enum model

    dataset: xr.Dataset
        The input Xarray dataset from which to extract the user-requested
        variables

    Returns
    -------
    set
        The names that belong to the requested selection

    Raises
    ------
    ValueError
        If `variable_set` equals none of the members listed above

    Notes
    -----
    Is quasi-identical to the function
    select_netcdf_variable_set_from_dataset() with differences in terms of the
    names of attributes. See also docstring of other function.
    """
    # Hardcoded names of bounds and metadata variables
    bounds_names = {"lat_bnds", "lon_bnds"}
    metadata_names = {"record_status"}

    variable_names = set(dataset.variables)
    coordinate_names = set(dataset.coords)
    data_variable_names = set(dataset.data_vars)

    if variable_set == xarray_variable_set.all:
        return variable_names

    if variable_set == xarray_variable_set.coordinates:
        return coordinate_names

    if variable_set == xarray_variable_set.coordinates_without_data:
        return coordinate_names - bounds_names

    if variable_set == xarray_variable_set.data:
        # data variables without the bounds and the metadata names
        return data_variable_names - (bounds_names | metadata_names)

    if variable_set == xarray_variable_set.metadata:
        return metadata_names & variable_names

    if variable_set == xarray_variable_set.time:
        return {"time"}

    raise ValueError("Invalid category")

utilities

get_scale_and_offset

get_scale_and_offset(netcdf)

Get scale and offset values from a netCDF file

Source code in rekx/utilities.py
def get_scale_and_offset(netcdf):
    """Get scale and offset values from a netCDF file

    Parameters
    ----------
    netcdf:
        Passed as-is to ``netCDF4.Dataset`` to open the file.

    Returns
    -------
    tuple
        ``(scale_factor, add_offset)`` of the selected variable. Each item is
        ``None`` when the variable does not carry that attribute.

    Raises
    ------
    IndexError
        If the file holds no variable other than its dimensions and the
        hardcoded spatial names.

    Notes
    -----
    The first variable, in the order the file lists its variables, that is
    neither a dimension nor one of ``lon``, ``longitude``, ``lat``,
    ``latitude`` is used. The code assumes a single such variable.
    """
    # The context manager closes the file, also when an error occurs below.
    with netCDF4.Dataset(netcdf) as dataset:
        netcdf_dimensions = set(dataset.dimensions)
        netcdf_dimensions.update(
            {"lon", "longitude", "lat", "latitude"}
        )  # all space dimensions?
        # Follow the file's own variable order instead of indexing into a set,
        # whose iteration order is arbitrary.
        candidates = [
            name for name in dataset.variables if name not in netcdf_dimensions
        ]
        variable = str(candidates[0])  # single variable name!

        data_variable = dataset[variable]
        attribute_names = set(data_variable.ncattrs())

        # Read the values while the file is still open.
        scale_factor = (
            data_variable.scale_factor if "scale_factor" in attribute_names else None
        )
        add_offset = (
            data_variable.add_offset if "add_offset" in attribute_names else None
        )

    return (scale_factor, add_offset)

select_location_time_series

select_location_time_series(
    time_series: Path = None,
    longitude: float = None,
    latitude: float = None,
    mask_and_scale: bool = False,
    neighbor_lookup: MethodForInexactMatches = MethodForInexactMatches.nearest,
    tolerance: float = 0.1,
    in_memory: bool = False,
    verbose: int = VERBOSE_LEVEL_DEFAULT,
)

Select a location from a time series dataset format supported by xarray

Source code in rekx/utilities.py
def select_location_time_series(
    time_series: Path = None,
    longitude: float = None,  # Longitude = None,
    latitude: float = None,  # Latitude = None,
    mask_and_scale: bool = False,
    neighbor_lookup: MethodForInexactMatches = MethodForInexactMatches.nearest,
    tolerance: float = 0.1,
    in_memory: bool = False,
    verbose: int = VERBOSE_LEVEL_DEFAULT,
):
    """Select a location from a time series dataset format supported by
    xarray

    Parameters
    ----------
    time_series: Path
        Forwarded to ``open_data_array()`` as the data to open
    longitude: float
        Forwarded to ``set_location_indexers()``
    latitude: float
        Forwarded to ``set_location_indexers()``
    mask_and_scale: bool
        Forwarded to ``open_data_array()``
    neighbor_lookup: MethodForInexactMatches
        Passed as the ``method`` argument of ``data_array.sel()``
    tolerance: float
        Passed as the ``tolerance`` argument of ``data_array.sel()``
    in_memory: bool
        Forwarded to ``open_data_array()``
    verbose: int
        Forwarded to ``set_location_indexers()``. A value of exactly 3 also
        dumps the local variables through ``debug()``

    Returns
    -------
    The result of ``data_array.sel()`` after its ``load()`` method was called

    Raises
    ------
    SystemExit
        With code 33, after printing the error, when the selection or the
        loading raises any exception
    """
    data_array = open_data_array(
        time_series,
        mask_and_scale,
        in_memory,
    )
    # Build the keyword arguments that address the location in `sel()`
    indexers = set_location_indexers(
        data_array=data_array,
        longitude=longitude,
        latitude=latitude,
        verbose=verbose,
    )
    try:
        location_time_series = data_array.sel(
            **indexers,
            method=neighbor_lookup,
            tolerance=tolerance,
        )
        location_time_series.load()  # load into memory for fast processing

    except Exception as exception:
        # Any failure in selecting or loading ends the program with status 33
        print(f"{ERROR_IN_SELECTING_DATA} : {exception}")
        raise SystemExit(33)

    if verbose == 3:
        # Only the exact level 3 triggers the dump of local variables
        debug(locals())

    return location_time_series

set_location_indexers

set_location_indexers(
    data_array,
    longitude: float = None,
    latitude: float = None,
    verbose: int = VERBOSE_LEVEL_DEFAULT,
)

Select single pair of coordinates from a data array

Will select center coordinates if none of (longitude, latitude) are provided.

Source code in rekx/utilities.py
def set_location_indexers(
    data_array,
    longitude: float = None,  # Longitude = None,
    latitude: float = None,  # Latitude = None,
    verbose: int = VERBOSE_LEVEL_DEFAULT,
):
    """Select single pair of coordinates from a data array

    Will select center coordinates if any of (longitude, latitude) is not
    provided, that is if it is ``None``. A coordinate of ``0.0`` is a valid
    request and is used as given.

    Parameters
    ----------
    data_array:
        The data array whose ``coords`` are searched for the names ``lon`` /
        ``lat`` or ``longitude`` / ``latitude``
    longitude: float
        The requested longitude or ``None``
    latitude: float
        The requested latitude or ``None``
    verbose: int
        Print the selected coordinates if greater than 0. A value of exactly 3
        also dumps the local variables through ``debug()``

    Returns
    -------
    dict
        Maps the names of the two spatial coordinates to the values to select

    Raises
    ------
    ValueError
        If none of the known spatial coordinate names exists in
        ``data_array.coords``
    """
    # ----------------------------------------------------------- Deduplicate me
    # Ugly hack for when dimensions 'longitude', 'latitude' are not spelled out!
    # Use `coords` : a time series of a single pair of coordinates has only a `time` dimension!
    indexers = {}
    dimensions = [
        dimension for dimension in data_array.coords if isinstance(dimension, str)
    ]
    x = y = None  # stay `None` if no known naming is found
    if {"lon", "lat"} & set(dimensions):
        x = "lon"
        y = "lat"
    elif {"longitude", "latitude"} & set(dimensions):
        x = "longitude"
        y = "latitude"

    if x is None or y is None:
        # Fail with a clear message instead of an UnboundLocalError further down
        raise ValueError(
            "No spatial coordinates named 'lon'/'lat' or 'longitude'/'latitude'"
            f" found among : {dimensions}"
        )

    logger.info(f"Dimensions  : {x}, {y}")

    # Test for `None` explicitly : 0.0 is a valid longitude or latitude
    if longitude is None or latitude is None:
        warning = f"{exclamation_mark} Coordinates (longitude, latitude) not provided. Selecting center coordinates."
        logger.warning(warning)
        print(warning)

        # The middle element along each spatial coordinate
        center_longitude = float(data_array[x][len(data_array[x]) // 2])
        center_latitude = float(data_array[y][len(data_array[y]) // 2])
        indexers[x] = center_longitude
        indexers[y] = center_latitude

        text_coordinates = f"{check_mark} Center coordinates (longitude, latitude) : {center_longitude}, {center_latitude}."

    else:
        indexers[x] = longitude
        indexers[y] = latitude
        text_coordinates = f"{check_mark} Coordinates : {longitude}, {latitude}."

    logger.info(text_coordinates)

    if verbose > 0:
        print(text_coordinates)

    if verbose == 3:
        debug(locals())

    return indexers

conversions

convert_to_radians

convert_to_radians(
    ctx: Context, param: CallbackParam, angle: float
) -> float

Convert floating point angular measurement from degrees to radians.

Source code in rekx/conversions.py
def convert_to_radians(
    ctx: typer.Context, param: typer.CallbackParam, angle: float
) -> float:
    """Convert floating point angular measurement from degrees to radians.

    Parameters
    ----------
    ctx: typer.Context
        The context; nothing is converted while ``ctx.resilient_parsing`` is
        true
    param: typer.CallbackParam
        Accepted but not used by this function
    angle: float
        The angle in degrees; instances of ``float`` subclasses are accepted
        too

    Returns
    -------
    The result of ``np.radians(angle)``, or ``None`` while
    ``ctx.resilient_parsing`` is true

    Raises
    ------
    typer.BadParameter
        If `angle` is not an instance of ``float``
    """
    if ctx.resilient_parsing:
        return
    # `isinstance` also accepts subclasses of float, e.g. `numpy.float64`
    if not isinstance(angle, float):
        raise typer.BadParameter("Input should be a float!")

    return np.radians(angle)

timestamp

generate_datetime_series

generate_datetime_series(
    start_time: Optional[str] = None,
    end_time: Optional[str] = None,
    frequency: Optional[str] = TIMESTAMPS_FREQUENCY_DEFAULT,
)
Example

>>> start_time = '2010-06-01 06:00:00'
>>> end_time = '2010-06-01 08:00:00'
>>> frequency = 'h'  # 'h' for hourly
>>> generate_datetime_series(start_time, end_time, frequency)
array(['2010-06-01T06:00:00', '2010-06-01T07:00:00', '2010-06-01T08:00:00'],
      dtype='datetime64[s]')

Source code in rekx/timestamp.py
def generate_datetime_series(
    start_time: Optional[str] = None,
    end_time: Optional[str] = None,
    frequency: Optional[str] = TIMESTAMPS_FREQUENCY_DEFAULT,
):
    """
    Generate timestamps from `start_time` up to `end_time` in steps of one
    unit of `frequency`.

    Parameters
    ----------
    start_time: Optional[str]
        The first timestamp, in a form accepted by ``numpy.datetime64``.
        Required despite the ``None`` default.
    end_time: Optional[str]
        The last timestamp, in a form accepted by ``numpy.datetime64``.
        Required despite the ``None`` default.
    frequency: Optional[str]
        The unit code passed to ``numpy.timedelta64``, e.g. 'h' for hourly

    Returns
    -------
    pandas.DatetimeIndex
        The timestamps in nanosecond resolution

    Raises
    ------
    ValueError
        If `start_time` or `end_time` is ``None``

    Example
    -------
    >>> start_time = '2010-06-01 06:00:00'
    >>> end_time = '2010-06-01 08:00:00'
    >>> frequency = 'h'  # 'h' for hourly
    >>> timestamps = generate_datetime_series(start_time, end_time, frequency)
    >>> len(timestamps)
    3
    >>> timestamps[0]
    Timestamp('2010-06-01 06:00:00')
    """
    if start_time is None or end_time is None:
        # Without this check NumPy fails later on the resulting NaT values
        raise ValueError(
            "Both `start_time` and `end_time` are required to generate a datetime series."
        )

    start = np.datetime64(start_time)
    end = np.datetime64(end_time)
    freq = np.timedelta64(1, frequency)
    timestamps = np.arange(start, end + freq, freq)  # +freq to include the end time

    from pandas import DatetimeIndex

    # One conversion to nanoseconds suffices : the index keeps that resolution
    return DatetimeIndex(timestamps.astype("datetime64[ns]"))