# inspect
Functions:

Name | Description
---|---
`inspect_netcdf_data` | Collect the metadata of a single or multiple NetCDF files.
## inspect_netcdf_data
```python
inspect_netcdf_data(
    input_path: Path = ".",
    pattern: str = "*.nc",
    variable: str = None,
    variable_set: XarrayVariableSet = all,
    long_table: Optional[bool] = True,
    group_metadata: Optional[bool] = False,
    longitude: float = 8,
    latitude: float = 45,
    repetitions: int = REPETITIONS_DEFAULT,
    humanize: bool = False,
    csv: Path = None,
    verbose: int = VERBOSE_LEVEL_DEFAULT,
) -> None
```
Collect the metadata of a single or multiple NetCDF files.
Scan the `source_directory` for files that match the given `pattern` and collect their metadata, including file name, file size, dimensions, shape, chunks, cache, type, scale, offset, compression and shuffling; lastly, measure the time required to retrieve and load the data variables (only) into memory.
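The signature above suggests the function can also be called directly from Python, since the Typer metadata lives in the `Annotated` hints and the defaults are ordinary values. A minimal sketch, assuming the import path `rekx.inspect` implied by the source reference below and a hypothetical `data/` directory of NetCDF files:

```python
from pathlib import Path

from rekx.inspect import inspect_netcdf_data  # import path assumed from "rekx/inspect.py"

# Print a long metadata table for every *.nc file found in the directory
inspect_netcdf_data(
    input_path=Path("data"),  # hypothetical directory of NetCDF files
    pattern="*.nc",
    long_table=True,
)

# Write the same metadata to a CSV file instead of printing it
inspect_netcdf_data(
    input_path=Path("data"),
    pattern="*.nc",
    csv=Path("netcdf_metadata.csv"),  # hypothetical output file name
)
```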
Parameters:

Name | Type | Description | Default
---|---|---|---
`input_path` | `Path` | A single path or a list of paths to the input NetCDF data | `'.'`
`variable` | `str` | Name of the variable to query | `None`
`variable_set` | `XarrayVariableSet` | Name of the set of variables to query. See also docstring of XarrayVariableSet. | `all`
`longitude` | `float` | The longitude of the location to read data | `8`
`latitude` | `float` | The latitude of the location to read data | `45`
`group_metadata` | `Optional[bool]` | Visually group metadata records per input file by using empty lines in-between | `False`
`repetitions` | `int` | Number of repetitions for read operation | `REPETITIONS_DEFAULT`
`humanize` | `bool` | Humanize measured quantities of bytes | `False`
`csv` | `Path` | Output file name for comma-separated values | `None`
`verbose` | `int` | Verbosity level | `VERBOSE_LEVEL_DEFAULT`
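For a single file, the selection and timing options work together: `variable` (or `variable_set`) selects what to read, `longitude`/`latitude` pick the sample point, and `repetitions` sets how many timed reads are performed. A hedged example, reusing the assumed import from above with a placeholder file and variable name:

```python
from pathlib import Path

from rekx.inspect import inspect_netcdf_data  # import path assumed, as above

# Time repeated point reads of one variable at a given location
inspect_netcdf_data(
    input_path=Path("data/temperature.nc"),  # hypothetical single NetCDF file
    variable="tas",                           # placeholder variable name
    longitude=8,
    latitude=45,
    repetitions=10,
    humanize=True,  # report byte quantities in human-readable units
)
```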
Returns:

Type | Description
---|---
`None` | This function does not return anything. It either prints out the results in the terminal or writes them in a CSV file if requested.
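When a `csv` path is given, the records can be post-processed with any tabular tool. A minimal sketch with pandas, assuming the `netcdf_metadata.csv` file from the earlier example (column names depend on the metadata fields listed above):

```python
import pandas as pd

# Load the CSV written by inspect_netcdf_data(..., csv=Path("netcdf_metadata.csv"))
metadata = pd.read_csv("netcdf_metadata.csv")
print(metadata.head())
```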
Source code in rekx/inspect.py
```python
def inspect_netcdf_data(
    input_path: Annotated[Path, typer_argument_source_path] = ".",
    pattern: Annotated[str, typer_option_filename_pattern] = "*.nc",
    variable: str = None,
    variable_set: Annotated[
        XarrayVariableSet, typer.Option(help="Set of Xarray variables to diagnose")
    ] = XarrayVariableSet.all,
    long_table: Annotated[
        Optional[bool],
        "Group rows of metadata per input NetCDF file and variable in a long table",
    ] = True,
    group_metadata: Annotated[
        Optional[bool],
        "Visually cluster rows of metadata per input NetCDF file and variable",
    ] = False,
    longitude: Annotated[float, typer_argument_longitude_in_degrees] = 8,
    latitude: Annotated[float, typer_argument_latitude_in_degrees] = 45,
    repetitions: Annotated[int, typer_option_repetitions] = REPETITIONS_DEFAULT,
    humanize: Annotated[bool, typer_option_humanize] = False,
    csv: Annotated[Path, typer_option_csv] = None,
    verbose: Annotated[int, typer_option_verbose] = VERBOSE_LEVEL_DEFAULT,
) -> None:
    """Collect the metadata of a single or multiple NetCDF files.

    Scan the `source_directory` for files that match the given `pattern`,
    and collect their metadata, including: file name, file size, dimensions,
    shape, chunks, cache, type, scale, offset, compression, shuffling and
    lastly measure the time required to retrieve and load data variables (only)
    in memory.

    Parameters
    ----------
    input_path: Path
        A single path or a list of paths to the input NetCDF data
    variable: str
        Name of the variable to query
    variable_set: XarrayVariableSet
        Name of the set of variables to query. See also docstring of
        XarrayVariableSet.
    longitude: float
        The longitude of the location to read data
    latitude: float
        The latitude of the location to read data
    group_metadata: bool
        Visually group metadata records per input file by using empty lines
        in-between
    repetitions: int
        Number of repetitions for read operation
    humanize: bool
        Humanize measured quantities of bytes
    csv: Path
        Output file name for comma-separated values
    verbose: int
        Verbosity level

    Returns
    -------
    None
        This function does not return anything. It either prints out the
        results in the terminal or writes them in a CSV file if requested.
    """
    if input_path.is_file():
        metadata, _ = get_netcdf_metadata(
            input_netcdf_path=input_path,
            variable=variable,
            variable_set=variable_set,
            longitude=longitude,
            latitude=latitude,
            repetitions=repetitions,
            humanize=humanize,
        )
        if not csv:
            from .print import print_metadata_table

            print_metadata_table(metadata)
        if csv:
            write_metadata_dictionary_to_csv(
                dictionary=metadata,
                output_filename=csv,
            )
        return

    if input_path.is_dir():
        source_directory = Path(input_path)
        if not any(source_directory.iterdir()):
            print(f"[red]The directory [code]{source_directory}[/code] is empty[/red].")
            return
        file_paths = list(source_directory.glob(pattern))
        if not file_paths:
            print(
                f"No files matching the pattern [code]{pattern}[/code] found in [code]{source_directory}[/code]!"
            )
            return
        mode = DisplayMode(verbose)
        with display_context[mode]:
            try:
                metadata_series = get_multiple_netcdf_metadata(
                    file_paths=file_paths,
                    variable_set=variable_set,
                    longitude=longitude,
                    latitude=latitude,
                    repetitions=repetitions,
                    humanize=humanize,
                )
            except TypeError as e:
                raise ValueError("Error occurred:", e)
        if csv:
            write_nested_dictionary_to_csv(
                nested_dictionary=metadata_series,
                output_filename=csv,
            )
            return
        if not long_table:
            from .print import print_metadata_series_table

            print_metadata_series_table(
                metadata_series=metadata_series,
                group_metadata=group_metadata,
            )
        else:
            from .print import print_metadata_series_long_table

            print_metadata_series_long_table(
                metadata_series=metadata_series,
                group_metadata=group_metadata,
            )
```
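The heavy lifting is delegated to `get_netcdf_metadata()` and `get_multiple_netcdf_metadata()`, which are not reproduced on this page. For orientation only, the following sketch shows how comparable per-variable metadata (dimensions, shape, chunks, compression, shuffling, read timing) can be gathered with xarray; it illustrates the idea and is not rekx's actual helper:

```python
import time
from pathlib import Path

import xarray as xr


def sketch_netcdf_metadata(path: Path, variable: str) -> dict:
    """Illustrative only: collect rough per-variable metadata from a NetCDF file."""
    dataset = xr.open_dataset(path, mask_and_scale=False)
    data_array = dataset[variable]
    encoding = data_array.encoding  # backend encoding: chunk sizes, compression, scale/offset, ...

    start = time.perf_counter()
    data_array.load()  # time reading the variable into memory
    read_time = time.perf_counter() - start

    return {
        "File name": path.name,
        "File size": path.stat().st_size,
        "Dimensions": dict(data_array.sizes),
        "Shape": data_array.shape,
        "Chunks": encoding.get("chunksizes"),
        "Type": str(data_array.dtype),
        "Scale": encoding.get("scale_factor"),
        "Offset": encoding.get("add_offset"),
        "Compression": encoding.get("zlib") or encoding.get("compression"),
        "Shuffling": encoding.get("shuffle"),
        "Read time (s)": read_time,
    }
```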