Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ repos:
- id: check-added-large-files
args: ["--maxkb=5000"]

- repo: https://github.com/ikamensh/flynt
rev: 1.0.1
hooks:
- id: flynt
args: ["--fail-on-change"]

- repo: https://github.com/psf/black
rev: 23.12.1
hooks:
Expand All @@ -27,12 +33,6 @@ repos:
hooks:
- id: isort

- repo: https://github.com/ikamensh/flynt
rev: 1.0.1
hooks:
- id: flynt
args: ["--fail-on-change"]

- repo: https://github.com/PyCQA/docformatter
rev: v1.7.5
hooks:
Expand Down
1 change: 1 addition & 0 deletions climateset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# Data directory layout: every *_DATA constant below is a sub-directory of DATA_DIR,
# which itself sits directly under PROJECT_ROOT.
DATA_DIR = PROJECT_ROOT / "data"
RAW_DATA = DATA_DIR / "raw"
PROCESSED_DATA = DATA_DIR / "processed"
FINAL_DATA = DATA_DIR / "final"
LOAD_DATA = DATA_DIR / "load"
META_DATA = DATA_DIR / "meta"
# Scripts live directly under the project root, not under DATA_DIR.
SCRIPT_DIR = PROJECT_ROOT / "scripts"
Expand Down
Empty file.
23 changes: 23 additions & 0 deletions climateset/processing/processor_step.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from abc import ABC, abstractmethod
from pathlib import Path


class AbstractProcessorStep(ABC):
    """
    Base class for a single processing step.

    Subclasses must implement :meth:`execute`. ``results_directory`` starts out
    as ``None`` and is never assigned by this base class; presumably concrete
    steps set it while executing (confirm against the subclasses).
    """

    def __init__(self):
        # Nothing has been produced yet, so there is no results directory.
        self.results_directory = None

    @abstractmethod
    def execute(self, input_directory):
        """
        Run this step.

        Args:
            input_directory: Input directory handed to the step.
        """

    def get_results_directory(self):
        """Return the current value of ``results_directory``."""
        return self.results_directory


def process_steps(input_directory: Path, list_of_steps: list[AbstractProcessorStep]):
    """
    Execute every step, in order, against the same input directory.

    Args:
        input_directory: Directory passed to each step's ``execute``.
        list_of_steps: Steps to run, in list order.

    Returns:
        ``input_directory``, unchanged.
    """
    # NOTE: chaining is currently disabled, so each step receives the original
    # directory instead of the previous step's results. The disabled hand-over was:
    # current_input_directory = step.get_results_directory()
    for processor_step in list_of_steps:
        processor_step.execute(input_directory)

    return input_directory
Empty file.
85 changes: 85 additions & 0 deletions climateset/processing/raw/abstract_raw_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Union

from climateset import CONFIGS
from climateset.processing.raw.checker import (
AbstractDirectoryChecker,
BasicDirectoryChecker,
)
from climateset.utils import create_logger, get_json_config

LOGGER = create_logger(__name__)


class AbstractRawProcessor(ABC):
    """
    Abstract class for raw processing.

    Concrete subclasses must implement :meth:`type_class_meta`,
    :meth:`process_directory` and :meth:`file_belongs_to_type`.
    """

    def __init__(
        self,
        input_directory: Union[str, Path],
        working_directory: Union[str, Path],
        processing_steps: Union[list, None] = None,
        checker: Union[AbstractDirectoryChecker, None] = None,
        processing_parameters_config: Union[str, Path] = CONFIGS / "processing" / "raw_processing_params.json",
    ):
        """
        Set up directories, processing steps, processing parameters and the checker.

        Args:
            input_directory: Input directory; a ``str`` is converted to ``Path``.
            working_directory: Working directory; a ``str`` is converted to ``Path``.
            processing_steps: Initial processing steps. ``None`` or an empty list
                results in a fresh empty list.
            checker: Directory checker to use. When ``None``, a
                ``BasicDirectoryChecker`` on ``input_directory`` is created.
            processing_parameters_config: Path of the JSON configuration read with
                ``get_json_config``; its ``"calendar"`` and ``"units"`` entries are
                stored on the instance.
        """
        # Name tag supplied by the concrete subclass.
        self.type_class = self.type_class_meta()

        self.input_directory = Path(input_directory) if isinstance(input_directory, str) else input_directory
        self.working_directory = Path(working_directory) if isinstance(working_directory, str) else working_directory

        self.processing_steps = processing_steps if processing_steps else []
        # Starts empty here; presumably populated by subclasses (confirm against them).
        self.available_steps = {}

        self.processing_params_config: Union[str, Path] = processing_parameters_config
        self.meta_raw_dict = get_json_config(self.processing_params_config)
        self.calendar = self.meta_raw_dict["calendar"]
        self.desired_units = self.meta_raw_dict["units"]

        # Compare against None explicitly so that a caller-supplied checker is
        # never replaced just because it happens to evaluate as falsy.
        if checker is None:
            checker = BasicDirectoryChecker(directory=self.input_directory)
        self.checker: AbstractDirectoryChecker = checker

    @abstractmethod
    def type_class_meta(self) -> str:
        """Returns the name tag of the subclass."""

    @abstractmethod
    def process_directory(self):
        """
        Preprocessing a subdir - must be implemented for each subclass.

        Returns:

        """

    @abstractmethod
    def file_belongs_to_type(self, input_file: Path) -> bool:
        """
        Checks if a file belongs to input4mips or cmip6 category.

        Args:
            input_file (Path): the file that should be checked
        Returns:
            True if it belongs to the class type, False if not
        """

    def add_processing_step(self, steps: Union[str, list]):
        """
        Append one or more steps to ``processing_steps``.

        Only steps present in ``available_steps`` are appended. Any other step is
        skipped and reported with a warning instead of being dropped silently.

        Args:
            steps: A single step name or a list of step names.
        """
        if isinstance(steps, str):
            steps = [steps]
        for step in steps:
            if step in self.available_steps:
                self.processing_steps.append(step)
            else:
                LOGGER.warning(f"Ignoring unknown processing step: {step!r}")

    def list_available_steps(self):
        """
        Log and return the available steps whose name does not start with ``_``.

        Returns:
            List of the matching keys of ``available_steps``.
        """
        step_list = [step for step in self.available_steps if not step.startswith("_")]
        LOGGER.info(f"Available steps: {step_list}")
        return step_list
Loading
Loading