Module src.utils
Utilities for the project.
Expand source code
"""Utilities for the project."""
from pathlib import Path
import pandas as pd
import yaml
def load_config(stage: str) -> dict:
    """Load the configuration section for one pipeline stage.

    Reads ``params.yaml`` (resolved relative to the current working
    directory) and returns the value stored under the top-level key
    ``stage``.

    Parameters
    ----------
    stage : str
        Stage of the pipeline; used as the top-level key in ``params.yaml``.

    Returns
    -------
    config : dict
        Configuration dictionary for the requested stage.

    Raises
    ------
    FileNotFoundError
        If ``params.yaml`` cannot be found.
    KeyError
        If the file parses to a mapping that has no ``stage`` key.
    """
    # Use a context manager so the file handle is closed even when parsing
    # fails, and pin the encoding so the result does not depend on the
    # platform's locale default.
    with open("params.yaml", encoding="utf-8") as params_file:
        params = yaml.safe_load(params_file)
    return params[stage]
def get_raw_data_path(config: dict) -> Path:
    """Get the path to the raw data.

    Joins the ``"raw"`` directory entry of the configuration with its
    ``"dataset_name"`` entry. Nothing is created or checked on disk.

    Parameters
    ----------
    config : dict
        Configuration dictionary; must contain the keys ``"raw"`` and
        ``"dataset_name"``.

    Returns
    -------
    raw_data_path : pathlib.Path
        Path to the raw data.

    Raises
    ------
    KeyError
        If ``"raw"`` or ``"dataset_name"`` is missing from ``config``.
    """
    # The return annotation used to say ``str``; the function has always
    # returned a ``pathlib.Path``.
    return Path(config["raw"], config["dataset_name"])
def get_processed_data_path(config: dict):
    """Build the location of the processed dataset.

    Parameters
    ----------
    config : dict
        Configuration dictionary; the keys ``"processed"`` and
        ``"dataset_name"`` are read from it.

    Returns
    -------
    processed_data_path : pathlib.Path
        The ``"processed"`` directory joined with the dataset file name.
    """
    # Look the keys up in the same order as before so a missing key raises
    # the same KeyError.
    processed_dir = config["processed"]
    dataset_name = config["dataset_name"]
    return Path(processed_dir, dataset_name)
def get_model_path(config: dict):
    """Get the path to the model, creating the models directory if needed.

    Parameters
    ----------
    config : dict
        Configuration dictionary; must contain the keys ``"models"``
        (directory that holds models) and ``"model_name"`` (file name).

    Returns
    -------
    model_path : pathlib.Path
        Path to the model inside the models directory.

    Raises
    ------
    KeyError
        If ``"models"`` or ``"model_name"`` is missing from ``config``.

    Notes
    -----
    This function has a side effect: the models directory is created on
    disk when it does not exist yet. The model file itself is not created.
    """
    # Avoid naming the local ``dir``, which would shadow the builtin.
    models_dir = Path(config["models"])
    # ``parents=True`` lets a nested directory such as ``artifacts/models``
    # be created when its parent is missing; ``exist_ok=True`` keeps
    # repeated calls harmless.
    models_dir.mkdir(parents=True, exist_ok=True)
    return Path(models_dir, config["model_name"])
def load_data(raw: bool, config: dict, **kwargs):
    """Read the raw or the processed dataset into a DataFrame.

    Parameters
    ----------
    raw : bool
        If truthy, read from the raw data path; otherwise read from the
        processed data path.
    config : dict
        Configuration dictionary passed on to the path helper.
    **kwargs
        Additional keyword arguments forwarded to `pd.read_csv`.

    Returns
    -------
    data : pd.DataFrame
        Dataframe containing the data.
    """
    # Choose the path helper first, then read in a single step.
    resolve_path = get_raw_data_path if raw else get_processed_data_path
    return pd.read_csv(resolve_path(config), **kwargs)
Functions
def get_model_path(config: dict)-
Get the path to the model.
Parameters
config:Dict- Configuration dictionary.
Returns
model_path:pathlib.Path- Path to the model.
Expand source code
def get_model_path(config: dict): """Get the path to the model. Parameters ---------- config : Dict Configuration dictionary. Returns ------- model_path : pathlib.Path Path to the model. """ dir = Path(config["models"]) dir.mkdir(exist_ok=True) return Path(dir, config["model_name"]) def get_processed_data_path(config: dict)-
Get the path to the processed data.
Parameters
config:Dict- Configuration dictionary.
Returns
processed_data_path:pathlib.Path- Path to the processed data.
Expand source code
def get_processed_data_path(config: dict): """Get the path to the processed data. Parameters ---------- config : Dict Configuration dictionary. Returns ------- processed_data_path : pathlib.Path Path to the processed data. """ return Path(config["processed"], config["dataset_name"]) def get_raw_data_path(config: dict) ‑> str-
Get the path to the raw data.
Parameters
config:Dict- Configuration dictionary.
Returns
raw_data_path:pathlib.Path- Path to the raw data.
Expand source code
def get_raw_data_path(config: dict) -> str: """Get the path to the raw data. Parameters ---------- config : Dict Configuration dictionary. Returns ------- raw_data_path : pathlib.Path Path to the raw data. """ return Path(config["raw"], config["dataset_name"]) def load_config(stage: str) ‑> dict-
Load the configuration file.
Parameters
stage:str- Stage of the pipeline.
Returns
config:Dict- Configuration dictionary.
Expand source code
def load_config(stage: str) -> dict: """Load the configuration file. Parameters ---------- stage : str Stage of the pipeline. Returns ------- config : Dict Configuration dictionary. """ return yaml.safe_load(open("params.yaml"))[stage] def load_data(raw: bool, config: dict, **kwargs)-
Load the wine quality dataset.
Parameters
raw:bool- Whether to load the raw or processed data.
config:dict- Configuration dictionary.
**kwargs- Additional keyword arguments to pass to pd.read_csv.
Returns
data:pd.DataFrame- Dataframe containing the data.
Expand source code
def load_data(raw: bool, config: dict, **kwargs):
    """Read the raw or the processed dataset into a DataFrame.

    Parameters
    ----------
    raw : bool
        If truthy, read from the raw data path; otherwise read from the
        processed data path.
    config : dict
        Configuration dictionary passed on to the path helper.
    **kwargs
        Additional keyword arguments forwarded to `pd.read_csv`.

    Returns
    -------
    data : pd.DataFrame
        Dataframe containing the data.
    """
    # Choose the path helper first, then read in a single step.
    resolve_path = get_raw_data_path if raw else get_processed_data_path
    return pd.read_csv(resolve_path(config), **kwargs)