"""Implement Snowfall class.
This module contains the Snowfall class used to run repeated (!)
simulations of water nucleation in vials. It makes use of class
Snowflake for the individual simulations.
"""
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn as sns
import multiprocessing as mp
from typing import List, Tuple, Union, Sequence, Optional
from ethz_snow.snowflake import Snowflake
from .__init__ import __citation__, __bibtex__
[docs]class Snowfall:
"""A class to handle multiple Stochastic Nucleation of Water simulation.
More information regarding the equations and their derivation can be found in
"Stochastic shelf-scale modeling framework for the freezing stage in
freeze-drying processes", Deck, Ochsenbein, and Mazzotti (2022),
Int J Pharm, 613, 121276, https://doi.org/10.1016/j.ijpharm.2021.121276
as well as in the Snowflake class documentation.
Parameters:
Nrep (int): Number of repetitions.
pool_size (int): Size of worker pool for parallelization.
stats (dict): Statistics for each simulation.
stats_df (pd.DataFrame): Long-form table of all statistics.
simulationStatus (int): Status of simulation (0 = not run, 1 = run).
"""
def __init__(self, Nrep: int = 5, pool_size: int = None, **kwargs):
"""Construct a Snowfall object.
Args:
Nrep (int, optional): Number of repetitions. Defaults to 5.
pool_size (int, optional): Size of worker pool for parallelization.
Defaults to None (= # of cpu cores).
"""
self.pool_size = pool_size
self.Nrep = int(Nrep)
if "seed" in kwargs.keys():
# seed will be chosen by Snowfall
del kwargs["seed"]
if ("storeStates" in kwargs.keys()) and (kwargs["storeStates"] is not None):
print("WARNING: States cannot be stored for Snowfall simulations.")
del kwargs["storeStates"]
Sf_template = Snowflake(**kwargs)
Sf_template._buildHeatflowMatrices() # pre-build H_int, H_ext, H_shelf
self.Sf_template = Sf_template
self.stats = dict()
self.stats_df = pd.DataFrame()
@property
def simulationStatus(self) -> int:
"""Return simulation status of instance.
Returns:
int: Simulation status. 0 = not run, 1 = run.
"""
if self.stats:
return 1
else:
return 0
@classmethod
def _uniqueFlake(cls, S, seed):
"""Run a single Snowflake with a specific seed."""
S.seed = seed
S.run()
return S.stats
@classmethod
def _uniqueFlake_sync(cls, S, seed, return_dict):
"""Run a single Snowflake with a specific seed.
Run a single Snowflake with a specific seed
and write it back into multiprocessing.Manager object for data sharing.
"""
S.seed = seed
S.run()
return_dict[seed] = S.stats
[docs] def run(self, how="async"):
"""Run all the Snowflake simulations.
Args:
how (str, optional): How to perform runs. Valid options are
'async', 'sync', and 'sequential' (no parallelization).
Defaults to "async".
"""
# clean up old simulation
self.stats = dict()
self.stats_df = pd.DataFrame()
if how == "async":
with mp.Pool(self.pool_size) as p:
# starmap is only available since python 3.3
# it allows passing multiple arguments
res = p.starmap_async(
Snowfall._uniqueFlake,
[(self.Sf_template, i) for i in range(self.Nrep)],
).get()
self.stats = {i: r for i, r in enumerate(res)}
elif how == "sync":
manager = mp.Manager()
return_dict = manager.dict()
with mp.Pool(self.pool_size) as p:
# starmap is only available since python 3.3
# it allows passing multiple arguments
p.starmap(
Snowfall._uniqueFlake_sync,
[(self.Sf_template, i, return_dict) for i in range(self.Nrep)],
)
self.stats = dict(return_dict)
elif how == "sequential":
for i in range(self.Nrep):
self.stats[i] = self._uniqueFlake(self.Sf_template, i)
[docs] def nucleationTimes(
self,
group: Union[str, Sequence[str]] = "all",
seed: Union[int, Sequence[int], None] = None,
) -> np.ndarray:
"""Return nucleation times.
Args:
group (Union[str, Sequence[str]], optional): Subgroup to return.
Defaults to "all".
seed (Union[int, Sequence[int], None], optional): Seed(s) to return.
Defaults to None.
Returns:
np.ndarray: The nucleation times.
"""
return self._returnStats(what="tnuc", group=group, seed=seed)
[docs] def nucleationTemperatures(
self,
group: Union[str, Sequence[str]] = "all",
seed: Union[int, Sequence[int], None] = None,
) -> np.ndarray:
"""Return nucleation temperatures.
Args:
group (Union[str, Sequence[str]], optional): Subgroup to return.
Defaults to "all".
seed (Union[int, Sequence[int], None], optional): Seed(s) to return.
Defaults to None.
Returns:
np.ndarray: The nucleation temperatures.
"""
return self._returnStats(what="Tnuc", group=group, seed=seed)
[docs] def solidificationTimes(
self,
group: Union[str, Sequence[str]] = "all",
seed: Union[int, Sequence[int], None] = None,
) -> np.ndarray:
"""Return solidification times.
Args:
group (Union[str, Sequence[str]], optional): Subgroup to return.
Defaults to "all".
seed (Union[int, Sequence[int], None], optional): Seed(s) to return.
Defaults to None.
Returns:
np.ndarray: The solidification times.
"""
return self._returnStats(what="tsol", group=group, seed=seed)
def _returnStats(
self,
what: str,
group: str = "all",
seed: Union[int, Sequence[int], None] = None,
):
"""Return arbitarary statistic.
Helper function to compute arbitrary statistic for
nucleationTimes, solidificationTimes, nucleationTemperatures.
Args:
what (str): What to return.
group (Union[str, Sequence[str]], optional): Subgroup to return.
Defaults to "all".
seed (Union[int, Sequence[int], None], optional): Seed(s) to return.
Defaults to None.
Returns:
np.ndarray: The statistic in question.
"""
df = self.to_frame()
if group != "all":
if not isinstance(group, (tuple, list)):
group = [group]
df = df[df.group.isin(group)]
if seed is not None:
if not isinstance(seed, (tuple, list)):
seed = [seed]
df = df[df.seed.isin(seed)]
if what == "tnuc":
df = df[df.variable == "t_nucleation"]
elif what == "Tnuc":
df = df[df.variable == "T_nucleation"]
elif what == "tsol":
df = df[df.variable == "t_solidification"]
return df["value"].to_numpy()
[docs] def plot(
self,
what: str = "t_nucleation",
kind: str = "box",
seed: Union[int, Sequence[int], None] = None,
group: Union[str, Sequence[str]] = "all",
):
"""Create plots for Snowfall object.
Args:
kind (str, optional): Any sns.catplot 'kind' input is allowed.
Defaults to "box".
what (str, optional): What to plot, i.e., keys of the stats dict.
Valid options are t_nucleation, T_nucleation, t_solidification.
Defaults to "t_nucleation".
seed (Union[int, Sequence[int], None], optional): Seed(s) to return.
Defaults to None.
group (Union[str, Sequence[str]], optional): Subgroup to return.
Defaults to "all".
"""
df = self.to_frame()
if group != "all":
if not isinstance(group, (tuple, list)):
group = [group]
df = df[df.group.isin(group)]
if seed is not None:
if not isinstance(seed, (tuple, list)):
seed = [seed]
df = df[df.seed.isin(seed)]
df = df[df.variable.str.contains(what)]
sns.catplot(data=df, hue="group", y="value", kind=kind, x="variable")
[docs] def to_frame(self) -> pd.DataFrame:
"""Save statistics as pandas dataframe.
Raises:
ValueError: Simulation needs to run first.
Returns:
pd.DataFrame: The statistics in long form.
"""
if self.simulationStatus == 0:
raise ValueError("Simulation needs to be run first.")
elif self.stats_df.empty:
stats_df = pd.DataFrame(
columns=["group", "vial", "variable", "value", "seed"]
)
self.Sf_template._simulationStatus = 1
# call Snowflake function for the first Snowflake
# to get the right layout
N_tot = self.Sf_template.N_vials_total
self.Sf_template.stats = self.stats[0]
# repeat the first df because vial and group IDs should not change
stats_df = pd.concat([self.Sf_template.to_frame()[0]] * self.Nrep)
# get the colum containing variable and value
varCol, valCol = (
stats_df.columns.get_loc("variable"),
stats_df.columns.get_loc("value"),
)
for i in range(1, self.Nrep):
stats_df.iloc[
i * N_tot * 3 : (i + 1) * N_tot * 3, [varCol, valCol]
] = pd.DataFrame(self.stats[i]).melt()
# this assumes that the first stats_df 'melted' in the same way
# we will add a test to ensure this assumption is not violated
stats_df["seed"] = np.repeat(np.arange(0, self.Nrep), N_tot * 3)
# clean up template
self.Sf_template.stats = dict()
self.Sf_template._simulationStatus = 0
self.stats_df = stats_df
elif not self.stats_df.empty:
# do not recalculate, but load from memory
stats_df = self.stats_df
return stats_df
def __repr__(self) -> str:
"""Return string representation of the Snowfall class.
Returns:
str: The Snowfall class string representation giving some basic info.
"""
return (
f"Snowfall([{self.Nrep} Snowflake{'s' if self.Nrep > 1 else ''}, "
+ f"pool_size: {self.pool_size}])"
)