Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge branch 'dev' into main #13

Merged
merged 5 commits into main from dev
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<a href='https://github.com/WenjieDu/PyPOTS'><img src='https://raw.githubusercontent.com/WenjieDu/PyPOTS/main/docs/figs/PyPOTS%20logo.svg?sanitize=true' width='190' align='right' /></a>
<a href='https://github.com/WenjieDu/PyPOTS'><img src='https://raw.githubusercontent.com/WenjieDu/PyPOTS/main/docs/figs/PyPOTS%20logo.svg?sanitize=true' width='200' align='right' /></a>

# <p align='center'>Welcome to PyPOTS</p>
## <p align='center'>Welcome to PyPOTS</p>
**<p align='center'>A Python Toolbox for Data Mining on Partially-Observed Time Series</p>**

<p align='center'>
Expand Down
2 changes: 1 addition & 1 deletion pypots/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'

version = '0.0.7'
version = "0.0.7"
94 changes: 57 additions & 37 deletions pypots/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,25 @@


class BaseModel(ABC):
""" Base class for all models.
"""
"""Base class for all models."""

def __init__(self, device):
self.logger = {}
self.model = None

if device is None:
self.device = torch.device(
"cuda:0" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu"
"cuda:0"
if torch.cuda.is_available() and torch.cuda.device_count() > 0
else "cpu"
)
else:
self.device = device

def check_input(self, expected_n_steps, expected_n_features, X, y=None, out_dtype='tensor'):
""" Check value type and shape of input X and y
def check_input(
self, expected_n_steps, expected_n_features, X, y=None, out_dtype="tensor"
):
"""Check value type and shape of input X and y

Parameters
----------
Expand All @@ -54,15 +57,20 @@ def check_input(self, expected_n_steps, expected_n_features, X, y=None, out_dtyp

y : tensor
"""
assert out_dtype in ['tensor', 'ndarray'], f'out_dtype should be "tensor" or "ndarray", but got {out_dtype}'
assert out_dtype in [
"tensor",
"ndarray",
], f'out_dtype should be "tensor" or "ndarray", but got {out_dtype}'
is_list = isinstance(X, list)
is_array = isinstance(X, np.ndarray)
is_tensor = isinstance(X, torch.Tensor)
assert is_tensor or is_array or is_list, TypeError('X should be an instance of list/np.ndarray/torch.Tensor, '
f'but got {type(X)}')
assert is_tensor or is_array or is_list, TypeError(
"X should be an instance of list/np.ndarray/torch.Tensor, "
f"but got {type(X)}"
)

# convert the data type if needed
if out_dtype == 'tensor':
if out_dtype == "tensor":
if is_list:
X = torch.tensor(X).to(self.device)
elif is_array:
Expand All @@ -80,44 +88,57 @@ def check_input(self, expected_n_steps, expected_n_features, X, y=None, out_dtyp

# check the shape of X here
X_shape = X.shape
assert len(X_shape) == 3, f'input should have 3 dimensions [n_samples, seq_len, n_features],' \
f'but got shape={X.shape}'
assert X_shape[1] == expected_n_steps, f'expect X.shape[1] to be {expected_n_steps}, but got {X_shape[1]}'
assert X_shape[2] == expected_n_features, f'expect X.shape[2] to be {expected_n_features}, but got {X_shape[2]}'
assert len(X_shape) == 3, (
f"input should have 3 dimensions [n_samples, seq_len, n_features],"
f"but got shape={X.shape}"
)
assert (
X_shape[1] == expected_n_steps
), f"expect X.shape[1] to be {expected_n_steps}, but got {X_shape[1]}"
assert (
X_shape[2] == expected_n_features
), f"expect X.shape[2] to be {expected_n_features}, but got {X_shape[2]}"

if y is not None:
assert len(X) == len(y), f'lengths of X and y must match, ' \
f'but got f{len(X)} and {len(y)}'
assert len(X) == len(y), (
f"lengths of X and y must match, " f"but got f{len(X)} and {len(y)}"
)
if isinstance(y, torch.Tensor):
y = y.to(self.device) if out_dtype == 'tensor' else y.numpy()
y = y.to(self.device) if out_dtype == "tensor" else y.numpy()
elif isinstance(y, list):
y = torch.tensor(y).to(self.device) if out_dtype == 'tensor' else np.asarray(y)
y = (
torch.tensor(y).to(self.device)
if out_dtype == "tensor"
else np.asarray(y)
)
elif isinstance(y, np.ndarray):
y = torch.from_numpy(y).to(self.device) if out_dtype == 'tensor' else y
y = torch.from_numpy(y).to(self.device) if out_dtype == "tensor" else y
else:
raise TypeError('y should be an instance of list/np.ndarray/torch.Tensor, '
f'but got {type(y)}')
raise TypeError(
"y should be an instance of list/np.ndarray/torch.Tensor, "
f"but got {type(y)}"
)
return X, y
else:
return X

def save_logs_to_tensorboard(self, saving_path):
""" Save logs (self.logger) into a tensorboard file.
"""Save logs (self.logger) into a tensorboard file.

Parameters
----------
saving_path : str
Local disk path to save the tensorboard file.
"""
# TODO: find a solution for log saving
raise IOError('This function is not ready for users.')
raise IOError("This function is not ready for users.")
# tb_summary_writer = SummaryWriter(saving_path)
# tb_summary_writer.add_custom_scalars(self.logger)
# tb_summary_writer.close()
# print(f'Log saved successfully to {saving_path}.')

def save_model(self, saving_path):
""" Save the model to a disk file.
"""Save the model to a disk file.

Parameters
----------
Expand All @@ -128,10 +149,10 @@ def save_model(self, saving_path):
torch.save(self.model, saving_path)
except Exception as e:
print(e)
print(f'Saved successfully to {saving_path}.')
print(f"Saved successfully to {saving_path}.")

def load_model(self, model_path):
""" Load the saved model from a disk file.
"""Load the saved model from a disk file.

Parameters
----------
Expand All @@ -152,14 +173,15 @@ def load_model(self, model_path):
self.model = loaded_model.model
except Exception as e:
raise e
print(f'Model loaded successfully from {model_path}.')
print(f"Model loaded successfully from {model_path}.")


class BaseNNModel(BaseModel):
""" Abstract class for all neural-network models.
"""
"""Abstract class for all neural-network models."""

def __init__(self, learning_rate, epochs, patience, batch_size, weight_decay, device):
def __init__(
self, learning_rate, epochs, patience, batch_size, weight_decay, device
):
super().__init__(device)

# training hyper-parameters
Expand All @@ -173,14 +195,12 @@ def __init__(self, learning_rate, epochs, patience, batch_size, weight_decay, de
self.model = None
self.optimizer = None
self.best_model_dict = None
self.best_loss = float('inf')
self.logger = {
'training_loss': [],
'validating_loss': []
}
self.best_loss = float("inf")
self.logger = {"training_loss": [], "validating_loss": []}

def _print_model_size(self):
""" Print the number of trainable parameters in the initialized NN model.
"""
"""Print the number of trainable parameters in the initialized NN model."""
num_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
print(f'Model initialized successfully. Number of the trainable parameters: {num_params}')
print(
f"Model initialized successfully. Number of the trainable parameters: {num_params}"
)
7 changes: 3 additions & 4 deletions pypots/classification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from pypots.classification.raindrop import Raindrop

__all__ = [
'BRITS',
'GRUD',
'Raindrop',

"BRITS",
"GRUD",
"Raindrop",
]
75 changes: 47 additions & 28 deletions pypots/classification/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,14 @@


class BaseClassifier(BaseModel):
""" Abstract class for all classification models.
"""
"""Abstract class for all classification models."""

def __init__(self, device):
super().__init__(device)

@abstractmethod
def fit(self, train_X, train_y, val_X=None, val_y=None):
""" Train the classifier.
"""Train the classifier.

Parameters
----------
Expand All @@ -45,7 +44,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None):

@abstractmethod
def classify(self, X):
""" Classify the input with the trained model.
"""Classify the input with the trained model.

Parameters
----------
Expand All @@ -61,22 +60,32 @@ def classify(self, X):


class BaseNNClassifier(BaseNNModel, BaseClassifier):
def __init__(self, n_classes, learning_rate, epochs, patience, batch_size, weight_decay,
device):
super().__init__(learning_rate, epochs, patience, batch_size, weight_decay, device)
def __init__(
self,
n_classes,
learning_rate,
epochs,
patience,
batch_size,
weight_decay,
device,
):
super().__init__(
learning_rate, epochs, patience, batch_size, weight_decay, device
)
self.n_classes = n_classes

@abstractmethod
def assemble_input_data(self, data):
pass

def _train_model(self, training_loader, val_loader=None):
self.optimizer = torch.optim.Adam(self.model.parameters(),
lr=self.lr,
weight_decay=self.weight_decay)
self.optimizer = torch.optim.Adam(
self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay
)

# each training starts from the very beginning, so reset the loss and model dict here
self.best_loss = float('inf')
self.best_loss = float("inf")
self.best_model_dict = None

try:
Expand All @@ -87,12 +96,14 @@ def _train_model(self, training_loader, val_loader=None):
inputs = self.assemble_input_data(data)
self.optimizer.zero_grad()
results = self.model.forward(inputs)
results['loss'].backward()
results["loss"].backward()
self.optimizer.step()
epoch_train_loss_collector.append(results['loss'].item())
epoch_train_loss_collector.append(results["loss"].item())

mean_train_loss = np.mean(epoch_train_loss_collector) # mean training loss of the current epoch
self.logger['training_loss'].append(mean_train_loss)
mean_train_loss = np.mean(
epoch_train_loss_collector
) # mean training loss of the current epoch
self.logger["training_loss"].append(mean_train_loss)

if val_loader is not None:
self.model.eval()
Expand All @@ -101,14 +112,16 @@ def _train_model(self, training_loader, val_loader=None):
for idx, data in enumerate(val_loader):
inputs = self.assemble_input_data(data)
results = self.model.forward(inputs)
epoch_val_loss_collector.append(results['loss'].item())
epoch_val_loss_collector.append(results["loss"].item())

mean_val_loss = np.mean(epoch_val_loss_collector)
self.logger['validating_loss'].append(mean_val_loss)
print(f'epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}')
self.logger["validating_loss"].append(mean_val_loss)
print(
f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}"
)
mean_loss = mean_val_loss
else:
print(f'epoch {epoch}: training loss {mean_train_loss:.4f}')
print(f"epoch {epoch}: training loss {mean_train_loss:.4f}")
mean_loss = mean_train_loss

if mean_loss < self.best_loss:
Expand All @@ -118,18 +131,24 @@ def _train_model(self, training_loader, val_loader=None):
else:
self.patience -= 1
if self.patience == 0:
print('Exceeded the training patience. Terminating the training procedure...')
print(
"Exceeded the training patience. Terminating the training procedure..."
)
break
except Exception as e:
print(f'Exception: {e}')
print(f"Exception: {e}")
if self.best_model_dict is None:
raise RuntimeError('Training got interrupted. Model was not get trained. Please try fit() again.')
raise RuntimeError(
"Training got interrupted. Model was not get trained. Please try fit() again."
)
else:
RuntimeWarning('Training got interrupted. '
'Model will load the best parameters so far for testing. '
"If you don't want it, please try fit() again.")
RuntimeWarning(
"Training got interrupted. "
"Model will load the best parameters so far for testing. "
"If you don't want it, please try fit() again."
)

if np.equal(self.best_loss, float('inf')):
raise ValueError('Something is wrong. best_loss is Nan after training.')
if np.equal(self.best_loss, float("inf")):
raise ValueError("Something is wrong. best_loss is Nan after training.")

print('Finished training.')
print("Finished training.")
Loading