From 137b35dbd3fd9f247a08de165d494b519205df41 Mon Sep 17 00:00:00 2001 From: Maciej Skrabski Date: Tue, 2 Aug 2022 14:44:53 +0200 Subject: [PATCH 1/4] fix: brits imputation test device mismatch --- pypots/imputation/base.py | 60 +++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/pypots/imputation/base.py b/pypots/imputation/base.py index c3aee779..a7c2a626 100644 --- a/pypots/imputation/base.py +++ b/pypots/imputation/base.py @@ -70,13 +70,15 @@ def __init__(self, learning_rate, epochs, patience, batch_size, weight_decay, de def assemble_input_data(self, data): pass - def _train_model(self, training_loader, val_loader=None, val_X_intact=None, val_indicating_mask=None): - self.optimizer = torch.optim.Adam(self.model.parameters(), - lr=self.lr, - weight_decay=self.weight_decay) + def _train_model( + self, training_loader, val_loader=None, val_X_intact=None, val_indicating_mask=None + ): + self.optimizer = torch.optim.Adam( + self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + ) # each training starts from the very beginning, so reset the loss and model dict here - self.best_loss = float('inf') + self.best_loss = float("inf") self.best_model_dict = None try: @@ -87,12 +89,14 @@ def _train_model(self, training_loader, val_loader=None, val_X_intact=None, val_ inputs = self.assemble_input_data(data) self.optimizer.zero_grad() results = self.model.forward(inputs) - results['loss'].backward() + results["loss"].backward() self.optimizer.step() - epoch_train_loss_collector.append(results['loss'].item()) + epoch_train_loss_collector.append(results["loss"].item()) - mean_train_loss = np.mean(epoch_train_loss_collector) # mean training loss of the current epoch - self.logger['training_loss'].append(mean_train_loss) + mean_train_loss = np.mean( + epoch_train_loss_collector + ) # mean training loss of the current epoch + self.logger["training_loss"].append(mean_train_loss) if val_loader is not None: self.model.eval() @@ -101,17 +105,21 @@ def _train_model(self, training_loader, val_loader=None, val_X_intact=None, val_ for idx, data in enumerate(val_loader): inputs = self.assemble_input_data(data) results = self.model.forward(inputs) - imputation_collector.append(results['imputed_data']) + imputation_collector.append(results["imputed_data"]) imputation_collector = torch.cat(imputation_collector) imputation_collector = imputation_collector - mean_val_loss = cal_mae(imputation_collector, val_X_intact, val_indicating_mask) - self.logger['validating_loss'].append(mean_val_loss) - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}') + mean_val_loss = cal_mae( + imputation_collector, val_X_intact, val_indicating_mask + ) + self.logger["validating_loss"].append(mean_val_loss) + print( + f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}" + ) mean_loss = mean_val_loss else: - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}') + print(f"epoch {epoch}: training loss {mean_train_loss:.4f}") mean_loss = mean_train_loss if mean_loss < self.best_loss: @@ -121,25 +129,29 @@ def _train_model(self, training_loader, val_loader=None, val_X_intact=None, val_ else: self.patience -= 1 - if os.getenv('enable_nni', False): + if os.getenv("enable_nni", False): nni.report_intermediate_result(mean_loss) if epoch == self.epochs - 1 or self.patience == 0: nni.report_final_result(self.best_loss) if self.patience == 0: - print('Exceeded the training patience. 
Terminating the training procedure...') + print("Exceeded the training patience. Terminating the training procedure...") break except Exception as e: - print(f'Exception: {e}') + print(f"Exception: {e}") if self.best_model_dict is None: - raise RuntimeError('Training got interrupted. Model was not get trained. Please try fit() again.') + raise RuntimeError( + "Training got interrupted. Model was not get trained. Please try fit() again." + ) else: - RuntimeWarning('Training got interrupted. ' - 'Model will load the best parameters so far for testing. ' - "If you don't want it, please try fit() again.") + RuntimeWarning( + "Training got interrupted. " + "Model will load the best parameters so far for testing. " + "If you don't want it, please try fit() again." + ) - if np.equal(self.best_loss, float('inf')): - raise ValueError('Something is wrong. best_loss is Nan after training.') + if np.equal(self.best_loss.item(), float("inf")): + raise ValueError("Something is wrong. best_loss is Nan after training.") - print('Finished training.') + print("Finished training.") From 97ae394df543a011d9875186535a5b4742ad974c Mon Sep 17 00:00:00 2001 From: Maciej Skrabski Date: Tue, 2 Aug 2022 14:59:44 +0200 Subject: [PATCH 2/4] fix: locf mask and arange device mismatch --- pypots/imputation/locf.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pypots/imputation/locf.py b/pypots/imputation/locf.py index fbc1073b..322e91af 100644 --- a/pypots/imputation/locf.py +++ b/pypots/imputation/locf.py @@ -23,13 +23,13 @@ class LOCF(BaseImputer): """ def __init__(self, nan=0): - super().__init__('cpu') + super().__init__("cpu") self.nan = nan def fit(self, train_X, val_X=None): warnings.warn( - 'LOCF (Last Observed Carried Forward) imputation class has no parameter to train. ' - 'Please run func impute(X) directly.' + "LOCF (Last Observed Carried Forward) imputation class has no parameter to train. " + "Please run func impute(X) directly." ) def locf_numpy(self, X): @@ -86,7 +86,7 @@ def locf_torch(self, X): trans_X = X.permute((0, 2, 1)) mask = torch.isnan(trans_X) n_samples, n_steps, n_features = mask.shape - idx = torch.where(~mask, torch.arange(n_features), 0) + idx = torch.where(~mask, torch.arange(n_features, device=mask.device), 0) idx = torch.cummax(idx, dim=2) collector = [] @@ -116,8 +116,10 @@ def impute(self, X): array-like, Imputed time series. 
""" - assert len(X.shape) == 3, f'Input X should have 3 dimensions [n_samples, n_steps, n_features], ' \ - f'but the actual shape of X: {X.shape}' + assert len(X.shape) == 3, ( + f"Input X should have 3 dimensions [n_samples, n_steps, n_features], " + f"but the actual shape of X: {X.shape}" + ) if isinstance(X, list): X = np.asarray(X) @@ -126,6 +128,7 @@ def impute(self, X): elif isinstance(X, torch.Tensor): X_imputed = self.locf_torch(X).detach().cpu().numpy() else: - raise TypeError('X must be type of list/np.ndarray/torch.Tensor, ' - f'but got {type(X)}') + raise TypeError( + "X must be type of list/np.ndarray/torch.Tensor, " f"but got {type(X)}" + ) return X_imputed From 37ebcd6ac2cbb25b59c086255ba53277240cc4b0 Mon Sep 17 00:00:00 2001 From: Maciej Skrabski Date: Tue, 2 Aug 2022 15:26:00 +0200 Subject: [PATCH 3/4] fix: raindrop devices mismatch --- pypots/classification/raindrop.py | 295 +++++++++++++++++++----------- 1 file changed, 193 insertions(+), 102 deletions(-) diff --git a/pypots/classification/raindrop.py b/pypots/classification/raindrop.py index 29fdc9e4..3424a822 100644 --- a/pypots/classification/raindrop.py +++ b/pypots/classification/raindrop.py @@ -43,7 +43,9 @@ class PositionalEncodingTF(nn.Module): def __init__(self, d_pe, max_len=500): super().__init__() - assert d_pe % 2 == 0, 'd_pe should be even, otherwise the output dims will be not equal to d_pe' + assert ( + d_pe % 2 == 0 + ), "d_pe should be even, otherwise the output dims will be not equal to d_pe" self.max_len = max_len self._num_timescales = d_pe // 2 @@ -64,7 +66,9 @@ def forward(self, time_vectors): times = time_vectors.unsqueeze(2) scaled_time = times / torch.Tensor(timescales[None, None, :]) - pe = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], axis=-1) # T x B x d_model + pe = torch.cat( + [torch.sin(scaled_time), torch.cos(scaled_time)], axis=-1 + ) # T x B x d_model pe = pe.type(torch.FloatTensor) return pe @@ -72,11 +76,22 @@ def forward(self, time_vectors): class ObservationPropagation(MessagePassing): _alpha: OptTensor - def __init__(self, in_channels: Union[int, Tuple[int, int]], out_channels: int, - n_nodes: int, ob_dim: int, heads: int = 1, concat: bool = True, - beta: bool = False, dropout: float = 0., edge_dim: Optional[int] = None, - bias: bool = True, root_weight: bool = True, **kwargs): - kwargs.setdefault('aggr', 'add') + def __init__( + self, + in_channels: Union[int, Tuple[int, int]], + out_channels: int, + n_nodes: int, + ob_dim: int, + heads: int = 1, + concat: bool = True, + beta: bool = False, + dropout: float = 0.0, + edge_dim: Optional[int] = None, + bias: bool = True, + root_weight: bool = True, + **kwargs + ): + kwargs.setdefault("aggr", "add") super().__init__(node_dim=0, **kwargs) self.in_channels = in_channels @@ -97,21 +112,20 @@ def __init__(self, in_channels: Union[int, Tuple[int, int]], out_channels: int, if edge_dim is not None: self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False) else: - self.lin_edge = self.register_parameter('lin_edge', None) + self.lin_edge = self.register_parameter("lin_edge", None) if concat: - self.lin_skip = Linear(in_channels[1], heads * out_channels, - bias=bias) + self.lin_skip = Linear(in_channels[1], heads * out_channels, bias=bias) if self.beta: self.lin_beta = Linear(3 * heads * out_channels, 1, bias=False) else: - self.lin_beta = self.register_parameter('lin_beta', None) + self.lin_beta = self.register_parameter("lin_beta", None) else: self.lin_skip = Linear(in_channels[1], out_channels, bias=bias) if 
self.beta: self.lin_beta = Linear(3 * out_channels, 1, bias=False) else: - self.lin_beta = self.register_parameter('lin_beta', None) + self.lin_beta = self.register_parameter("lin_beta", None) self.weight = Parameter(torch.Tensor(in_channels[1], heads * out_channels)) self.bias = Parameter(torch.Tensor(heads * out_channels)) @@ -145,8 +159,16 @@ def reset_parameters(self): glorot(self.map_weights) self.increase_dim.reset_parameters() - def forward(self, x: Union[Tensor, PairTensor], p_t: Tensor, edge_index: Adj, edge_weights=None, use_beta=False, - edge_attr: OptTensor = None, return_attention_weights=None): + def forward( + self, + x: Union[Tensor, PairTensor], + p_t: Tensor, + edge_index: Adj, + edge_weights=None, + use_beta=False, + edge_attr: OptTensor = None, + return_attention_weights=None, + ): r""" Args: @@ -165,7 +187,9 @@ def forward(self, x: Union[Tensor, PairTensor], p_t: Tensor, edge_index: Adj, ed if isinstance(x, Tensor): x: PairTensor = (x, x) - out = self.propagate(edge_index, x=x, edge_weights=edge_weights, edge_attr=edge_attr, size=None) + out = self.propagate( + edge_index, x=x, edge_weights=edge_weights, edge_attr=edge_attr, size=None + ) alpha = self._alpha self._alpha = None @@ -181,13 +205,20 @@ def forward(self, x: Union[Tensor, PairTensor], p_t: Tensor, edge_index: Adj, ed if isinstance(edge_index, Tensor): return out, (edge_index, alpha) elif isinstance(edge_index, SparseTensor): - return out, edge_index.set_value(alpha, layout='coo') + return out, edge_index.set_value(alpha, layout="coo") else: return out - def message_selfattention(self, x_i: Tensor, x_j: Tensor, edge_weights: Tensor, edge_attr: OptTensor, - index: Tensor, ptr: OptTensor, - size_i: Optional[int]) -> Tensor: + def message_selfattention( + self, + x_i: Tensor, + x_j: Tensor, + edge_weights: Tensor, + edge_attr: OptTensor, + index: Tensor, + ptr: OptTensor, + size_i: Optional[int], + ) -> Tensor: query = self.lin_query(x_i).view(-1, self.heads, self.out_channels) key = self.lin_key(x_j).view(-1, self.heads, self.out_channels) @@ -208,9 +239,16 @@ def message_selfattention(self, x_i: Tensor, x_j: Tensor, edge_weights: Tensor, out *= alpha.view(-1, self.heads, 1) return out - def message(self, x_i: Tensor, x_j: Tensor, edge_weights: Tensor, edge_attr: OptTensor, - index: Tensor, ptr: OptTensor, - size_i: Optional[int]) -> Tensor: + def message( + self, + x_i: Tensor, + x_j: Tensor, + edge_weights: Tensor, + edge_attr: OptTensor, + index: Tensor, + ptr: OptTensor, + size_i: Optional[int], + ) -> Tensor: use_beta = self.use_beta if use_beta: n_step = self.p_t.shape[0] @@ -221,7 +259,7 @@ def message(self, x_i: Tensor, x_j: Tensor, edge_weights: Tensor, edge_attr: Opt p_emb = self.p_t.unsqueeze(0) - aa = torch.cat([w_v.repeat(1, n_step, 1, ), p_emb.repeat(n_edges, 1, 1)], dim=-1) + aa = torch.cat([w_v.repeat(1, n_step, 1,), p_emb.repeat(n_edges, 1, 1)], dim=-1) beta = torch.mean(h_W * aa, dim=-1) if edge_weights is not None: @@ -264,9 +302,13 @@ def message(self, x_i: Tensor, x_j: Tensor, edge_weights: Tensor, edge_attr: Opt out = out * gamma.view(-1, self.heads, 1) return out - def aggregate(self, inputs: Tensor, index: Tensor, - ptr: Optional[Tensor] = None, - dim_size: Optional[int] = None) -> Tensor: + def aggregate( + self, + inputs: Tensor, + index: Tensor, + ptr: Optional[Tensor] = None, + dim_size: Optional[int] = None, + ) -> Tensor: r"""Aggregates messages from neighbors as :math:`\square_{j \in \mathcal{N}(i)}`. 
@@ -278,19 +320,31 @@ def aggregate(self, inputs: Tensor, index: Tensor, :meth:`__init__` by the :obj:`aggr` argument. """ index = self.index - return scatter(inputs, index, dim=self.node_dim, dim_size=dim_size, - reduce=self.aggr) + return scatter(inputs, index, dim=self.node_dim, dim_size=dim_size, reduce=self.aggr) def __repr__(self): - return '{}({}, {}, heads={})'.format(self.__class__.__name__, - self.in_channels, - self.out_channels, - self.heads) + return "{}({}, {}, heads={})".format( + self.__class__.__name__, self.in_channels, self.out_channels, self.heads + ) class _Raindrop(nn.Module): - def __init__(self, n_layers, n_features, d_model, d_inner, n_heads, n_classes, dropout=0.3, max_len=215, d_static=9, - aggregation='mean', sensor_wise_mask=False, static=False, device=None): + def __init__( + self, + n_layers, + n_features, + d_model, + d_inner, + n_heads, + n_classes, + dropout=0.3, + max_len=215, + d_static=9, + aggregation="mean", + sensor_wise_mask=False, + static=False, + device=None, + ): super().__init__() self.n_layers = n_layers self.n_features = n_features @@ -310,18 +364,20 @@ def __init__(self, n_layers, n_features, d_model, d_inner, n_heads, n_classes, d self.global_structure = torch.ones(n_features, n_features, device=self.device) if self.static: self.emb = nn.Linear(d_static, n_features) - assert d_model % n_features == 0, 'd_model must be divisible by n_features' + assert d_model % n_features == 0, "d_model must be divisible by n_features" self.d_ob = int(d_model / n_features) self.encoder = nn.Linear(n_features * self.d_ob, n_features * self.d_ob) d_pe = 16 self.pos_encoder = PositionalEncodingTF(d_pe, max_len) if self.sensor_wise_mask: dim_check = n_features * (self.d_ob + d_pe) - assert dim_check % n_heads == 0, 'dim_check must be divisible by n_heads' - encoder_layers = TransformerEncoderLayer(n_features * (self.d_ob + d_pe), n_heads, d_inner, dropout) + assert dim_check % n_heads == 0, "dim_check must be divisible by n_heads" + encoder_layers = TransformerEncoderLayer( + n_features * (self.d_ob + d_pe), n_heads, d_inner, dropout + ) else: dim_check = d_model + d_pe - assert dim_check % n_heads == 0, 'dim_check must be divisible by n_heads' + assert dim_check % n_heads == 0, "dim_check must be divisible by n_heads" encoder_layers = TransformerEncoderLayer(d_model + d_pe, n_heads, d_inner, dropout) self.transformer_encoder = TransformerEncoder(encoder_layers, n_layers) @@ -329,20 +385,27 @@ def __init__(self, n_layers, n_features, d_model, d_inner, n_heads, n_classes, d self.R_u = Parameter(torch.Tensor(1, self.n_features * self.d_ob)) - self.ob_propagation = ObservationPropagation(in_channels=max_len * self.d_ob, out_channels=max_len * self.d_ob, - heads=1, n_nodes=n_features, ob_dim=self.d_ob) - self.ob_propagation_layer2 = ObservationPropagation(in_channels=max_len * self.d_ob, - out_channels=max_len * self.d_ob, heads=1, - n_nodes=n_features, ob_dim=self.d_ob) + self.ob_propagation = ObservationPropagation( + in_channels=max_len * self.d_ob, + out_channels=max_len * self.d_ob, + heads=1, + n_nodes=n_features, + ob_dim=self.d_ob, + ) + self.ob_propagation_layer2 = ObservationPropagation( + in_channels=max_len * self.d_ob, + out_channels=max_len * self.d_ob, + heads=1, + n_nodes=n_features, + ob_dim=self.d_ob, + ) if static: d_final = d_model + d_pe + n_features else: d_final = d_model + d_pe self.mlp_static = nn.Sequential( - nn.Linear(d_final, d_final), - nn.ReLU(), - nn.Linear(d_final, n_classes), + nn.Linear(d_final, d_final), nn.ReLU(), 
nn.Linear(d_final, n_classes), ) self.dropout = nn.Dropout(dropout) @@ -376,29 +439,29 @@ def classify(self, inputs): Number of nonzero recordings. missing_mask : array, shape of [n_steps, n_samples, n_features] """ - src = inputs['X'] - static = inputs['static'] - times = inputs['timestamps'] - lengths = inputs['lengths'] - missing_mask = inputs['missing_mask'] + src = inputs["X"] + static = inputs["static"] + times = inputs["timestamps"] + lengths = inputs["lengths"] + missing_mask = inputs["missing_mask"] max_len, batch_size = src.shape[0], src.shape[1] src = torch.repeat_interleave(src, self.d_ob, dim=-1) h = F.relu(src * self.R_u) - pe = self.pos_encoder(times) + pe = self.pos_encoder(times).to(self.device) if static is not None: emb = self.emb(static) h = self.dropout(h) mask = torch.arange(max_len)[None, :] >= (lengths.cpu()[:, None]) - mask = mask.squeeze(1) + mask = mask.squeeze(1).to(self.device) x = h adj = self.global_structure - adj[torch.eye(self.n_features).byte()] = 1 + adj[torch.eye(self.n_features, dtype=torch.bool)] = 1 edge_index = torch.nonzero(adj).T edge_weights = adj[edge_index[0], edge_index[1]] @@ -417,18 +480,28 @@ def classify(self, inputs): step_data = step_data.reshape([n_step, self.n_features, self.d_ob]).permute(1, 0, 2) step_data = step_data.reshape(self.n_features, n_step * self.d_ob) - step_data, attention_weights = self.ob_propagation(step_data, p_t=p_t, edge_index=edge_index, - edge_weights=edge_weights, - use_beta=False, edge_attr=None, - return_attention_weights=True) + step_data, attention_weights = self.ob_propagation( + step_data, + p_t=p_t, + edge_index=edge_index, + edge_weights=edge_weights, + use_beta=False, + edge_attr=None, + return_attention_weights=True, + ) edge_index_layer2 = attention_weights[0] edge_weights_layer2 = attention_weights[1].squeeze(-1) - step_data, attention_weights = self.ob_propagation_layer2(step_data, p_t=p_t, edge_index=edge_index_layer2, - edge_weights=edge_weights_layer2, - use_beta=False, edge_attr=None, - return_attention_weights=True) + step_data, attention_weights = self.ob_propagation_layer2( + step_data, + p_t=p_t, + edge_index=edge_index_layer2, + edge_weights=edge_weights_layer2, + use_beta=False, + edge_attr=None, + return_attention_weights=True, + ) step_data = step_data.view([self.n_features, n_step, self.d_ob]) step_data = step_data.permute([1, 0, 2]) # [n_step, n_features, d_ob] @@ -452,7 +525,7 @@ def classify(self, inputs): sensor_wise_mask = self.sensor_wise_mask - lengths2 = lengths.unsqueeze(1) + lengths2 = lengths.unsqueeze(1).to(self.device) mask2 = mask.permute(1, 0).unsqueeze(2).long() if sensor_wise_mask: output = torch.zeros([batch_size, self.n_features, self.d_ob + 16], device=self.device) @@ -461,10 +534,12 @@ def classify(self, inputs): r_out = r_out.view(-1, batch_size, self.n_features, (self.d_ob + 16)) out = r_out[:, :, se, :] l_ = torch.sum(extended_missing_mask[:, :, se], dim=0).unsqueeze(1) # length - out_sensor = torch.sum(out * (1 - extended_missing_mask[:, :, se].unsqueeze(-1)), dim=0) / (l_ + 1) + out_sensor = torch.sum( + out * (1 - extended_missing_mask[:, :, se].unsqueeze(-1)), dim=0 + ) / (l_ + 1) output[:, se, :] = out_sensor output = output.view([-1, self.n_features * (self.d_ob + 16)]) - elif self.aggregation == 'mean': + elif self.aggregation == "mean": output = torch.sum(r_out * (1 - mask2), dim=0) / (lengths2 + 1) else: raise RuntimeError @@ -479,11 +554,11 @@ def classify(self, inputs): def forward(self, inputs): prediction = self.classify(inputs) - classification_loss 
= F.nll_loss(torch.log(prediction), inputs['label']) + classification_loss = F.nll_loss(torch.log(prediction), inputs["label"]) results = { - 'prediction': prediction, - 'loss': classification_loss + "prediction": prediction, + "loss": classification_loss # 'distance': distance, } @@ -510,32 +585,48 @@ class Raindrop(BaseNNClassifier): Run the model on which device. """ - def __init__(self, - n_features, - n_layers, - d_model, - d_inner, - n_heads, - n_classes, - dropout, - max_len, - d_static, - aggregation, - sensor_wise_mask, - static, - learning_rate=1e-3, - epochs=100, - patience=10, - batch_size=32, - weight_decay=1e-5, - device=None): - super().__init__(n_classes, learning_rate, epochs, patience, batch_size, - weight_decay, device) + def __init__( + self, + n_features, + n_layers, + d_model, + d_inner, + n_heads, + n_classes, + dropout, + max_len, + d_static, + aggregation, + sensor_wise_mask, + static, + learning_rate=1e-3, + epochs=100, + patience=10, + batch_size=32, + weight_decay=1e-5, + device=None, + ): + super().__init__( + n_classes, learning_rate, epochs, patience, batch_size, weight_decay, device + ) self.n_features = n_features self.n_steps = max_len - self.model = _Raindrop(n_layers, n_features, d_model, d_inner, n_heads, n_classes, dropout, max_len, d_static, - aggregation, sensor_wise_mask, static=static, device=self.device) + self.model = _Raindrop( + n_layers, + n_features, + d_model, + d_inner, + n_heads, + n_classes, + dropout, + max_len, + d_static, + aggregation, + sensor_wise_mask, + static=static, + device=self.device, + ) self.model = self.model.to(self.device) self._print_model_size() @@ -596,12 +687,12 @@ def assemble_input_data(self, data): times = times.permute(1, 0) inputs = { - 'X': X, - 'static': None, - 'timestamps': times, - 'lengths': lengths, - 'missing_mask': missing_mask, - 'label': label + "X": X, + "static": None, + "timestamps": times, + "lengths": lengths, + "missing_mask": missing_mask, + "label": label, } return inputs @@ -627,11 +718,11 @@ def classify(self, X): times = times.permute(1, 0) inputs = { - 'X': X, - 'static': None, - 'timestamps': times, - 'lengths': lengths, - 'missing_mask': missing_mask, + "X": X, + "static": None, + "timestamps": times, + "lengths": lengths, + "missing_mask": missing_mask, } prediction = self.model.classify(inputs) From 80d34be184d7a4e4481bb6fa6e230cc9c53b3687 Mon Sep 17 00:00:00 2001 From: WenjieDu Date: Mon, 22 Aug 2022 13:29:06 +0800 Subject: [PATCH 4/4] refactor: applying Black to format python code; --- README.md | 4 +- pypots/__version__.py | 2 +- pypots/base.py | 94 +++++--- pypots/classification/__init__.py | 7 +- pypots/classification/base.py | 75 +++--- pypots/classification/brits.py | 184 +++++++++------ pypots/classification/grud.py | 111 +++++---- pypots/classification/raindrop.py | 67 ++++-- pypots/clustering/__init__.py | 5 +- pypots/clustering/base.py | 74 +++--- pypots/clustering/crli.py | 320 ++++++++++++++++---------- pypots/clustering/vader.py | 263 +++++++++++++-------- pypots/data/__init__.py | 5 +- pypots/data/base.py | 8 +- pypots/data/dataset_for_brits.py | 43 ++-- pypots/data/dataset_for_grud.py | 13 +- pypots/data/dataset_for_mit.py | 8 +- pypots/data/generating.py | 56 +++-- pypots/data/load_specific_datasets.py | 43 ++-- pypots/forecasting/__init__.py | 5 +- pypots/forecasting/base.py | 67 +++--- pypots/forecasting/bttf.py | 243 +++++++++++++------ pypots/imputation/__init__.py | 9 +- pypots/imputation/base.py | 25 +- pypots/imputation/brits.py | 172 ++++++++------ 
pypots/imputation/locf.py | 8 +- pypots/imputation/saits.py | 173 +++++++++----- pypots/imputation/transformer.py | 183 ++++++++++----- pypots/tests/test_classification.py | 170 ++++++++------ pypots/tests/test_clustering.py | 74 +++--- pypots/tests/test_forecasting.py | 24 +- pypots/tests/test_imputation.py | 162 +++++++------ pypots/tests/unified_data_for_test.py | 106 +++++---- pypots/utils/__init__.py | 3 +- pypots/utils/metrics.py | 115 +++++---- setup.py | 56 +++-- 36 files changed, 1858 insertions(+), 1119 deletions(-) diff --git a/README.md b/README.md index 7d597108..a279eb02 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ - + -#

Welcome to PyPOTS
+## Welcome to PyPOTS
 **A Python Toolbox for Data Mining on Partially-Observed Time Series**

diff --git a/pypots/__version__.py b/pypots/__version__.py index 0323fd65..ae44ed76 100644 --- a/pypots/__version__.py +++ b/pypots/__version__.py @@ -21,4 +21,4 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' -version = '0.0.7' +version = "0.0.7" diff --git a/pypots/base.py b/pypots/base.py index 8d8e3acf..cdded5d5 100644 --- a/pypots/base.py +++ b/pypots/base.py @@ -12,8 +12,7 @@ class BaseModel(ABC): - """ Base class for all models. - """ + """Base class for all models.""" def __init__(self, device): self.logger = {} @@ -21,13 +20,17 @@ def __init__(self, device): if device is None: self.device = torch.device( - "cuda:0" if torch.cuda.is_available() and torch.cuda.device_count() > 0 else "cpu" + "cuda:0" + if torch.cuda.is_available() and torch.cuda.device_count() > 0 + else "cpu" ) else: self.device = device - def check_input(self, expected_n_steps, expected_n_features, X, y=None, out_dtype='tensor'): - """ Check value type and shape of input X and y + def check_input( + self, expected_n_steps, expected_n_features, X, y=None, out_dtype="tensor" + ): + """Check value type and shape of input X and y Parameters ---------- @@ -54,15 +57,20 @@ def check_input(self, expected_n_steps, expected_n_features, X, y=None, out_dtyp y : tensor """ - assert out_dtype in ['tensor', 'ndarray'], f'out_dtype should be "tensor" or "ndarray", but got {out_dtype}' + assert out_dtype in [ + "tensor", + "ndarray", + ], f'out_dtype should be "tensor" or "ndarray", but got {out_dtype}' is_list = isinstance(X, list) is_array = isinstance(X, np.ndarray) is_tensor = isinstance(X, torch.Tensor) - assert is_tensor or is_array or is_list, TypeError('X should be an instance of list/np.ndarray/torch.Tensor, ' - f'but got {type(X)}') + assert is_tensor or is_array or is_list, TypeError( + "X should be an instance of list/np.ndarray/torch.Tensor, " + f"but got {type(X)}" + ) # convert the data type if in need - if out_dtype == 'tensor': + if out_dtype == "tensor": if is_list: X = torch.tensor(X).to(self.device) elif is_array: @@ -80,29 +88,42 @@ def check_input(self, expected_n_steps, expected_n_features, X, y=None, out_dtyp # check the shape of X here X_shape = X.shape - assert len(X_shape) == 3, f'input should have 3 dimensions [n_samples, seq_len, n_features],' \ - f'but got shape={X.shape}' - assert X_shape[1] == expected_n_steps, f'expect X.shape[1] to be {expected_n_steps}, but got {X_shape[1]}' - assert X_shape[2] == expected_n_features, f'expect X.shape[2] to be {expected_n_features}, but got {X_shape[2]}' + assert len(X_shape) == 3, ( + f"input should have 3 dimensions [n_samples, seq_len, n_features]," + f"but got shape={X.shape}" + ) + assert ( + X_shape[1] == expected_n_steps + ), f"expect X.shape[1] to be {expected_n_steps}, but got {X_shape[1]}" + assert ( + X_shape[2] == expected_n_features + ), f"expect X.shape[2] to be {expected_n_features}, but got {X_shape[2]}" if y is not None: - assert len(X) == len(y), f'lengths of X and y must match, ' \ - f'but got f{len(X)} and {len(y)}' + assert len(X) == len(y), ( + f"lengths of X and y must match, " f"but got f{len(X)} and {len(y)}" + ) if isinstance(y, torch.Tensor): - y = y.to(self.device) if out_dtype == 'tensor' else y.numpy() + y = y.to(self.device) if out_dtype == "tensor" else y.numpy() elif isinstance(y, list): - y = torch.tensor(y).to(self.device) if out_dtype == 'tensor' else np.asarray(y) + y = ( + torch.tensor(y).to(self.device) + if out_dtype == "tensor" + else 
np.asarray(y) + ) elif isinstance(y, np.ndarray): - y = torch.from_numpy(y).to(self.device) if out_dtype == 'tensor' else y + y = torch.from_numpy(y).to(self.device) if out_dtype == "tensor" else y else: - raise TypeError('y should be an instance of list/np.ndarray/torch.Tensor, ' - f'but got {type(y)}') + raise TypeError( + "y should be an instance of list/np.ndarray/torch.Tensor, " + f"but got {type(y)}" + ) return X, y else: return X def save_logs_to_tensorboard(self, saving_path): - """ Save logs (self.logger) into a tensorboard file. + """Save logs (self.logger) into a tensorboard file. Parameters ---------- @@ -110,14 +131,14 @@ def save_logs_to_tensorboard(self, saving_path): Local disk path to save the tensorboard file. """ # TODO: find a solution for log saving - raise IOError('This function is not ready for users.') + raise IOError("This function is not ready for users.") # tb_summary_writer = SummaryWriter(saving_path) # tb_summary_writer.add_custom_scalars(self.logger) # tb_summary_writer.close() # print(f'Log saved successfully to {saving_path}.') def save_model(self, saving_path): - """ Save the model to a disk file. + """Save the model to a disk file. Parameters ---------- @@ -128,10 +149,10 @@ def save_model(self, saving_path): torch.save(self.model, saving_path) except Exception as e: print(e) - print(f'Saved successfully to {saving_path}.') + print(f"Saved successfully to {saving_path}.") def load_model(self, model_path): - """ Load the saved model from a disk file. + """Load the saved model from a disk file. Parameters ---------- @@ -152,14 +173,15 @@ def load_model(self, model_path): self.model = loaded_model.model except Exception as e: raise e - print(f'Model loaded successfully from {model_path}.') + print(f"Model loaded successfully from {model_path}.") class BaseNNModel(BaseModel): - """ Abstract class for all neural-network models. - """ + """Abstract class for all neural-network models.""" - def __init__(self, learning_rate, epochs, patience, batch_size, weight_decay, device): + def __init__( + self, learning_rate, epochs, patience, batch_size, weight_decay, device + ): super().__init__(device) # training hype-parameters @@ -173,14 +195,12 @@ def __init__(self, learning_rate, epochs, patience, batch_size, weight_decay, de self.model = None self.optimizer = None self.best_model_dict = None - self.best_loss = float('inf') - self.logger = { - 'training_loss': [], - 'validating_loss': [] - } + self.best_loss = float("inf") + self.logger = {"training_loss": [], "validating_loss": []} def _print_model_size(self): - """ Print the number of trainable parameters in the initialized NN model. - """ + """Print the number of trainable parameters in the initialized NN model.""" num_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad) - print(f'Model initialized successfully. Number of the trainable parameters: {num_params}') + print( + f"Model initialized successfully. 
Number of the trainable parameters: {num_params}" + ) diff --git a/pypots/classification/__init__.py b/pypots/classification/__init__.py index 80d42c90..56a7a03c 100644 --- a/pypots/classification/__init__.py +++ b/pypots/classification/__init__.py @@ -10,8 +10,7 @@ from pypots.classification.raindrop import Raindrop __all__ = [ - 'BRITS', - 'GRUD', - 'Raindrop', - + "BRITS", + "GRUD", + "Raindrop", ] diff --git a/pypots/classification/base.py b/pypots/classification/base.py index c1b706a7..4ee02af0 100644 --- a/pypots/classification/base.py +++ b/pypots/classification/base.py @@ -15,15 +15,14 @@ class BaseClassifier(BaseModel): - """ Abstract class for all classification models. - """ + """Abstract class for all classification models.""" def __init__(self, device): super().__init__(device) @abstractmethod def fit(self, train_X, train_y, val_X=None, val_y=None): - """ Train the classifier. + """Train the classifier. Parameters ---------- @@ -45,7 +44,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): @abstractmethod def classify(self, X): - """ Classify the input with the trained model. + """Classify the input with the trained model. Parameters ---------- @@ -61,9 +60,19 @@ def classify(self, X): class BaseNNClassifier(BaseNNModel, BaseClassifier): - def __init__(self, n_classes, learning_rate, epochs, patience, batch_size, weight_decay, - device): - super().__init__(learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, + n_classes, + learning_rate, + epochs, + patience, + batch_size, + weight_decay, + device, + ): + super().__init__( + learning_rate, epochs, patience, batch_size, weight_decay, device + ) self.n_classes = n_classes @abstractmethod @@ -71,12 +80,12 @@ def assemble_input_data(self, data): pass def _train_model(self, training_loader, val_loader=None): - self.optimizer = torch.optim.Adam(self.model.parameters(), - lr=self.lr, - weight_decay=self.weight_decay) + self.optimizer = torch.optim.Adam( + self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + ) # each training starts from the very beginning, so reset the loss and model dict here - self.best_loss = float('inf') + self.best_loss = float("inf") self.best_model_dict = None try: @@ -87,12 +96,14 @@ def _train_model(self, training_loader, val_loader=None): inputs = self.assemble_input_data(data) self.optimizer.zero_grad() results = self.model.forward(inputs) - results['loss'].backward() + results["loss"].backward() self.optimizer.step() - epoch_train_loss_collector.append(results['loss'].item()) + epoch_train_loss_collector.append(results["loss"].item()) - mean_train_loss = np.mean(epoch_train_loss_collector) # mean training loss of the current epoch - self.logger['training_loss'].append(mean_train_loss) + mean_train_loss = np.mean( + epoch_train_loss_collector + ) # mean training loss of the current epoch + self.logger["training_loss"].append(mean_train_loss) if val_loader is not None: self.model.eval() @@ -101,14 +112,16 @@ def _train_model(self, training_loader, val_loader=None): for idx, data in enumerate(val_loader): inputs = self.assemble_input_data(data) results = self.model.forward(inputs) - epoch_val_loss_collector.append(results['loss'].item()) + epoch_val_loss_collector.append(results["loss"].item()) mean_val_loss = np.mean(epoch_val_loss_collector) - self.logger['validating_loss'].append(mean_val_loss) - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}') + 
self.logger["validating_loss"].append(mean_val_loss) + print( + f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}" + ) mean_loss = mean_val_loss else: - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}') + print(f"epoch {epoch}: training loss {mean_train_loss:.4f}") mean_loss = mean_train_loss if mean_loss < self.best_loss: @@ -118,18 +131,24 @@ def _train_model(self, training_loader, val_loader=None): else: self.patience -= 1 if self.patience == 0: - print('Exceeded the training patience. Terminating the training procedure...') + print( + "Exceeded the training patience. Terminating the training procedure..." + ) break except Exception as e: - print(f'Exception: {e}') + print(f"Exception: {e}") if self.best_model_dict is None: - raise RuntimeError('Training got interrupted. Model was not get trained. Please try fit() again.') + raise RuntimeError( + "Training got interrupted. Model was not get trained. Please try fit() again." + ) else: - RuntimeWarning('Training got interrupted. ' - 'Model will load the best parameters so far for testing. ' - "If you don't want it, please try fit() again.") + RuntimeWarning( + "Training got interrupted. " + "Model will load the best parameters so far for testing. " + "If you don't want it, please try fit() again." + ) - if np.equal(self.best_loss, float('inf')): - raise ValueError('Something is wrong. best_loss is Nan after training.') + if np.equal(self.best_loss, float("inf")): + raise ValueError("Something is wrong. best_loss is Nan after training.") - print('Finished training.') + print("Finished training.") diff --git a/pypots/classification/brits.py b/pypots/classification/brits.py index 10618dd3..f73dbcf5 100644 --- a/pypots/classification/brits.py +++ b/pypots/classification/brits.py @@ -12,10 +12,7 @@ from pypots.classification.base import BaseNNClassifier from pypots.data import DatasetForBRITS -from pypots.imputation.brits import ( - RITS as imputation_RITS, - _BRITS as imputation_BRITS -) +from pypots.imputation.brits import RITS as imputation_RITS, _BRITS as imputation_BRITS class RITS(imputation_RITS): @@ -24,16 +21,24 @@ def __init__(self, n_steps, n_features, rnn_hidden_size, n_classes, device=None) self.dropout = nn.Dropout(p=0.25) self.classifier = nn.Linear(self.rnn_hidden_size, n_classes) - def forward(self, inputs, direction='forward'): + def forward(self, inputs, direction="forward"): ret_dict = super().forward(inputs, direction) - logits = self.classifier(ret_dict['final_hidden_state']) - ret_dict['prediction'] = torch.softmax(logits, dim=1) + logits = self.classifier(ret_dict["final_hidden_state"]) + ret_dict["prediction"] = torch.softmax(logits, dim=1) return ret_dict class _BRITS(imputation_BRITS, nn.Module): - def __init__(self, n_steps, n_features, rnn_hidden_size, n_classes, - classification_weight, reconstruction_weight, device=None): + def __init__( + self, + n_steps, + n_features, + rnn_hidden_size, + n_classes, + classification_weight, + reconstruction_weight, + device=None, + ): super().__init__(n_steps, n_features, rnn_hidden_size) self.n_steps = n_steps self.n_features = n_features @@ -46,7 +51,7 @@ def __init__(self, n_steps, n_features, rnn_hidden_size, n_classes, self.reconstruction_weight = reconstruction_weight def merge_ret(self, ret_f, ret_b): - """ Merge (average) results from two RITS models into one. + """Merge (average) results from two RITS models into one. 
Parameters ---------- @@ -61,19 +66,19 @@ def merge_ret(self, ret_f, ret_b): Merged results in a dictionary. """ results = { - 'imputed_data': (ret_f['imputed_data'] + ret_b['imputed_data']) / 2, - 'prediction': (ret_f['prediction'] + ret_b['prediction']) / 2 + "imputed_data": (ret_f["imputed_data"] + ret_b["imputed_data"]) / 2, + "prediction": (ret_f["prediction"] + ret_b["prediction"]) / 2, } return results def classify(self, inputs): - ret_f = self.rits_f(inputs, 'forward') - ret_b = self.reverse(self.rits_b(inputs, 'backward')) + ret_f = self.rits_f(inputs, "forward") + ret_b = self.reverse(self.rits_b(inputs, "backward")) merged_ret = self.merge_ret(ret_f, ret_b) return merged_ret, ret_f, ret_b def forward(self, inputs): - """ Forward processing of BRITS. + """Forward processing of BRITS. Parameters ---------- @@ -85,21 +90,32 @@ def forward(self, inputs): dict, A dictionary includes all results. """ merged_ret, ret_f, ret_b = self.classify(inputs) - ret_f['classification_loss'] = F.nll_loss(torch.log(ret_f['prediction']), inputs['label']) - ret_b['classification_loss'] = F.nll_loss(torch.log(ret_b['prediction']), inputs['label']) - consistency_loss = self.get_consistency_loss(ret_f['imputed_data'], ret_b['imputed_data']) - classification_loss = (ret_f['classification_loss'] + ret_b['classification_loss']) / 2 - merged_ret['consistency_loss'] = consistency_loss - merged_ret['classification_loss'] = classification_loss - merged_ret['loss'] = \ - consistency_loss + \ - (ret_f['reconstruction_loss'] + ret_b['reconstruction_loss']) * self.reconstruction_weight + \ - (ret_f['classification_loss'] + ret_b['classification_loss']) * self.classification_weight + ret_f["classification_loss"] = F.nll_loss( + torch.log(ret_f["prediction"]), inputs["label"] + ) + ret_b["classification_loss"] = F.nll_loss( + torch.log(ret_b["prediction"]), inputs["label"] + ) + consistency_loss = self.get_consistency_loss( + ret_f["imputed_data"], ret_b["imputed_data"] + ) + classification_loss = ( + ret_f["classification_loss"] + ret_b["classification_loss"] + ) / 2 + merged_ret["consistency_loss"] = consistency_loss + merged_ret["classification_loss"] = classification_loss + merged_ret["loss"] = ( + consistency_loss + + (ret_f["reconstruction_loss"] + ret_b["reconstruction_loss"]) + * self.reconstruction_weight + + (ret_f["classification_loss"] + ret_b["classification_loss"]) + * self.classification_weight + ) return merged_ret class BRITS(BaseNNClassifier): - """ BRITS implementation of BaseClassifier. + """BRITS implementation of BaseClassifier. Attributes ---------- @@ -128,20 +144,24 @@ class BRITS(BaseNNClassifier): Run the model on which device. 
""" - def __init__(self, - n_steps, - n_features, - rnn_hidden_size, - n_classes, - classification_weight=1, - reconstruction_weight=1, - learning_rate=1e-3, - epochs=100, - patience=10, - batch_size=32, - weight_decay=1e-5, - device=None): - super().__init__(n_classes, learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, + n_steps, + n_features, + rnn_hidden_size, + n_classes, + classification_weight=1, + reconstruction_weight=1, + learning_rate=1e-3, + epochs=100, + patience=10, + batch_size=32, + weight_decay=1e-5, + device=None, + ): + super().__init__( + n_classes, learning_rate, epochs, patience, batch_size, weight_decay, device + ) self.n_steps = n_steps self.n_features = n_features @@ -149,13 +169,20 @@ def __init__(self, self.classification_weight = classification_weight self.reconstruction_weight = reconstruction_weight - self.model = _BRITS(self.n_steps, self.n_features, self.rnn_hidden_size, self.n_classes, - self.classification_weight, self.reconstruction_weight, self.device) + self.model = _BRITS( + self.n_steps, + self.n_features, + self.rnn_hidden_size, + self.n_classes, + self.classification_weight, + self.reconstruction_weight, + self.device, + ) self.model = self.model.to(self.device) self._print_model_size() def fit(self, train_X, train_y, val_X=None, val_y=None): - """ Fit the model on the given training data. + """Fit the model on the given training data. Parameters ---------- @@ -169,11 +196,17 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): self : object, Trained model. """ - train_X, train_y = self.check_input(self.n_steps, self.n_features, train_X, train_y) + train_X, train_y = self.check_input( + self.n_steps, self.n_features, train_X, train_y + ) val_X, val_y = self.check_input(self.n_steps, self.n_features, val_X, val_y) - training_set = DatasetForBRITS(train_X, train_y) # time_gaps is necessary for BRITS - training_loader = DataLoader(training_set, batch_size=self.batch_size, shuffle=True) + training_set = DatasetForBRITS( + train_X, train_y + ) # time_gaps is necessary for BRITS + training_loader = DataLoader( + training_set, batch_size=self.batch_size, shuffle=True + ) if val_X is None: self._train_model(training_loader) @@ -187,7 +220,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): return self def assemble_input_data(self, data): - """ Assemble the input data into a dictionary. + """Assemble the input data into a dictionary. Parameters ---------- @@ -200,22 +233,27 @@ def assemble_input_data(self, data): A dictionary with data assembled. 
""" # fetch data - indices, X, missing_mask, deltas, back_X, back_missing_mask, back_deltas, label = data + ( + indices, + X, + missing_mask, + deltas, + back_X, + back_missing_mask, + back_deltas, + label, + ) = data # assemble input data inputs = { - 'indices': indices, - 'label': label, - 'forward': { - 'X': X, - 'missing_mask': missing_mask, - 'deltas': deltas + "indices": indices, + "label": label, + "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas}, + "backward": { + "X": back_X, + "missing_mask": back_missing_mask, + "deltas": back_deltas, }, - 'backward': { - 'X': back_X, - 'missing_mask': back_missing_mask, - 'deltas': back_deltas - } } return inputs @@ -229,24 +267,28 @@ def classify(self, X): with torch.no_grad(): for idx, data in enumerate(test_loader): # cannot use input_data_processing, cause here has no label - indices, X, missing_mask, deltas, back_X, back_missing_mask, back_deltas = data + ( + indices, + X, + missing_mask, + deltas, + back_X, + back_missing_mask, + back_deltas, + ) = data # assemble input data inputs = { - 'indices': indices, - 'forward': { - 'X': X, - 'missing_mask': missing_mask, - 'deltas': deltas + "indices": indices, + "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas}, + "backward": { + "X": back_X, + "missing_mask": back_missing_mask, + "deltas": back_deltas, }, - 'backward': { - 'X': back_X, - 'missing_mask': back_missing_mask, - 'deltas': back_deltas - } } results, _, _ = self.model.classify(inputs) - prediction_collector.append(results['prediction']) + prediction_collector.append(results["prediction"]) predictions = torch.cat(prediction_collector) return predictions.cpu().detach().numpy() diff --git a/pypots/classification/grud.py b/pypots/classification/grud.py index da23e88d..7b313eb0 100644 --- a/pypots/classification/grud.py +++ b/pypots/classification/grud.py @@ -24,19 +24,27 @@ def __init__(self, n_steps, n_features, rnn_hidden_size, n_classes, device=None) self.device = device # create models - self.rnn_cell = nn.GRUCell(self.n_features * 2 + self.rnn_hidden_size, self.rnn_hidden_size) - self.temp_decay_h = TemporalDecay(input_size=self.n_features, output_size=self.rnn_hidden_size, diag=False) - self.temp_decay_x = TemporalDecay(input_size=self.n_features, output_size=self.n_features, diag=True) + self.rnn_cell = nn.GRUCell( + self.n_features * 2 + self.rnn_hidden_size, self.rnn_hidden_size + ) + self.temp_decay_h = TemporalDecay( + input_size=self.n_features, output_size=self.rnn_hidden_size, diag=False + ) + self.temp_decay_x = TemporalDecay( + input_size=self.n_features, output_size=self.n_features, diag=True + ) self.classifier = nn.Linear(self.rnn_hidden_size, self.n_classes) def classify(self, inputs): - values = inputs['X'] - masks = inputs['missing_mask'] - deltas = inputs['deltas'] - empirical_mean = inputs['empirical_mean'] - X_filledLOCF = inputs['X_filledLOCF'] + values = inputs["X"] + masks = inputs["missing_mask"] + deltas = inputs["deltas"] + empirical_mean = inputs["empirical_mean"] + X_filledLOCF = inputs["X_filledLOCF"] - hidden_state = torch.zeros((values.size()[0], self.rnn_hidden_size), device=self.device) + hidden_state = torch.zeros( + (values.size()[0], self.rnn_hidden_size), device=self.device + ) for t in range(self.n_steps): # for data, [batch, time, features] @@ -59,7 +67,7 @@ def classify(self, inputs): return prediction def forward(self, inputs): - """ Forward processing of GRU-D. + """Forward processing of GRU-D. 
Parameters ---------- @@ -72,16 +80,13 @@ def forward(self, inputs): A dictionary includes all results. """ prediction = self.classify(inputs) - classification_loss = F.nll_loss(torch.log(prediction), inputs['label']) - results = { - 'prediction': prediction, - 'loss': classification_loss - } + classification_loss = F.nll_loss(torch.log(prediction), inputs["label"]) + results = {"prediction": prediction, "loss": classification_loss} return results class GRUD(BaseNNClassifier): - """ GRU-D implementation of BaseClassifier. + """GRU-D implementation of BaseClassifier. Attributes ---------- @@ -110,28 +115,38 @@ class GRUD(BaseNNClassifier): Run the model on which device. """ - def __init__(self, - n_steps, - n_features, - rnn_hidden_size, - n_classes, - learning_rate=1e-3, - epochs=100, - patience=10, - batch_size=32, - weight_decay=1e-5, - device=None): - super().__init__(n_classes, learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, + n_steps, + n_features, + rnn_hidden_size, + n_classes, + learning_rate=1e-3, + epochs=100, + patience=10, + batch_size=32, + weight_decay=1e-5, + device=None, + ): + super().__init__( + n_classes, learning_rate, epochs, patience, batch_size, weight_decay, device + ) self.n_steps = n_steps self.n_features = n_features self.rnn_hidden_size = rnn_hidden_size - self.model = _GRUD(self.n_steps, self.n_features, self.rnn_hidden_size, self.n_classes, self.device) + self.model = _GRUD( + self.n_steps, + self.n_features, + self.rnn_hidden_size, + self.n_classes, + self.device, + ) self.model = self.model.to(self.device) self._print_model_size() def fit(self, train_X, train_y, val_X=None, val_y=None): - """ Fit the model on the given training data. + """Fit the model on the given training data. Parameters ---------- @@ -145,11 +160,15 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): self : object, Trained model. """ - train_X, train_y = self.check_input(self.n_steps, self.n_features, train_X, train_y) + train_X, train_y = self.check_input( + self.n_steps, self.n_features, train_X, train_y + ) val_X, val_y = self.check_input(self.n_steps, self.n_features, val_X, val_y) training_set = DatasetForGRUD(train_X, train_y) - training_loader = DataLoader(training_set, batch_size=self.batch_size, shuffle=True) + training_loader = DataLoader( + training_set, batch_size=self.batch_size, shuffle=True + ) if val_X is None: self._train_model(training_loader) @@ -163,7 +182,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): return self def assemble_input_data(self, data): - """ Assemble the input data into a dictionary. + """Assemble the input data into a dictionary. 
Parameters ---------- @@ -180,13 +199,13 @@ def assemble_input_data(self, data): # assemble input data inputs = { - 'indices': indices, - 'X': X, - 'X_filledLOCF': X_filledLOCF, - 'missing_mask': missing_mask, - 'deltas': deltas, - 'empirical_mean': empirical_mean, - 'label': label, + "indices": indices, + "X": X, + "X_filledLOCF": X_filledLOCF, + "missing_mask": missing_mask, + "deltas": deltas, + "empirical_mean": empirical_mean, + "label": label, } return inputs @@ -203,12 +222,12 @@ def classify(self, X): indices, X, X_filledLOCF, missing_mask, deltas, empirical_mean = data # assemble input data inputs = { - 'indices': indices, - 'X': X, - 'X_filledLOCF': X_filledLOCF, - 'missing_mask': missing_mask, - 'deltas': deltas, - 'empirical_mean': empirical_mean, + "indices": indices, + "X": X, + "X_filledLOCF": X_filledLOCF, + "missing_mask": missing_mask, + "deltas": deltas, + "empirical_mean": empirical_mean, } prediction = self.model.classify(inputs) diff --git a/pypots/classification/raindrop.py b/pypots/classification/raindrop.py index 3424a822..9d2907d3 100644 --- a/pypots/classification/raindrop.py +++ b/pypots/classification/raindrop.py @@ -38,8 +38,7 @@ class PositionalEncodingTF(nn.Module): - """ Generate positional encoding according to time information. - """ + """Generate positional encoding according to time information.""" def __init__(self, d_pe, max_len=500): super().__init__() @@ -50,7 +49,7 @@ def __init__(self, d_pe, max_len=500): self._num_timescales = d_pe // 2 def forward(self, time_vectors): - """ Generate positional encoding. + """Generate positional encoding. Parameters ---------- @@ -131,7 +130,9 @@ def __init__( self.bias = Parameter(torch.Tensor(heads * out_channels)) self.n_nodes = n_nodes - self.nodewise_weights = Parameter(torch.Tensor(self.n_nodes, heads * out_channels)) + self.nodewise_weights = Parameter( + torch.Tensor(self.n_nodes, heads * out_channels) + ) self.increase_dim = Linear(in_channels[1], heads * out_channels * 8) self.map_weights = Parameter(torch.Tensor(self.n_nodes, heads * 16)) @@ -259,7 +260,17 @@ def message( p_emb = self.p_t.unsqueeze(0) - aa = torch.cat([w_v.repeat(1, n_step, 1,), p_emb.repeat(n_edges, 1, 1)], dim=-1) + aa = torch.cat( + [ + w_v.repeat( + 1, + n_step, + 1, + ), + p_emb.repeat(n_edges, 1, 1), + ], + dim=-1, + ) beta = torch.mean(h_W * aa, dim=-1) if edge_weights is not None: @@ -295,7 +306,9 @@ def message( target_nodes = self.edge_index[1] w1 = self.nodewise_weights[source_nodes].unsqueeze(-1) w2 = self.nodewise_weights[target_nodes].unsqueeze(1) - out = torch.bmm(x_i.view(-1, self.heads, self.out_channels), torch.bmm(w1, w2)) + out = torch.bmm( + x_i.view(-1, self.heads, self.out_channels), torch.bmm(w1, w2) + ) if use_beta: out = out * gamma.view(-1, self.heads, out.shape[-1]) else: @@ -320,7 +333,9 @@ def aggregate( :meth:`__init__` by the :obj:`aggr` argument. 
""" index = self.index - return scatter(inputs, index, dim=self.node_dim, dim_size=dim_size, reduce=self.aggr) + return scatter( + inputs, index, dim=self.node_dim, dim_size=dim_size, reduce=self.aggr + ) def __repr__(self): return "{}({}, {}, heads={})".format( @@ -378,7 +393,9 @@ def __init__( else: dim_check = d_model + d_pe assert dim_check % n_heads == 0, "dim_check must be divisible by n_heads" - encoder_layers = TransformerEncoderLayer(d_model + d_pe, n_heads, d_inner, dropout) + encoder_layers = TransformerEncoderLayer( + d_model + d_pe, n_heads, d_inner, dropout + ) self.transformer_encoder = TransformerEncoder(encoder_layers, n_layers) self.adj = torch.ones([self.n_features, self.n_features], device=self.device) @@ -405,7 +422,9 @@ def __init__( d_final = d_model + d_pe self.mlp_static = nn.Sequential( - nn.Linear(d_final, d_final), nn.ReLU(), nn.Linear(d_final, n_classes), + nn.Linear(d_final, d_final), + nn.ReLU(), + nn.Linear(d_final, n_classes), ) self.dropout = nn.Dropout(dropout) @@ -419,7 +438,7 @@ def init_weights(self): glorot(self.R_u) def classify(self, inputs): - """ Forward processing of BRITS. + """Forward processing of BRITS. Parameters ---------- @@ -468,7 +487,9 @@ def classify(self, inputs): batch_size = src.shape[1] n_step = src.shape[0] - output = torch.zeros([n_step, batch_size, self.n_features * self.d_ob], device=self.device) + output = torch.zeros( + [n_step, batch_size, self.n_features * self.d_ob], device=self.device + ) alpha_all = torch.zeros([edge_index.shape[1], batch_size], device=self.device) @@ -477,7 +498,9 @@ def classify(self, inputs): step_data = x[:, unit, :] p_t = pe[:, unit, :] - step_data = step_data.reshape([n_step, self.n_features, self.d_ob]).permute(1, 0, 2) + step_data = step_data.reshape([n_step, self.n_features, self.d_ob]).permute( + 1, 0, 2 + ) step_data = step_data.reshape(self.n_features, n_step * self.d_ob) step_data, attention_weights = self.ob_propagation( @@ -528,12 +551,16 @@ def classify(self, inputs): lengths2 = lengths.unsqueeze(1).to(self.device) mask2 = mask.permute(1, 0).unsqueeze(2).long() if sensor_wise_mask: - output = torch.zeros([batch_size, self.n_features, self.d_ob + 16], device=self.device) + output = torch.zeros( + [batch_size, self.n_features, self.d_ob + 16], device=self.device + ) extended_missing_mask = missing_mask.view(-1, batch_size, self.n_features) for se in range(self.n_features): r_out = r_out.view(-1, batch_size, self.n_features, (self.d_ob + 16)) out = r_out[:, :, se, :] - l_ = torch.sum(extended_missing_mask[:, :, se], dim=0).unsqueeze(1) # length + l_ = torch.sum(extended_missing_mask[:, :, se], dim=0).unsqueeze( + 1 + ) # length out_sensor = torch.sum( out * (1 - extended_missing_mask[:, :, se].unsqueeze(-1)), dim=0 ) / (l_ + 1) @@ -631,7 +658,7 @@ def __init__( self._print_model_size() def fit(self, train_X, train_y, val_X=None, val_y=None): - """ Fit the model on the given training data. + """Fit the model on the given training data. Parameters ---------- @@ -645,11 +672,15 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): self : object, Trained model. 
""" - train_X, train_y = self.check_input(self.n_steps, self.n_features, train_X, train_y) + train_X, train_y = self.check_input( + self.n_steps, self.n_features, train_X, train_y + ) val_X, val_y = self.check_input(self.n_steps, self.n_features, val_X, val_y) training_set = DatasetForGRUD(train_X, train_y) - training_loader = DataLoader(training_set, batch_size=self.batch_size, shuffle=True) + training_loader = DataLoader( + training_set, batch_size=self.batch_size, shuffle=True + ) if val_X is None: self._train_model(training_loader) @@ -663,7 +694,7 @@ def fit(self, train_X, train_y, val_X=None, val_y=None): return self def assemble_input_data(self, data): - """ Assemble the input data into a dictionary. + """Assemble the input data into a dictionary. Parameters ---------- diff --git a/pypots/clustering/__init__.py b/pypots/clustering/__init__.py index 5ed2a560..b8fb3b8e 100644 --- a/pypots/clustering/__init__.py +++ b/pypots/clustering/__init__.py @@ -8,7 +8,4 @@ from pypots.clustering.crli import CRLI from pypots.clustering.vader import VaDER -__all__ = [ - 'CRLI', - 'VaDER' -] +__all__ = ["CRLI", "VaDER"] diff --git a/pypots/clustering/base.py b/pypots/clustering/base.py index 236f3c67..e7420521 100644 --- a/pypots/clustering/base.py +++ b/pypots/clustering/base.py @@ -15,15 +15,14 @@ class BaseClusterer(BaseModel): - """ Abstract class for all clustering models. - """ + """Abstract class for all clustering models.""" def __init__(self, device): super().__init__(device) @abstractmethod def fit(self, train_X): - """ Train the cluster. + """Train the cluster. Parameters ---------- @@ -39,7 +38,7 @@ def fit(self, train_X): @abstractmethod def cluster(self, X): - """ Cluster the input with the trained model. + """Cluster the input with the trained model. 
Parameters ---------- @@ -55,8 +54,19 @@ def cluster(self, X): class BaseNNClusterer(BaseNNModel, BaseClusterer): - def __init__(self, n_clusters, learning_rate, epochs, patience, batch_size, weight_decay, device): - super().__init__(learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, + n_clusters, + learning_rate, + epochs, + patience, + batch_size, + weight_decay, + device, + ): + super().__init__( + learning_rate, epochs, patience, batch_size, weight_decay, device + ) self.n_clusters = n_clusters @abstractmethod @@ -64,12 +74,12 @@ def assemble_input_data(self, data): pass def _train_model(self, training_loader, val_loader=None): - self.optimizer = torch.optim.Adam(self.model.parameters(), - lr=self.lr, - weight_decay=self.weight_decay) + self.optimizer = torch.optim.Adam( + self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + ) # each training starts from the very beginning, so reset the loss and model dict here - self.best_loss = float('inf') + self.best_loss = float("inf") self.best_model_dict = None try: @@ -80,12 +90,14 @@ def _train_model(self, training_loader, val_loader=None): inputs = self.assemble_input_data(data) self.optimizer.zero_grad() results = self.model.forward(inputs) - results['loss'].backward() + results["loss"].backward() self.optimizer.step() - epoch_train_loss_collector.append(results['loss'].item()) + epoch_train_loss_collector.append(results["loss"].item()) - mean_train_loss = np.mean(epoch_train_loss_collector) # mean training loss of the current epoch - self.logger['training_loss'].append(mean_train_loss) + mean_train_loss = np.mean( + epoch_train_loss_collector + ) # mean training loss of the current epoch + self.logger["training_loss"].append(mean_train_loss) if val_loader is not None: self.model.eval() @@ -94,14 +106,16 @@ def _train_model(self, training_loader, val_loader=None): for idx, data in enumerate(val_loader): inputs = self.assemble_input_data(data) results = self.model.forward(inputs) - epoch_val_loss_collector.append(results['loss'].item()) + epoch_val_loss_collector.append(results["loss"].item()) mean_val_loss = np.mean(epoch_val_loss_collector) - self.logger['validating_loss'].append(mean_val_loss) - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}') + self.logger["validating_loss"].append(mean_val_loss) + print( + f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}" + ) mean_loss = mean_val_loss else: - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}') + print(f"epoch {epoch}: training loss {mean_train_loss:.4f}") mean_loss = mean_train_loss if mean_loss < self.best_loss: @@ -111,18 +125,24 @@ def _train_model(self, training_loader, val_loader=None): else: self.patience -= 1 if self.patience == 0: - print('Exceeded the training patience. Terminating the training procedure...') + print( + "Exceeded the training patience. Terminating the training procedure..." + ) break except Exception as e: - print(f'Exception: {e}') + print(f"Exception: {e}") if self.best_model_dict is None: - raise RuntimeError('Training got interrupted. Model was not get trained. Please try fit() again.') + raise RuntimeError( + "Training got interrupted. Model was not get trained. Please try fit() again." + ) else: - RuntimeWarning('Training got interrupted. ' - 'Model will load the best parameters so far for testing. ' - "If you don't want it, please try fit() again.") + RuntimeWarning( + "Training got interrupted. 
" + "Model will load the best parameters so far for testing. " + "If you don't want it, please try fit() again." + ) - if np.equal(self.best_loss, float('inf')): - raise ValueError('Something is wrong. best_loss is Nan after training.') + if np.equal(self.best_loss, float("inf")): + raise ValueError("Something is wrong. best_loss is Nan after training.") - print('Finished training.') + print("Finished training.") diff --git a/pypots/clustering/crli.py b/pypots/clustering/crli.py index 81b70d43..007a00ff 100644 --- a/pypots/clustering/crli.py +++ b/pypots/clustering/crli.py @@ -19,8 +19,8 @@ from pypots.utils.metrics import cal_mse RNN_CELL = { - 'LSTM': nn.LSTMCell, - 'GRU': nn.GRUCell, + "LSTM": nn.LSTMCell, + "GRU": nn.GRUCell, } @@ -28,7 +28,9 @@ def reverse_tensor(tensor_): if tensor_.dim() <= 1: return tensor_ indices = range(tensor_.size()[1])[::-1] - indices = torch.tensor(indices, dtype=torch.long, device=tensor_.device, requires_grad=False) + indices = torch.tensor( + indices, dtype=torch.long, device=tensor_.device, requires_grad=False + ) return tensor_.index_select(1, indices) @@ -42,7 +44,7 @@ def __init__(self, cell_type, n_layer, d_input, d_hidden, device): self.device = device self.model = nn.ModuleList() - if cell_type in ['LSTM', 'GRU']: + if cell_type in ["LSTM", "GRU"]: for i in range(n_layer): if i == 0: self.model.append(RNN_CELL[cell_type](d_input, d_hidden)) @@ -52,32 +54,42 @@ def __init__(self, cell_type, n_layer, d_input, d_hidden, device): self.output_layer = nn.Linear(d_hidden, d_input) def forward(self, inputs): - X, missing_mask = inputs['X'], inputs['missing_mask'] + X, missing_mask = inputs["X"], inputs["missing_mask"] bz, n_steps, _ = X.shape hidden_state = torch.zeros((bz, self.d_hidden), device=self.device) - hidden_state_collector = torch.empty((bz, n_steps, self.d_hidden), device=self.device) + hidden_state_collector = torch.empty( + (bz, n_steps, self.d_hidden), device=self.device + ) output_collector = torch.empty((bz, n_steps, self.d_input), device=self.device) - if self.cell_type == 'LSTM': + if self.cell_type == "LSTM": # TODO: cell states should have different shapes cell_states = torch.zeros((self.d_input, self.d_hidden), device=self.device) for step in range(n_steps): x = X[:, step, :] estimation = self.output_layer(hidden_state) output_collector[:, step] = estimation - imputed_x = missing_mask[:, step] * x + (1 - missing_mask[:, step]) * estimation + imputed_x = ( + missing_mask[:, step] * x + (1 - missing_mask[:, step]) * estimation + ) for i in range(self.n_layer): if i == 0: - hidden_state, cell_states = self.model[i](imputed_x, (hidden_state, cell_states)) + hidden_state, cell_states = self.model[i]( + imputed_x, (hidden_state, cell_states) + ) else: - hidden_state, cell_states = self.model[i](hidden_state, (hidden_state, cell_states)) + hidden_state, cell_states = self.model[i]( + hidden_state, (hidden_state, cell_states) + ) hidden_state_collector[:, step, :] = hidden_state - elif self.cell_type == 'GRU': + elif self.cell_type == "GRU": for step in range(n_steps): x = X[:, step, :] estimation = self.output_layer(hidden_state) output_collector[:, step] = estimation - imputed_x = missing_mask[:, step] * x + (1 - missing_mask[:, step]) * estimation + imputed_x = ( + missing_mask[:, step] * x + (1 - missing_mask[:, step]) * estimation + ) for i in range(self.n_layer): if i == 0: hidden_state = self.model[i](imputed_x, hidden_state) @@ -103,50 +115,60 @@ def forward(self, inputs): b_outputs, b_final_hidden_state = self.b_rnn(inputs) b_outputs 
= reverse_tensor(b_outputs) # reverse the output of the backward rnn imputation = (f_outputs + b_outputs) / 2 - imputed_X = inputs['X'] * inputs['missing_mask'] + imputation * (1 - inputs['missing_mask']) - fb_final_hidden_states = torch.concat([f_final_hidden_state, b_final_hidden_state], dim=-1) + imputed_X = inputs["X"] * inputs["missing_mask"] + imputation * ( + 1 - inputs["missing_mask"] + ) + fb_final_hidden_states = torch.concat( + [f_final_hidden_state, b_final_hidden_state], dim=-1 + ) return imputation, imputed_X, fb_final_hidden_states class Discriminator(nn.Module): - def __init__(self, cell_type, d_input, device='cpu'): + def __init__(self, cell_type, d_input, device="cpu"): super().__init__() self.cell_type = cell_type self.device = device # this setting is the same with the official implementation - self.rnn_cell_module_list = nn.ModuleList([ - RNN_CELL[cell_type](d_input, 32), - RNN_CELL[cell_type](32, 16), - RNN_CELL[cell_type](16, 8), - RNN_CELL[cell_type](8, 16), - RNN_CELL[cell_type](16, 32), - ]) + self.rnn_cell_module_list = nn.ModuleList( + [ + RNN_CELL[cell_type](d_input, 32), + RNN_CELL[cell_type](32, 16), + RNN_CELL[cell_type](16, 8), + RNN_CELL[cell_type](8, 16), + RNN_CELL[cell_type](16, 32), + ] + ) self.output_layer = nn.Linear(32, d_input) def forward(self, inputs): - imputed_X = inputs['imputed_X'] + imputed_X = inputs["imputed_X"] bz, n_steps, _ = imputed_X.shape hidden_states = [ torch.zeros((bz, 32), device=self.device), torch.zeros((bz, 16), device=self.device), torch.zeros((bz, 8), device=self.device), torch.zeros((bz, 16), device=self.device), - torch.zeros((bz, 32), device=self.device) + torch.zeros((bz, 32), device=self.device), ] hidden_state_collector = torch.empty((bz, n_steps, 32), device=self.device) - if self.cell_type == 'LSTM': + if self.cell_type == "LSTM": cell_states = torch.zeros((self.d_input, self.d_hidden), device=self.device) for step in range(n_steps): x = imputed_X[:, step, :] for i, rnn_cell in enumerate(self.rnn_cell_module_list): if i == 0: - hidden_state, cell_states = rnn_cell(x, (hidden_states[i], cell_states)) + hidden_state, cell_states = rnn_cell( + x, (hidden_states[i], cell_states) + ) else: - hidden_state, cell_states = rnn_cell(hidden_states[i - 1], (hidden_states[i], cell_states)) + hidden_state, cell_states = rnn_cell( + hidden_states[i - 1], (hidden_states[i], cell_states) + ) hidden_states[i] = hidden_state hidden_state_collector[:, step, :] = hidden_state - elif self.cell_type == 'GRU': + elif self.cell_type == "GRU": for step in range(n_steps): x = imputed_X[:, step, :] for i, rnn_cell in enumerate(self.rnn_cell_module_list): @@ -162,7 +184,9 @@ def forward(self, inputs): class Decoder(nn.Module): - def __init__(self, n_steps, d_input, d_output, fcn_output_dims: list = None, device='cpu'): + def __init__( + self, n_steps, d_input, d_output, fcn_output_dims: list = None, device="cpu" + ): super().__init__() self.n_steps = n_steps self.d_output = d_output @@ -181,13 +205,15 @@ def __init__(self, n_steps, d_input, d_output, fcn_output_dims: list = None, dev self.output_layer = nn.Linear(fcn_output_dims[-1], d_output) def forward(self, inputs): - generator_fb_hidden_states = inputs['generator_fb_hidden_states'] + generator_fb_hidden_states = inputs["generator_fb_hidden_states"] bz, _ = generator_fb_hidden_states.shape fcn_latent = generator_fb_hidden_states for layer in self.fcn: fcn_latent = layer(fcn_latent) hidden_state = fcn_latent - hidden_state_collector = torch.empty((bz, self.n_steps, 
self.fcn_output_dims[-1]), device=self.device) + hidden_state_collector = torch.empty( + (bz, self.n_steps, self.fcn_output_dims[-1]), device=self.device + ) for i in range(self.n_steps): hidden_state = self.rnn_cell(hidden_state, hidden_state) hidden_state_collector[:, i, :] = hidden_state @@ -196,112 +222,148 @@ def forward(self, inputs): class _CRLI(nn.Module): - def __init__(self, n_steps, n_features, n_clusters, n_generator_layers, rnn_hidden_size, decoder_fcn_output_dims, - lambda_kmeans, rnn_cell_type='GRU', device='cpu'): + def __init__( + self, + n_steps, + n_features, + n_clusters, + n_generator_layers, + rnn_hidden_size, + decoder_fcn_output_dims, + lambda_kmeans, + rnn_cell_type="GRU", + device="cpu", + ): super().__init__() - self.generator = Generator(n_generator_layers, n_features, rnn_hidden_size, rnn_cell_type, device) + self.generator = Generator( + n_generator_layers, n_features, rnn_hidden_size, rnn_cell_type, device + ) self.discriminator = Discriminator(rnn_cell_type, n_features, device) self.decoder = Decoder( n_steps, rnn_hidden_size * 2, n_features, decoder_fcn_output_dims, device ) # fully connected network is included in Decoder - self.kmeans = KMeans(n_clusters=n_clusters) # TODO: implement KMean with torch for gpu acceleration + self.kmeans = KMeans( + n_clusters=n_clusters + ) # TODO: implement KMean with torch for gpu acceleration self.n_clusters = n_clusters self.lambda_kmeans = lambda_kmeans self.device = device - def cluster(self, inputs, training_object='generator'): + def cluster(self, inputs, training_object="generator"): # concat final states from generator and input it as the initial state of decoder imputation, imputed_X, generator_fb_hidden_states = self.generator(inputs) - inputs['imputation'] = imputation - inputs['imputed_X'] = imputed_X - inputs['generator_fb_hidden_states'] = generator_fb_hidden_states - if training_object == 'discriminator': + inputs["imputation"] = imputation + inputs["imputed_X"] = imputed_X + inputs["generator_fb_hidden_states"] = generator_fb_hidden_states + if training_object == "discriminator": discrimination = self.discriminator(inputs) - inputs['discrimination'] = discrimination + inputs["discrimination"] = discrimination return inputs # if only train discriminator, then no need to run decoder reconstruction, fcn_latent = self.decoder(inputs) - inputs['reconstruction'] = reconstruction - inputs['fcn_latent'] = fcn_latent + inputs["reconstruction"] = reconstruction + inputs["fcn_latent"] = fcn_latent return inputs - def forward(self, inputs, training_object='generator'): - assert training_object in ['generator', 'discriminator'], \ - 'training_object should be "generator" or "discriminator"' + def forward(self, inputs, training_object="generator"): + assert training_object in [ + "generator", + "discriminator", + ], 'training_object should be "generator" or "discriminator"' - X = inputs['X'] - missing_mask = inputs['missing_mask'] + X = inputs["X"] + missing_mask = inputs["missing_mask"] batch_size, n_steps, n_features = X.shape losses = {} inputs = self.cluster(inputs, training_object) - if training_object == 'discriminator': - l_D = F.binary_cross_entropy_with_logits(inputs['discrimination'], missing_mask) - losses['l_disc'] = l_D + if training_object == "discriminator": + l_D = F.binary_cross_entropy_with_logits( + inputs["discrimination"], missing_mask + ) + losses["l_disc"] = l_D else: - inputs['discrimination'] = inputs['discrimination'].detach() - l_G = F.binary_cross_entropy_with_logits(inputs['discrimination'], 
1 - missing_mask, - weight=1 - missing_mask) - l_pre = cal_mse(inputs['imputation'], X, missing_mask) - l_rec = cal_mse(inputs['reconstruction'], X, missing_mask) - HTH = torch.matmul(inputs['fcn_latent'], inputs['fcn_latent'].permute(1, 0)) + inputs["discrimination"] = inputs["discrimination"].detach() + l_G = F.binary_cross_entropy_with_logits( + inputs["discrimination"], 1 - missing_mask, weight=1 - missing_mask + ) + l_pre = cal_mse(inputs["imputation"], X, missing_mask) + l_rec = cal_mse(inputs["reconstruction"], X, missing_mask) + HTH = torch.matmul(inputs["fcn_latent"], inputs["fcn_latent"].permute(1, 0)) term_F = torch.nn.init.orthogonal_( - torch.randn(batch_size, self.n_clusters, device=self.device), - gain=1 + torch.randn(batch_size, self.n_clusters, device=self.device), gain=1 ) FTHTHF = torch.matmul(torch.matmul(term_F.permute(1, 0), HTH), term_F) l_kmeans = torch.trace(HTH) - torch.trace(FTHTHF) # k-means loss loss_gene = l_G + l_pre + l_rec + l_kmeans * self.lambda_kmeans - losses['l_gene'] = loss_gene + losses["l_gene"] = loss_gene return losses class CRLI(BaseNNClusterer): - def __init__(self, - n_steps, - n_features, - n_clusters, - n_generator_layers, - rnn_hidden_size, - decoder_fcn_output_dims=None, - lambda_kmeans=1, - rnn_cell_type='GRU', - G_steps=1, - D_steps=1, - learning_rate=1e-3, - epochs=100, - patience=10, - batch_size=32, - weight_decay=1e-5, - device=None): - super().__init__(n_clusters, learning_rate, epochs, patience, batch_size, weight_decay, device) - assert G_steps > 0 and D_steps > 0, 'G_steps and D_steps should both >0' + def __init__( + self, + n_steps, + n_features, + n_clusters, + n_generator_layers, + rnn_hidden_size, + decoder_fcn_output_dims=None, + lambda_kmeans=1, + rnn_cell_type="GRU", + G_steps=1, + D_steps=1, + learning_rate=1e-3, + epochs=100, + patience=10, + batch_size=32, + weight_decay=1e-5, + device=None, + ): + super().__init__( + n_clusters, + learning_rate, + epochs, + patience, + batch_size, + weight_decay, + device, + ) + assert G_steps > 0 and D_steps > 0, "G_steps and D_steps should both >0" self.n_steps = n_steps self.n_features = n_features self.G_steps = G_steps self.D_steps = D_steps - self.model = _CRLI(n_steps, n_features, n_clusters, n_generator_layers, rnn_hidden_size, - decoder_fcn_output_dims, lambda_kmeans, rnn_cell_type, device) + self.model = _CRLI( + n_steps, + n_features, + n_clusters, + n_generator_layers, + rnn_hidden_size, + decoder_fcn_output_dims, + lambda_kmeans, + rnn_cell_type, + device, + ) self.model = self.model.to(self.device) self._print_model_size() - self.logger = { - 'training_loss_generator': [], - 'training_loss_discriminator': [] - } + self.logger = {"training_loss_generator": [], "training_loss_discriminator": []} def fit(self, train_X): train_X = self.check_input(self.n_steps, self.n_features, train_X) training_set = DatasetForGRUD(train_X) - training_loader = DataLoader(training_set, batch_size=self.batch_size, shuffle=True) + training_loader = DataLoader( + training_set, batch_size=self.batch_size, shuffle=True + ) self._train_model(training_loader) self.model.load_state_dict(self.best_model_dict) self.model.eval() # set the model as eval status to freeze it. return self def assemble_input_data(self, data): - """ Assemble the input data into a dictionary. + """Assemble the input data into a dictionary. 
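
[Illustrative aside, not a hunk of this patch] The k-means regularizer reformatted in the crli.py hunk above is tr(H H^T) - tr(F^T H H^T F), where H is the batch of latent vectors (inputs["fcn_latent"] in the code) and F is a random matrix with orthonormal columns drawn on each forward pass. For any such F the term is non-negative, and minimizing it over F gives the usual spectral-relaxation form of the k-means objective on H. A minimal standalone sketch with toy shapes (all names below are local to the sketch):

    import torch

    batch_size, d_latent, n_clusters = 8, 4, 3
    H = torch.randn(batch_size, d_latent)  # stands in for inputs["fcn_latent"]
    F = torch.nn.init.orthogonal_(torch.randn(batch_size, n_clusters), gain=1)
    HTH = H @ H.T
    l_kmeans = torch.trace(HTH) - torch.trace(F.T @ HTH @ F)
    print(l_kmeans.item())  # >= 0 because F has orthonormal columns
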
Parameters ---------- @@ -317,24 +379,28 @@ def assemble_input_data(self, data): indices, X, _, missing_mask, _, _ = data inputs = { - 'X': X, - 'missing_mask': missing_mask, + "X": X, + "missing_mask": missing_mask, } return inputs def _train_model(self, training_loader, val_loader=None): self.G_optimizer = torch.optim.Adam( [ - {'params': self.model.generator.parameters()}, - {'params': self.model.decoder.parameters()} + {"params": self.model.generator.parameters()}, + {"params": self.model.decoder.parameters()}, ], - lr=self.lr, weight_decay=self.weight_decay + lr=self.lr, + weight_decay=self.weight_decay, + ) + self.D_optimizer = torch.optim.Adam( + self.model.discriminator.parameters(), + lr=self.lr, + weight_decay=self.weight_decay, ) - self.D_optimizer = torch.optim.Adam(self.model.discriminator.parameters(), lr=self.lr, - weight_decay=self.weight_decay) # each training starts from the very beginning, so reset the loss and model dict here - self.best_loss = float('inf') + self.best_loss = float("inf") self.best_model_dict = None try: @@ -347,25 +413,35 @@ def _train_model(self, training_loader, val_loader=None): for _ in range(self.D_steps): self.D_optimizer.zero_grad() - results = self.model.forward(inputs, training_object='discriminator') - results['l_disc'].backward(retain_graph=True) + results = self.model.forward( + inputs, training_object="discriminator" + ) + results["l_disc"].backward(retain_graph=True) self.D_optimizer.step() - epoch_train_loss_D_collector.append(results['l_disc'].item()) + epoch_train_loss_D_collector.append(results["l_disc"].item()) for _ in range(self.G_steps): self.G_optimizer.zero_grad() - results = self.model.forward(inputs, training_object='generator') - results['l_gene'].backward() + results = self.model.forward( + inputs, training_object="generator" + ) + results["l_gene"].backward() self.G_optimizer.step() - epoch_train_loss_G_collector.append(results['l_gene'].item()) - - mean_train_G_loss = np.mean(epoch_train_loss_G_collector) # mean training loss of the current epoch - mean_train_D_loss = np.mean(epoch_train_loss_D_collector) # mean training loss of the current epoch - self.logger['training_loss_generator'].append(mean_train_G_loss) - self.logger['training_loss_discriminator'].append(mean_train_D_loss) - print(f'epoch {epoch}: ' - f'training loss_generator {mean_train_G_loss:.4f}, ' - f'train loss_discriminator {mean_train_D_loss:.4f}') + epoch_train_loss_G_collector.append(results["l_gene"].item()) + + mean_train_G_loss = np.mean( + epoch_train_loss_G_collector + ) # mean training loss of the current epoch + mean_train_D_loss = np.mean( + epoch_train_loss_D_collector + ) # mean training loss of the current epoch + self.logger["training_loss_generator"].append(mean_train_G_loss) + self.logger["training_loss_discriminator"].append(mean_train_D_loss) + print( + f"epoch {epoch}: " + f"training loss_generator {mean_train_G_loss:.4f}, " + f"train loss_discriminator {mean_train_D_loss:.4f}" + ) mean_loss = mean_train_G_loss if mean_loss < self.best_loss: @@ -375,21 +451,27 @@ def _train_model(self, training_loader, val_loader=None): else: self.patience -= 1 if self.patience == 0: - print('Exceeded the training patience. Terminating the training procedure...') + print( + "Exceeded the training patience. Terminating the training procedure..." + ) break except Exception as e: - print(f'Exception: {e}') + print(f"Exception: {e}") if self.best_model_dict is None: - raise RuntimeError('Training got interrupted. Model was not get trained. 
Please try fit() again.') + raise RuntimeError( + "Training got interrupted. Model was not get trained. Please try fit() again." + ) else: - RuntimeWarning('Training got interrupted. ' - 'Model will load the best parameters so far for testing. ' - "If you don't want it, please try fit() again.") + RuntimeWarning( + "Training got interrupted. " + "Model will load the best parameters so far for testing. " + "If you don't want it, please try fit() again." + ) - if np.equal(self.best_loss, float('inf')): - raise ValueError('Something is wrong. best_loss is Nan after training.') + if np.equal(self.best_loss, float("inf")): + raise ValueError("Something is wrong. best_loss is Nan after training.") - print('Finished training.') + print("Finished training.") def cluster(self, X): X = self.check_input(self.n_steps, self.n_features, X) @@ -402,7 +484,7 @@ def cluster(self, X): for idx, data in enumerate(test_loader): inputs = self.assemble_input_data(data) inputs = self.model.cluster(inputs) - latent_collector.append(inputs['fcn_latent']) + latent_collector.append(inputs["fcn_latent"]) latent_collector = torch.cat(latent_collector).cpu().detach().numpy() clustering = self.model.kmeans.fit_predict(latent_collector) diff --git a/pypots/clustering/vader.py b/pypots/clustering/vader.py index 1114c964..b0a1dff8 100644 --- a/pypots/clustering/vader.py +++ b/pypots/clustering/vader.py @@ -49,13 +49,15 @@ def __init__(self, input_size, hidden_size, bias=True): if bias: self.bias_ch = Parameter(torch.Tensor(3 * hidden_size)) else: - self.register_parameter('bias_ch', None) - self.register_buffer('wc_blank', torch.zeros(hidden_size)) + self.register_parameter("bias_ch", None) + self.register_buffer("wc_blank", torch.zeros(hidden_size)) self.reset_parameters() def forward(self, input, hx=None): if hx is None: - zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device) + zeros = torch.zeros( + input.size(0), self.hidden_size, dtype=input.dtype, device=input.device + ) hx = (zeros, zeros) h, c = hx @@ -64,21 +66,23 @@ def forward(self, input, hx=None): wh = F.linear(h, self.weight_hh, self.bias_hh) wc = F.linear(c, self.weight_ch, self.bias_ch) - wxhc = wx + \ - wh + \ - torch.cat( - ( - wc[:, :2 * self.hidden_size], - Variable(self.wc_blank).expand_as(h), - wc[:, 2 * self.hidden_size:] - ), - dim=1 - ) - - i = torch.sigmoid(wxhc[:, :self.hidden_size]) - f = torch.sigmoid(wxhc[:, self.hidden_size:2 * self.hidden_size]) - g = torch.tanh(wxhc[:, 2 * self.hidden_size:3 * self.hidden_size]) - o = torch.sigmoid(wxhc[:, 3 * self.hidden_size:]) + wxhc = ( + wx + + wh + + torch.cat( + ( + wc[:, : 2 * self.hidden_size], + Variable(self.wc_blank).expand_as(h), + wc[:, 2 * self.hidden_size :], + ), + dim=1, + ) + ) + + i = torch.sigmoid(wxhc[:, : self.hidden_size]) + f = torch.sigmoid(wxhc[:, self.hidden_size : 2 * self.hidden_size]) + g = torch.tanh(wxhc[:, 2 * self.hidden_size : 3 * self.hidden_size]) + o = torch.sigmoid(wxhc[:, 3 * self.hidden_size :]) c = f * c + i * g h = o * torch.tanh(c) @@ -128,7 +132,16 @@ class _VaDER(nn.Module): """ - def __init__(self, n_steps, d_input, n_clusters, d_rnn_hidden, d_mu_stddev, eps=1e-9, alpha=1.0): + def __init__( + self, + n_steps, + d_input, + n_clusters, + d_rnn_hidden, + d_mu_stddev, + eps=1e-9, + alpha=1.0, + ): super().__init__() self.n_steps = n_steps self.d_input = d_input @@ -143,15 +156,15 @@ def __init__(self, n_steps, d_input, n_clusters, d_rnn_hidden, d_mu_stddev, eps= self.encoder = PeepholeLSTMCell(d_input, d_rnn_hidden) 
self.decoder = PeepholeLSTMCell(d_input, d_rnn_hidden) self.ae_encode_layers = nn.Sequential( - nn.Linear(d_rnn_hidden, d_rnn_hidden), - nn.Softplus() + nn.Linear(d_rnn_hidden, d_rnn_hidden), nn.Softplus() ) self.ae_decode_layers = nn.Sequential( - nn.Linear(d_mu_stddev, d_rnn_hidden), - nn.Softplus() + nn.Linear(d_mu_stddev, d_rnn_hidden), nn.Softplus() ) self.mu_layer = nn.Linear(d_rnn_hidden, d_mu_stddev) # layer for mean - self.stddev_layer = nn.Linear(d_rnn_hidden, d_mu_stddev) # layer for standard variance + self.stddev_layer = nn.Linear( + d_rnn_hidden, d_mu_stddev + ) # layer for standard variance self.rnn_transform_layer = nn.Linear(d_rnn_hidden, d_input) self.gmm_layer = GMMLayer(d_mu_stddev, n_clusters) @@ -165,8 +178,12 @@ def encode(self, X, missing_mask): X_imputed = self.implicit_imputation_layer(X, missing_mask) - hidden_state = torch.zeros((batch_size, self.d_rnn_hidden), dtype=X.dtype, device=X.device) - cell_state = torch.zeros((batch_size, self.d_rnn_hidden), dtype=X.dtype, device=X.device) + hidden_state = torch.zeros( + (batch_size, self.d_rnn_hidden), dtype=X.dtype, device=X.device + ) + cell_state = torch.zeros( + (batch_size, self.d_rnn_hidden), dtype=X.dtype, device=X.device + ) # cell_state_collector = torch.empty((batch_size, self.n_steps, self.d_rnn_hidden), # dtype=X.dtype, device=X.device) for i in range(self.n_steps): @@ -185,10 +202,13 @@ def decode(self, z): hidden_state = self.ae_decode_layers(hidden_state) cell_state = torch.zeros(hidden_state.size(), dtype=z.dtype, device=z.device) - inputs = torch.zeros((z.size(0), self.n_steps, self.d_input), dtype=z.dtype, device=z.device) + inputs = torch.zeros( + (z.size(0), self.n_steps, self.d_input), dtype=z.dtype, device=z.device + ) - hidden_state_collector = torch.empty((z.size(0), self.n_steps, self.d_rnn_hidden), - dtype=z.dtype, device=z.device) + hidden_state_collector = torch.empty( + (z.size(0), self.n_steps, self.d_rnn_hidden), dtype=z.dtype, device=z.device + ) for i in range(self.n_steps): x = inputs[:, i, :] hidden_state, cell_state = self.decoder(x, (hidden_state, cell_state)) @@ -204,34 +224,59 @@ def get_results(self, X, missing_mask): return X_reconstructed, mu_c, var_c, phi_c, z, mu_tilde, stddev_tilde def cluster(self, inputs): - X, missing_mask = inputs['X'], inputs['missing_mask'] - X_reconstructed, mu_c, var_c, phi_c, z, mu_tilde, stddev_tilde = self.get_results(X, missing_mask) + X, missing_mask = inputs["X"], inputs["missing_mask"] + ( + X_reconstructed, + mu_c, + var_c, + phi_c, + z, + mu_tilde, + stddev_tilde, + ) = self.get_results(X, missing_mask) def func_to_apply(mu_t_, mu_, stddev_, phi_): # the covariance matrix is diagonal, so we can just take the product - return np.log(self.eps + phi_) + \ - np.log(self.eps + multivariate_normal.pdf(mu_t_, mean=mu_, cov=np.diag(stddev_))) + return np.log(self.eps + phi_) + np.log( + self.eps + + multivariate_normal.pdf(mu_t_, mean=mu_, cov=np.diag(stddev_)) + ) mu_tilde = mu_tilde.detach().cpu().numpy() mu = mu_c.detach().cpu().numpy() var = var_c.detach().cpu().numpy() phi = phi_c.detach().cpu().numpy() - p = np.array([func_to_apply(mu_tilde, mu[i], var[i], phi[i]) for i in np.arange(mu.shape[0])]) + p = np.array( + [ + func_to_apply(mu_tilde, mu[i], var[i], phi[i]) + for i in np.arange(mu.shape[0]) + ] + ) clustering_results = np.argmax(p, axis=0) return clustering_results def forward(self, inputs, pretrain=False): - X, missing_mask = inputs['X'], inputs['missing_mask'] - X_reconstructed, mu_c, var_c, phi_c, z, mu_tilde, stddev_tilde = 
self.get_results(X, missing_mask) + X, missing_mask = inputs["X"], inputs["missing_mask"] + ( + X_reconstructed, + mu_c, + var_c, + phi_c, + z, + mu_tilde, + stddev_tilde, + ) = self.get_results(X, missing_mask) # calculate the reconstruction loss unscaled_reconstruction_loss = cal_mse(X_reconstructed, X, missing_mask) - reconstruction_loss = unscaled_reconstruction_loss * self.n_steps * self.d_input / missing_mask.sum() + reconstruction_loss = ( + unscaled_reconstruction_loss + * self.n_steps + * self.d_input + / missing_mask.sum() + ) if pretrain: - results = { - 'loss': reconstruction_loss, - 'z': z - } + results = {"loss": reconstruction_loss, "z": z} return results # calculate the latent loss @@ -244,7 +289,7 @@ def forward(self, inputs, pretrain=False): ii, jj = torch.meshgrid( torch.arange(self.n_clusters, dtype=torch.int64, device=X.device), - torch.arange(batch_size, dtype=torch.int64, device=X.device) + torch.arange(batch_size, dtype=torch.int64, device=X.device), ) ii = ii.flatten() jj = jj.flatten() @@ -253,7 +298,7 @@ def forward(self, inputs, pretrain=False): mc_b = mu_c.index_select(dim=0, index=ii) sc_b = var_c.index_select(dim=0, index=ii) z_b = z.index_select(dim=0, index=jj) - log_pdf_z = - 0.5 * (lsc_b + log_2pi + torch.square(z_b - mc_b) / sc_b) + log_pdf_z = -0.5 * (lsc_b + log_2pi + torch.square(z_b - mc_b) / sc_b) log_pdf_z = log_pdf_z.reshape([batch_size, self.n_clusters, self.d_mu_stddev]) log_p = log_phi_c + log_pdf_z.sum(dim=2) @@ -264,28 +309,28 @@ def forward(self, inputs, pretrain=False): term1 = torch.log(var_c + self.eps) st_b = var_tilde.index_select(dim=0, index=jj) sc_b = var_c.index_select(dim=0, index=ii) - term2 = torch.reshape(st_b / (sc_b + self.eps), [batch_size, self.n_clusters, self.d_mu_stddev]) + term2 = torch.reshape( + st_b / (sc_b + self.eps), [batch_size, self.n_clusters, self.d_mu_stddev] + ) mt_b = mu_tilde.index_select(dim=0, index=jj) mc_b = mu_c.index_select(dim=0, index=ii) term3 = torch.reshape( torch.square(mt_b - mc_b) / (sc_b + self.eps), - [batch_size, self.n_clusters, self.d_mu_stddev] + [batch_size, self.n_clusters, self.d_mu_stddev], ) - latent_loss1 = 0.5 * torch.sum(gamma_c * torch.sum(term1 + term2 + term3, dim=2), dim=1) - latent_loss2 = - torch.sum(gamma_c * (log_phi_c - log_gamma_c), dim=1) - latent_loss3 = - 0.5 * torch.sum(1 + stddev_tilde, dim=1) + latent_loss1 = 0.5 * torch.sum( + gamma_c * torch.sum(term1 + term2 + term3, dim=2), dim=1 + ) + latent_loss2 = -torch.sum(gamma_c * (log_phi_c - log_gamma_c), dim=1) + latent_loss3 = -0.5 * torch.sum(1 + stddev_tilde, dim=1) latent_loss1 = latent_loss1.mean() latent_loss2 = latent_loss2.mean() latent_loss3 = latent_loss3.mean() latent_loss = latent_loss1 + latent_loss2 + latent_loss3 - results = { - 'loss': reconstruction_loss + self.alpha * latent_loss, - 'z': z - - } + results = {"loss": reconstruction_loss + self.alpha * latent_loss, "z": z} return results @@ -297,38 +342,52 @@ def inverse_softplus(x): class VaDER(BaseNNClusterer): - def __init__(self, - n_steps, - n_features, - n_clusters, - rnn_hidden_size, - d_mu_stddev, - learning_rate=1e-3, - pretrain_epochs=10, - epochs=100, - patience=10, - batch_size=32, - weight_decay=1e-5, - device=None): - super().__init__(n_clusters, learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, + n_steps, + n_features, + n_clusters, + rnn_hidden_size, + d_mu_stddev, + learning_rate=1e-3, + pretrain_epochs=10, + epochs=100, + patience=10, + batch_size=32, + weight_decay=1e-5, + device=None, + ): + 
super().__init__( + n_clusters, + learning_rate, + epochs, + patience, + batch_size, + weight_decay, + device, + ) self.n_steps = n_steps self.n_features = n_features self.pretrain_epochs = pretrain_epochs - self.model = _VaDER(n_steps, n_features, n_clusters, rnn_hidden_size, d_mu_stddev) + self.model = _VaDER( + n_steps, n_features, n_clusters, rnn_hidden_size, d_mu_stddev + ) self.model = self.model.to(self.device) self._print_model_size() def fit(self, train_X): train_X = self.check_input(self.n_steps, self.n_features, train_X) training_set = DatasetForGRUD(train_X) - training_loader = DataLoader(training_set, batch_size=self.batch_size, shuffle=True) + training_loader = DataLoader( + training_set, batch_size=self.batch_size, shuffle=True + ) self._train_model(training_loader) self.model.load_state_dict(self.best_model_dict) self.model.eval() # set the model as eval status to freeze it. return self def assemble_input_data(self, data): - """ Assemble the input data into a dictionary. + """Assemble the input data into a dictionary. Parameters ---------- @@ -344,18 +403,18 @@ def assemble_input_data(self, data): indices, X, _, missing_mask, _, _ = data inputs = { - 'X': X, - 'missing_mask': missing_mask, + "X": X, + "missing_mask": missing_mask, } return inputs def _train_model(self, training_loader, val_loader=None): - self.optimizer = torch.optim.Adam(self.model.parameters(), - lr=self.lr, - weight_decay=self.weight_decay) + self.optimizer = torch.optim.Adam( + self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + ) # each training starts from the very beginning, so reset the loss and model dict here - self.best_loss = float('inf') + self.best_loss = float("inf") self.best_model_dict = None # pretrain to initialize parameters of GMM layer @@ -365,7 +424,7 @@ def _train_model(self, training_loader, val_loader=None): inputs = self.assemble_input_data(data) self.optimizer.zero_grad() results = self.model.forward(inputs, pretrain=True) - results['loss'].backward() + results["loss"].backward() self.optimizer.step() with torch.no_grad(): sample_collector = [] @@ -373,9 +432,11 @@ def _train_model(self, training_loader, val_loader=None): for idx, data in enumerate(training_loader): inputs = self.assemble_input_data(data) results = self.model.forward(inputs, pretrain=True) - sample_collector.append(results['z']) + sample_collector.append(results["z"]) samples = torch.cat(sample_collector).cpu().detach().numpy() - gmm = GaussianMixture(n_components=self.n_clusters, covariance_type="diag", reg_covar=1e-04) + gmm = GaussianMixture( + n_components=self.n_clusters, covariance_type="diag", reg_covar=1e-04 + ) gmm.fit(samples) # get GMM parameters phi = np.log(gmm.weights_ + 1e-9) # inverse softmax @@ -395,12 +456,14 @@ def _train_model(self, training_loader, val_loader=None): inputs = self.assemble_input_data(data) self.optimizer.zero_grad() results = self.model.forward(inputs) - results['loss'].backward() + results["loss"].backward() self.optimizer.step() - epoch_train_loss_collector.append(results['loss'].item()) + epoch_train_loss_collector.append(results["loss"].item()) - mean_train_loss = np.mean(epoch_train_loss_collector) # mean training loss of the current epoch - self.logger['training_loss'].append(mean_train_loss) + mean_train_loss = np.mean( + epoch_train_loss_collector + ) # mean training loss of the current epoch + self.logger["training_loss"].append(mean_train_loss) if val_loader is not None: self.model.eval() @@ -409,14 +472,16 @@ def _train_model(self, training_loader, 
val_loader=None): for idx, data in enumerate(val_loader): inputs = self.assemble_input_data(data) results = self.model.forward(inputs) - epoch_val_loss_collector.append(results['loss'].item()) + epoch_val_loss_collector.append(results["loss"].item()) mean_val_loss = np.mean(epoch_val_loss_collector) - self.logger['validating_loss'].append(mean_val_loss) - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}') + self.logger["validating_loss"].append(mean_val_loss) + print( + f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}" + ) mean_loss = mean_val_loss else: - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}') + print(f"epoch {epoch}: training loss {mean_train_loss:.4f}") mean_loss = mean_train_loss if mean_loss < self.best_loss: @@ -426,21 +491,27 @@ def _train_model(self, training_loader, val_loader=None): else: self.patience -= 1 if self.patience == 0: - print('Exceeded the training patience. Terminating the training procedure...') + print( + "Exceeded the training patience. Terminating the training procedure..." + ) break except Exception as e: - print(f'Exception: {e}') + print(f"Exception: {e}") if self.best_model_dict is None: - raise RuntimeError('Training got interrupted. Model was not get trained. Please try fit() again.') + raise RuntimeError( + "Training got interrupted. Model was not get trained. Please try fit() again." + ) else: - RuntimeWarning('Training got interrupted. ' - 'Model will load the best parameters so far for testing. ' - "If you don't want it, please try fit() again.") + RuntimeWarning( + "Training got interrupted. " + "Model will load the best parameters so far for testing. " + "If you don't want it, please try fit() again." + ) - if np.equal(self.best_loss, float('inf')): - raise ValueError('Something is wrong. best_loss is Nan after training.') + if np.equal(self.best_loss, float("inf")): + raise ValueError("Something is wrong. best_loss is Nan after training.") - print('Finished training.') + print("Finished training.") def cluster(self, X): X = self.check_input(self.n_steps, self.n_features, X) diff --git a/pypots/data/__init__.py b/pypots/data/__init__.py index a847af8f..3e3cefc2 100644 --- a/pypots/data/__init__.py +++ b/pypots/data/__init__.py @@ -9,7 +9,10 @@ from pypots.data.dataset_for_brits import DatasetForBRITS from pypots.data.dataset_for_grud import DatasetForGRUD from pypots.data.dataset_for_mit import DatasetForMIT -from pypots.data.generating import generate_random_walk, generate_random_walk_for_classification +from pypots.data.generating import ( + generate_random_walk, + generate_random_walk_for_classification, +) from pypots.data.integration import ( masked_fill, mcar, diff --git a/pypots/data/base.py b/pypots/data/base.py index cc7a5988..827b5d93 100644 --- a/pypots/data/base.py +++ b/pypots/data/base.py @@ -10,7 +10,7 @@ class BaseDataset(Dataset): - """ Base dataset class in PyPOTS. + """Base dataset class in PyPOTS. Parameters ---------- @@ -34,7 +34,7 @@ def __len__(self): return len(self.X) def __getitem__(self, idx): - """ Fetch data according to index. + """Fetch data according to index. 
Parameters ---------- @@ -52,8 +52,6 @@ def __getitem__(self, idx): ] if self.y is not None: - sample.append( - self.y[idx].to(torch.long) - ) + sample.append(self.y[idx].to(torch.long)) return sample diff --git a/pypots/data/dataset_for_brits.py b/pypots/data/dataset_for_brits.py index 3b505f6c..0f3ee6a7 100644 --- a/pypots/data/dataset_for_brits.py +++ b/pypots/data/dataset_for_brits.py @@ -11,7 +11,7 @@ def parse_delta(missing_mask): - """ Generate time-gap (delta) matrix from missing masks. + """Generate time-gap (delta) matrix from missing masks. Parameters ---------- @@ -34,7 +34,10 @@ def parse_delta(missing_mask): if step == 0: delta.append(torch.zeros(1, n_features, device=device)) else: - delta.append(torch.ones(1, n_features, device=device) + (1 - m_mask[step]) * delta[-1]) + delta.append( + torch.ones(1, n_features, device=device) + + (1 - m_mask[step]) * delta[-1] + ) delta = torch.concat(delta, dim=0) delta_collector.append(delta.unsqueeze(0)) delta = torch.concat(delta_collector, dim=0) @@ -42,7 +45,7 @@ def parse_delta(missing_mask): class DatasetForBRITS(BaseDataset): - """ Dataset class for BRITS. + """Dataset class for BRITS. Parameters ---------- @@ -66,20 +69,20 @@ def __init__(self, X, y=None): backward_delta = parse_delta(backward_missing_mask) self.data = { - 'forward': { - 'X': forward_X, - 'missing_mask': forward_missing_mask, - 'delta': forward_delta + "forward": { + "X": forward_X, + "missing_mask": forward_missing_mask, + "delta": forward_delta, }, - 'backward': { - 'X': backward_X, - 'missing_mask': backward_missing_mask, - 'delta': backward_delta + "backward": { + "X": backward_X, + "missing_mask": backward_missing_mask, + "delta": backward_delta, }, } def __getitem__(self, idx): - """ Fetch data according to index. + """Fetch data according to index. Parameters ---------- @@ -109,18 +112,16 @@ def __getitem__(self, idx): sample = [ torch.tensor(idx), # for forward - self.data['forward']['X'][idx].to(torch.float32), - self.data['forward']['missing_mask'][idx].to(torch.float32), - self.data['forward']['delta'][idx].to(torch.float32), + self.data["forward"]["X"][idx].to(torch.float32), + self.data["forward"]["missing_mask"][idx].to(torch.float32), + self.data["forward"]["delta"][idx].to(torch.float32), # for backward - self.data['backward']['X'][idx].to(torch.float32), - self.data['backward']['missing_mask'][idx].to(torch.float32), - self.data['backward']['delta'][idx].to(torch.float32), + self.data["backward"]["X"][idx].to(torch.float32), + self.data["backward"]["missing_mask"][idx].to(torch.float32), + self.data["backward"]["delta"][idx].to(torch.float32), ] if self.y is not None: - sample.append( - self.y[idx].to(torch.long) - ) + sample.append(self.y[idx].to(torch.long)) return sample diff --git a/pypots/data/dataset_for_grud.py b/pypots/data/dataset_for_grud.py index 0e504906..f3dd1d80 100644 --- a/pypots/data/dataset_for_grud.py +++ b/pypots/data/dataset_for_grud.py @@ -14,7 +14,7 @@ class DatasetForGRUD(BaseDataset): - """ Dataset class for model GRUD. + """Dataset class for model GRUD. 
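
[Illustrative aside, not a hunk of this patch] The parse_delta recurrence touched in the dataset_for_brits.py hunk above is delta[0] = 0 and delta[t] = 1 + (1 - mask[t]) * delta[t-1]: the time gap keeps growing while a feature stays missing and drops back to 1 at the next observed step. A single-feature trace of exactly that recurrence:

    import torch

    mask = torch.tensor([1.0, 0.0, 0.0, 1.0])  # 1 = observed, 0 = missing
    delta = [torch.zeros(1)]
    for step in range(1, len(mask)):
        delta.append(torch.ones(1) + (1 - mask[step]) * delta[-1])
    print(torch.cat(delta))  # tensor([0., 1., 2., 1.])
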
Parameters ---------- @@ -33,11 +33,12 @@ def __init__(self, X, y=None): self.X = torch.nan_to_num(X) self.deltas = parse_delta(self.missing_mask) self.X_filledLOCF = self.locf.locf_torch(X) - self.empirical_mean = \ - torch.sum(self.missing_mask * self.X, dim=[0, 1]) / torch.sum(self.missing_mask, dim=[0, 1]) + self.empirical_mean = torch.sum( + self.missing_mask * self.X, dim=[0, 1] + ) / torch.sum(self.missing_mask, dim=[0, 1]) def __getitem__(self, idx): - """ Fetch data according to index. + """Fetch data according to index. Parameters ---------- @@ -77,8 +78,6 @@ def __getitem__(self, idx): ] if self.y is not None: - sample.append( - self.y[idx].to(torch.long) - ) + sample.append(self.y[idx].to(torch.long)) return sample diff --git a/pypots/data/dataset_for_mit.py b/pypots/data/dataset_for_mit.py index 07676ca1..b24e3f75 100644 --- a/pypots/data/dataset_for_mit.py +++ b/pypots/data/dataset_for_mit.py @@ -12,7 +12,7 @@ class DatasetForMIT(BaseDataset): - """ Dataset for models that need MIT (masked imputation task) in their training, such as SAITS. + """Dataset for models that need MIT (masked imputation task) in their training, such as SAITS. For more information about MIT, please refer to :cite:`du2022SAITS`. @@ -41,7 +41,7 @@ def __init__(self, X, y=None, rate=0.2): self.rate = rate def __getitem__(self, idx): - """ Fetch data according to index. + """Fetch data according to index. Parameters ---------- @@ -80,8 +80,6 @@ def __getitem__(self, idx): ] if self.y is not None: - sample.append( - self.y[idx].to(torch.long) - ) + sample.append(self.y[idx].to(torch.long)) return sample diff --git a/pypots/data/generating.py b/pypots/data/generating.py index f67e1129..664bec00 100644 --- a/pypots/data/generating.py +++ b/pypots/data/generating.py @@ -11,8 +11,10 @@ from sklearn.utils import check_random_state -def generate_random_walk(n_samples=1000, n_steps=24, n_features=10, mu=0., std=1., random_state=None): - """ Generate random walk time-series data. +def generate_random_walk( + n_samples=1000, n_steps=24, n_features=10, mu=0.0, std=1.0, random_state=None +): + """Generate random walk time-series data. Parameters ---------- @@ -44,9 +46,15 @@ def generate_random_walk(n_samples=1000, n_steps=24, n_features=10, mu=0., std=1 return ts_samples -def generate_random_walk_for_classification(n_classes=2, n_samples_each_class=500, n_steps=24, n_features=10, - shuffle=True, random_state=None): - """ Generate random walk time-series data for the classification task. +def generate_random_walk_for_classification( + n_classes=2, + n_samples_each_class=500, + n_steps=24, + n_features=10, + shuffle=True, + random_state=None, +): + """Generate random walk time-series data for the classification task. 
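
[Illustrative aside, not a hunk of this patch] A hedged usage sketch of the reformatted random-walk generator above; the import path follows the pypots/data/__init__.py hunk, and the returned shape is an assumption based on the parameter names rather than verified output:

    from pypots.data.generating import generate_random_walk

    X = generate_random_walk(
        n_samples=8, n_steps=24, n_features=10, mu=0.0, std=1.0, random_state=42
    )
    print(X.shape)  # expected (n_samples, n_steps, n_features) == (8, 24, 10)
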
Parameters ---------- @@ -82,7 +90,9 @@ def generate_random_walk_for_classification(n_classes=2, n_samples_each_class=50 std = 1 for c_ in range(n_classes): - ts_samples = generate_random_walk(n_samples_each_class, n_steps, n_features, mu, std, random_state) + ts_samples = generate_random_walk( + n_samples_each_class, n_steps, n_features, mu, std, random_state + ) label_samples = np.asarray([1 for _ in range(n_samples_each_class)]) * c_ ts_collector.extend(ts_samples) label_collector.extend(label_samples) @@ -101,10 +111,18 @@ def generate_random_walk_for_classification(n_classes=2, n_samples_each_class=50 return X, y -def generate_random_walk_for_anomaly_detection(n_samples=1000, n_steps=24, n_features=10, mu=0., std=1., - anomaly_proportion=0.1, anomaly_fraction=0.02, anomaly_scale_factor=2.0, - random_state=None): - """ Generate random walk time-series data for the anomaly-detection task. +def generate_random_walk_for_anomaly_detection( + n_samples=1000, + n_steps=24, + n_features=10, + mu=0.0, + std=1.0, + anomaly_proportion=0.1, + anomaly_fraction=0.02, + anomaly_scale_factor=2.0, + random_state=None, +): + """Generate random walk time-series data for the anomaly-detection task. Parameters ---------- @@ -134,8 +152,12 @@ def generate_random_walk_for_anomaly_detection(n_samples=1000, n_steps=24, n_fea y : array, shape of [n_classes*n_samples_each_class] Labels indicating if time-series samples are anomalies. """ - assert 0 < anomaly_proportion < 1, f'anomaly_proportion should be >0 and <1, but got {anomaly_proportion}' - assert 0 < anomaly_fraction < 1, f'anomaly_fraction should be >0 and <1, but got {anomaly_fraction}' + assert ( + 0 < anomaly_proportion < 1 + ), f"anomaly_proportion should be >0 and <1, but got {anomaly_proportion}" + assert ( + 0 < anomaly_fraction < 1 + ), f"anomaly_fraction should be >0 and <1, but got {anomaly_fraction}" seed = check_random_state(random_state) X = seed.randn(n_samples, n_steps, n_features) * std + mu n_anomaly = math.floor(n_samples * anomaly_proportion) @@ -148,10 +170,14 @@ def generate_random_walk_for_anomaly_detection(n_samples=1000, n_steps=24, n_fea max_difference = min_val - max_val n_points = n_steps * n_features n_anomaly_points = int(n_points * anomaly_fraction) - point_indices = np.random.choice(a=n_points, size=n_anomaly_points, replace=False) + point_indices = np.random.choice( + a=n_points, size=n_anomaly_points, replace=False + ) for p_i in point_indices: - anomaly_sample[p_i] = mu + np.random.uniform(low=min_val - anomaly_scale_factor * max_difference, - high=max_val + anomaly_scale_factor * max_difference) + anomaly_sample[p_i] = mu + np.random.uniform( + low=min_val - anomaly_scale_factor * max_difference, + high=max_val + anomaly_scale_factor * max_difference, + ) X[a_i] = anomaly_sample.reshape(n_steps, n_features) # create labels diff --git a/pypots/data/load_specific_datasets.py b/pypots/data/load_specific_datasets.py index 98adcb82..7c5352e3 100644 --- a/pypots/data/load_specific_datasets.py +++ b/pypots/data/load_specific_datasets.py @@ -9,7 +9,7 @@ import tsdb SUPPORTED_DATASETS = [ - 'physionet_2012', + "physionet_2012", ] @@ -38,33 +38,28 @@ def preprocess_physionet2012(data): A dict containing processed data. 
""" - X = data['X'].drop(data['static_features'], axis=1) + X = data["X"].drop(data["static_features"], axis=1) def apply_func(df_temp): # pad and truncate to set the max length of samples as 48 - missing = list(set(range(0, 48)).difference(set(df_temp['Time']))) - missing_part = pd.DataFrame({'Time': missing}) + missing = list(set(range(0, 48)).difference(set(df_temp["Time"]))) + missing_part = pd.DataFrame({"Time": missing}) df_temp = df_temp.append(missing_part, ignore_index=False, sort=False) # pad - df_temp = df_temp.set_index('Time').sort_index().reset_index() + df_temp = df_temp.set_index("Time").sort_index().reset_index() df_temp = df_temp.iloc[:48] # truncate return df_temp - X = X.groupby('RecordID').apply(apply_func) - X = X.drop('RecordID', axis=1) # + X = X.groupby("RecordID").apply(apply_func) + X = X.drop("RecordID", axis=1) # X = X.reset_index() - X = X.drop(['level_1', 'Time'], axis=1) - return { - 'X': X, - 'y': data['y'] - } + X = X.drop(["level_1", "Time"], axis=1) + return {"X": X, "y": data["y"]} -PREPROCESSING = { - 'physionet_2012': preprocess_physionet2012 -} +PREPROCESSING = {"physionet_2012": preprocess_physionet2012} def load_specific_dataset(dataset_name, use_cache=True): - """ Load specific datasets supported by PyPOTS. + """Load specific datasets supported by PyPOTS. Different from tsdb.load_dataset(), which only produces merely raw data, load_specific_dataset here does some preprocessing operations, like truncating time series to generate samples with the same length. @@ -85,12 +80,16 @@ def load_specific_dataset(dataset_name, use_cache=True): e.g. standardizing and splitting. """ - print(f'Loading the dataset {dataset_name} with TSDB (https://github.com/WenjieDu/Time_Series_Database)...') - assert dataset_name in SUPPORTED_DATASETS, f'Dataset {dataset_name} is not supported. ' \ - f'If you believe this dataset is valuable to be supported by PyPOTS,' \ - f'please create an issue on GitHub ' \ - f'https://github.com/WenjieDu/PyPOTS/issues' - print(f'Starting preprocessing {dataset_name}...') + print( + f"Loading the dataset {dataset_name} with TSDB (https://github.com/WenjieDu/Time_Series_Database)..." + ) + assert dataset_name in SUPPORTED_DATASETS, ( + f"Dataset {dataset_name} is not supported. " + f"If you believe this dataset is valuable to be supported by PyPOTS," + f"please create an issue on GitHub " + f"https://github.com/WenjieDu/PyPOTS/issues" + ) + print(f"Starting preprocessing {dataset_name}...") data = tsdb.load_dataset(dataset_name, use_cache) data = PREPROCESSING[dataset_name](data) return data diff --git a/pypots/forecasting/__init__.py b/pypots/forecasting/__init__.py index b88497a2..c28fe0fd 100644 --- a/pypots/forecasting/__init__.py +++ b/pypots/forecasting/__init__.py @@ -7,7 +7,4 @@ from pypots.forecasting.bttf import BTTF -__all__ = [ - 'BTTF' - -] +__all__ = ["BTTF"] diff --git a/pypots/forecasting/base.py b/pypots/forecasting/base.py index 2942f86c..ce930fed 100644 --- a/pypots/forecasting/base.py +++ b/pypots/forecasting/base.py @@ -15,15 +15,14 @@ class BaseForecaster(BaseModel): - """ Abstract class for all forecasting models. - """ + """Abstract class for all forecasting models.""" def __init__(self, device): super().__init__(device) @abstractmethod def fit(self, train_X): - """ Train the cluster. + """Train the cluster. Parameters ---------- @@ -39,7 +38,7 @@ def fit(self, train_X): @abstractmethod def forecast(self, X): - """ Forecast the future the input with the trained model. 
+ """Forecast the future the input with the trained model. Parameters ---------- @@ -55,20 +54,24 @@ def forecast(self, X): class BaseNNForecaster(BaseNNModel, BaseForecaster): - def __init__(self, learning_rate, epochs, patience, batch_size, weight_decay, device): - super().__init__(learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, learning_rate, epochs, patience, batch_size, weight_decay, device + ): + super().__init__( + learning_rate, epochs, patience, batch_size, weight_decay, device + ) @abstractmethod def assemble_input_data(self, data): pass def _train_model(self, training_loader, val_loader=None): - self.optimizer = torch.optim.Adam(self.model.parameters(), - lr=self.lr, - weight_decay=self.weight_decay) + self.optimizer = torch.optim.Adam( + self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay + ) # each training starts from the very beginning, so reset the loss and model dict here - self.best_loss = float('inf') + self.best_loss = float("inf") self.best_model_dict = None try: @@ -79,12 +82,14 @@ def _train_model(self, training_loader, val_loader=None): inputs = self.assemble_input_data(data) self.optimizer.zero_grad() results = self.model.forward(inputs) - results['loss'].backward() + results["loss"].backward() self.optimizer.step() - epoch_train_loss_collector.append(results['loss'].item()) + epoch_train_loss_collector.append(results["loss"].item()) - mean_train_loss = np.mean(epoch_train_loss_collector) # mean training loss of the current epoch - self.logger['training_loss'].append(mean_train_loss) + mean_train_loss = np.mean( + epoch_train_loss_collector + ) # mean training loss of the current epoch + self.logger["training_loss"].append(mean_train_loss) if val_loader is not None: self.model.eval() @@ -93,14 +98,16 @@ def _train_model(self, training_loader, val_loader=None): for idx, data in enumerate(val_loader): inputs = self.assemble_input_data(data) results = self.model.forward(inputs) - epoch_val_loss_collector.append(results['loss'].item()) + epoch_val_loss_collector.append(results["loss"].item()) mean_val_loss = np.mean(epoch_val_loss_collector) - self.logger['validating_loss'].append(mean_val_loss) - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}') + self.logger["validating_loss"].append(mean_val_loss) + print( + f"epoch {epoch}: training loss {mean_train_loss:.4f}, validating loss {mean_val_loss:.4f}" + ) mean_loss = mean_val_loss else: - print(f'epoch {epoch}: training loss {mean_train_loss:.4f}') + print(f"epoch {epoch}: training loss {mean_train_loss:.4f}") mean_loss = mean_train_loss if mean_loss < self.best_loss: @@ -110,18 +117,24 @@ def _train_model(self, training_loader, val_loader=None): else: self.patience -= 1 if self.patience == 0: - print('Exceeded the training patience. Terminating the training procedure...') + print( + "Exceeded the training patience. Terminating the training procedure..." + ) break except Exception as e: - print(f'Exception: {e}') + print(f"Exception: {e}") if self.best_model_dict is None: - raise RuntimeError('Training got interrupted. Model was not get trained. Please try fit() again.') + raise RuntimeError( + "Training got interrupted. Model was not get trained. Please try fit() again." + ) else: - RuntimeWarning('Training got interrupted. ' - 'Model will load the best parameters so far for testing. ' - "If you don't want it, please try fit() again.") + RuntimeWarning( + "Training got interrupted. 
" + "Model will load the best parameters so far for testing. " + "If you don't want it, please try fit() again." + ) - if np.equal(self.best_loss, float('inf')): - raise ValueError('Something is wrong. best_loss is Nan after training.') + if np.equal(self.best_loss, float("inf")): + raise ValueError("Something is wrong. best_loss is Nan after training.") - print('Finished training.') + print("Finished training.") diff --git a/pypots/forecasting/bttf.py b/pypots/forecasting/bttf.py index 9967300e..e119783c 100644 --- a/pypots/forecasting/bttf.py +++ b/pypots/forecasting/bttf.py @@ -26,8 +26,16 @@ def mvnrnd_pre(mu, Lambda): src = normrnd(size=(mu.shape[0],)) - return solve_ut(cholesky_upper(Lambda, overwrite_a=True, check_finite=False), - src, lower=False, check_finite=False, overwrite_b=True) + mu + return ( + solve_ut( + cholesky_upper(Lambda, overwrite_a=True, check_finite=False), + src, + lower=False, + check_finite=False, + overwrite_b=True, + ) + + mu + ) def cov_mat(mat, mat_bar): @@ -36,25 +44,31 @@ def cov_mat(mat, mat_bar): def ten2mat(tensor, mode): - return np.reshape(np.moveaxis(tensor, mode, 0), (tensor.shape[mode], -1), order='F') + return np.reshape(np.moveaxis(tensor, mode, 0), (tensor.shape[mode], -1), order="F") def sample_factor_u(tau_sparse_tensor, tau_ind, U, V, X, beta0=1): - """Sampling M-by-R factor matrix U and its hyper-parameters (mu_u, Lambda_u). - """ + """Sampling M-by-R factor matrix U and its hyper-parameters (mu_u, Lambda_u).""" dim1, rank = U.shape U_bar = np.mean(U, axis=0) temp = dim1 / (dim1 + beta0) var_mu_hyper = temp * U_bar - var_U_hyper = inv(np.eye(rank) + cov_mat(U, U_bar) + temp * beta0 * np.outer(U_bar, U_bar)) + var_U_hyper = inv( + np.eye(rank) + cov_mat(U, U_bar) + temp * beta0 * np.outer(U_bar, U_bar) + ) var_Lambda_hyper = wishart.rvs(df=dim1 + rank, scale=var_U_hyper) var_mu_hyper = mvnrnd_pre(var_mu_hyper, (dim1 + beta0) * var_Lambda_hyper) var1 = kr_prod(X, V).T var2 = kr_prod(var1, var1) - var3 = (var2 @ ten2mat(tau_ind, 0).T).reshape([rank, rank, dim1]) + var_Lambda_hyper[:, :, None] - var4 = var1 @ ten2mat(tau_sparse_tensor, 0).T + (var_Lambda_hyper @ var_mu_hyper)[:, None] + var3 = (var2 @ ten2mat(tau_ind, 0).T).reshape( + [rank, rank, dim1] + ) + var_Lambda_hyper[:, :, None] + var4 = ( + var1 @ ten2mat(tau_sparse_tensor, 0).T + + (var_Lambda_hyper @ var_mu_hyper)[:, None] + ) for i in range(dim1): U[i, :] = mvnrnd_pre(solve(var3[:, :, i], var4[:, i]), var3[:, :, i]) @@ -62,21 +76,27 @@ def sample_factor_u(tau_sparse_tensor, tau_ind, U, V, X, beta0=1): def sample_factor_v(tau_sparse_tensor, tau_ind, U, V, X, beta0=1): - """Sampling N-by-R factor matrix V and its hyper-parameters (mu_v, Lambda_v). 
- """ + """Sampling N-by-R factor matrix V and its hyper-parameters (mu_v, Lambda_v).""" dim2, rank = V.shape V_bar = np.mean(V, axis=0) temp = dim2 / (dim2 + beta0) var_mu_hyper = temp * V_bar - var_V_hyper = inv(np.eye(rank) + cov_mat(V, V_bar) + temp * beta0 * np.outer(V_bar, V_bar)) + var_V_hyper = inv( + np.eye(rank) + cov_mat(V, V_bar) + temp * beta0 * np.outer(V_bar, V_bar) + ) var_Lambda_hyper = wishart.rvs(df=dim2 + rank, scale=var_V_hyper) var_mu_hyper = mvnrnd_pre(var_mu_hyper, (dim2 + beta0) * var_Lambda_hyper) var1 = kr_prod(X, U).T var2 = kr_prod(var1, var1) - var3 = (var2 @ ten2mat(tau_ind, 1).T).reshape([rank, rank, dim2]) + var_Lambda_hyper[:, :, None] - var4 = var1 @ ten2mat(tau_sparse_tensor, 1).T + (var_Lambda_hyper @ var_mu_hyper)[:, None] + var3 = (var2 @ ten2mat(tau_ind, 1).T).reshape( + [rank, rank, dim2] + ) + var_Lambda_hyper[:, :, None] + var4 = ( + var1 @ ten2mat(tau_sparse_tensor, 1).T + + (var_Lambda_hyper @ var_mu_hyper)[:, None] + ) for j in range(dim2): V[j, :] = mvnrnd_pre(solve(var3[:, :, j], var4[:, j]), var3[:, :, j]) @@ -101,10 +121,12 @@ def sample_var_coefficient(X, time_lags): d = time_lags.shape[0] tmax = np.max(time_lags) - Z_mat = X[tmax: dim, :] + Z_mat = X[tmax:dim, :] Q_mat = np.zeros((dim - tmax, rank * d)) for k in range(d): - Q_mat[:, k * rank: (k + 1) * rank] = X[tmax - time_lags[k]: dim - time_lags[k], :] + Q_mat[:, k * rank : (k + 1) * rank] = X[ + tmax - time_lags[k] : dim - time_lags[k], : + ] var_Psi0 = np.eye(rank * d) + Q_mat.T @ Q_mat var_Psi = inv(var_Psi0) var_M = var_Psi @ Q_mat.T @ Z_mat @@ -123,14 +145,16 @@ def sample_factor_x(tau_sparse_tensor, tau_ind, time_lags, U, V, X, A, Lambda_x) d = time_lags.shape[0] A0 = np.dstack([A] * d) for k in range(d): - A0[k * rank: (k + 1) * rank, :, k] = 0 + A0[k * rank : (k + 1) * rank, :, k] = 0 mat0 = Lambda_x @ A.T - mat1 = np.einsum('kij, jt -> kit', A.reshape([d, rank, rank]), Lambda_x) - mat2 = np.einsum('kit, kjt -> ij', mat1, A.reshape([d, rank, rank])) + mat1 = np.einsum("kij, jt -> kit", A.reshape([d, rank, rank]), Lambda_x) + mat2 = np.einsum("kit, kjt -> ij", mat1, A.reshape([d, rank, rank])) var1 = kr_prod(V, U).T var2 = kr_prod(var1, var1) - var3 = (var2 @ ten2mat(tau_ind, 2).T).reshape([rank, rank, dim3]) + Lambda_x[:, :, None] + var3 = (var2 @ ten2mat(tau_ind, 2).T).reshape([rank, rank, dim3]) + Lambda_x[ + :, :, None + ] var4 = var1 @ ten2mat(tau_sparse_tensor, 2).T for t in range(dim3): Mt = np.zeros((rank, rank)) @@ -149,8 +173,10 @@ def sample_factor_x(tau_sparse_tensor, tau_ind, time_lags, U, V, X, A, Lambda_x) for k in index: temp[:, n] = X[t + time_lags[k] - time_lags, :].reshape(rank * d) n += 1 - temp0 = X[t + time_lags[index], :].T - np.einsum('ijk, ik -> jk', A0[:, :, index], temp) - Nt = np.einsum('kij, jk -> i', mat1[index, :, :], temp0) + temp0 = X[t + time_lags[index], :].T - np.einsum( + "ijk, ik -> jk", A0[:, :, index], temp + ) + Nt = np.einsum("kij, jk -> i", mat1[index, :, :], temp0) var3[:, :, t] = var3[:, :, t] + Mt if t < tmax: @@ -178,7 +204,16 @@ def ar4cast(A, X, Sigma, time_lags, multi_step): return X_new -def _BTTF(dense_tensor, sparse_tensor, init, rank, time_lags, burn_iter, gibbs_iter, multi_step=1): +def _BTTF( + dense_tensor, + sparse_tensor, + init, + rank, + time_lags, + burn_iter, + gibbs_iter, + multi_step=1, +): """Bayesian Temporal Tensor Factorization, BTTF.""" dim1, dim2, dim3 = sparse_tensor.shape @@ -213,10 +248,14 @@ def _BTTF(dense_tensor, sparse_tensor, init, rank, time_lags, burn_iter, gibbs_i U = 
sample_factor_u(tau_sparse_tensor, tau_ind, U, V, X) V = sample_factor_v(tau_sparse_tensor, tau_ind, U, V, X) A, Sigma = sample_var_coefficient(X, time_lags) - X = sample_factor_x(tau_sparse_tensor, tau_ind, time_lags, U, V, X, A, inv(Sigma)) - tensor_hat = np.einsum('is, js, ts -> ijt', U, V, X) - tau = np.random.gamma(1e-6 + 0.5 * np.sum(ind), - 1 / (1e-6 + 0.5 * np.sum(((sparse_tensor - tensor_hat) ** 2) * ind))) + X = sample_factor_x( + tau_sparse_tensor, tau_ind, time_lags, U, V, X, A, inv(Sigma) + ) + tensor_hat = np.einsum("is, js, ts -> ijt", U, V, X) + tau = np.random.gamma( + 1e-6 + 0.5 * np.sum(ind), + 1 / (1e-6 + 0.5 * np.sum(((sparse_tensor - tensor_hat) ** 2) * ind)), + ) temp_hat += tensor_hat[pos_test] if (it + 1) % show_iter == 0 and it < burn_iter: # temp_hat = temp_hat / show_iter @@ -233,7 +272,7 @@ def _BTTF(dense_tensor, sparse_tensor, init, rank, time_lags, burn_iter, gibbs_i tensor_hat_plus += tensor_hat X0 = ar4cast(A, X, Sigma, time_lags, multi_step) X_plus[:, :, it - burn_iter] = X0 - tensor_new_plus += np.einsum('is, js, ts -> ijt', U, V, X0[- multi_step:, :]) + tensor_new_plus += np.einsum("is, js, ts -> ijt", U, V, X0[-multi_step:, :]) tensor_hat = tensor_hat_plus / gibbs_iter # print('Imputation MAPE: {:.6}'.format(compute_mape(dense_test, tensor_hat[:, :, : dim3][pos_test]))) # print('Imputation RMSE: {:.6}'.format(compute_rmse(dense_test, tensor_hat[:, :, : dim3][pos_test]))) @@ -243,7 +282,9 @@ def _BTTF(dense_tensor, sparse_tensor, init, rank, time_lags, burn_iter, gibbs_i return tensor_hat, U_plus, V_plus, X_plus, A_plus, Sigma_plus, tau_plus -def sample_factor_x_partial(tau_sparse_tensor, tau_ind, time_lags, U, V, X, A, Lambda_x, back_step): +def sample_factor_x_partial( + tau_sparse_tensor, tau_ind, time_lags, U, V, X, A, Lambda_x, back_step +): """Sampling T-by-R factor matrix X.""" dim3, rank = X.shape @@ -252,15 +293,17 @@ def sample_factor_x_partial(tau_sparse_tensor, tau_ind, time_lags, U, V, X, A, L d = time_lags.shape[0] A0 = np.dstack([A] * d) for k in range(d): - A0[k * rank: (k + 1) * rank, :, k] = 0 + A0[k * rank : (k + 1) * rank, :, k] = 0 mat0 = Lambda_x @ A.T - mat1 = np.einsum('kij, jt -> kit', A.reshape([d, rank, rank]), Lambda_x) - mat2 = np.einsum('kit, kjt -> ij', mat1, A.reshape([d, rank, rank])) + mat1 = np.einsum("kij, jt -> kit", A.reshape([d, rank, rank]), Lambda_x) + mat2 = np.einsum("kit, kjt -> ij", mat1, A.reshape([d, rank, rank])) var1 = kr_prod(V, U).T var2 = kr_prod(var1, var1) - var3 = (var2 @ ten2mat(tau_ind[:, :, - back_step:], 2).T).reshape([rank, rank, back_step]) + Lambda_x[:, :, None] - var4 = var1 @ ten2mat(tau_sparse_tensor[:, :, - back_step:], 2).T + var3 = (var2 @ ten2mat(tau_ind[:, :, -back_step:], 2).T).reshape( + [rank, rank, back_step] + ) + Lambda_x[:, :, None] + var4 = var1 @ ten2mat(tau_sparse_tensor[:, :, -back_step:], 2).T for t in range(dim3 - back_step, dim3): Mt = np.zeros((rank, rank)) Nt = np.zeros(rank) @@ -275,15 +318,24 @@ def sample_factor_x_partial(tau_sparse_tensor, tau_ind, time_lags, U, V, X, A, L for k in index: temp[:, n] = X[t + time_lags[k] - time_lags, :].reshape(rank * d) n += 1 - temp0 = X[t + time_lags[index], :].T - np.einsum('ijk, ik -> jk', A0[:, :, index], temp) - Nt = np.einsum('kij, jk -> i', mat1[index, :, :], temp0) + temp0 = X[t + time_lags[index], :].T - np.einsum( + "ijk, ik -> jk", A0[:, :, index], temp + ) + Nt = np.einsum("kij, jk -> i", mat1[index, :, :], temp0) var3[:, :, t + back_step - dim3] = var3[:, :, t + back_step - dim3] + Mt - X[t, :] = 
mvnrnd_pre(solve(var3[:, :, t + back_step - dim3], - var4[:, t + back_step - dim3] + Nt + Qt), var3[:, :, t + back_step - dim3]) + X[t, :] = mvnrnd_pre( + solve( + var3[:, :, t + back_step - dim3], + var4[:, t + back_step - dim3] + Nt + Qt, + ), + var3[:, :, t + back_step - dim3], + ) return X -def _BTTF_partial(sparse_tensor, init, rank, time_lags, gibbs_iter, multi_step=1, gamma=10): +def _BTTF_partial( + sparse_tensor, init, rank, time_lags, gibbs_iter, multi_step=1, gamma=10 +): """Bayesian Temporal Tensor Factorization, BTTF.""" dim1, dim2, dim3 = sparse_tensor.shape @@ -304,50 +356,97 @@ def _BTTF_partial(sparse_tensor, init, rank, time_lags, gibbs_iter, multi_step=1 for it in range(gibbs_iter): tau_ind = tau_plus[it] * ind tau_sparse_tensor = tau_plus[it] * sparse_tensor - X = sample_factor_x_partial(tau_sparse_tensor, tau_ind, time_lags, U_plus[:, :, it], V_plus[:, :, it], - X_plus[:, :, it], A_plus[:, :, it], inv(Sigma_plus[:, :, it]), back_step) + X = sample_factor_x_partial( + tau_sparse_tensor, + tau_ind, + time_lags, + U_plus[:, :, it], + V_plus[:, :, it], + X_plus[:, :, it], + A_plus[:, :, it], + inv(Sigma_plus[:, :, it]), + back_step, + ) X0 = ar4cast(A_plus[:, :, it], X, Sigma_plus[:, :, it], time_lags, multi_step) X_new_plus[:, :, it] = X0 - tensor_new_plus += np.einsum('is, js, ts -> ijt', U_plus[:, :, it], V_plus[:, :, it], X0[- multi_step:, :]) + tensor_new_plus += np.einsum( + "is, js, ts -> ijt", U_plus[:, :, it], V_plus[:, :, it], X0[-multi_step:, :] + ) tensor_hat = tensor_new_plus / gibbs_iter tensor_hat[tensor_hat < 0] = 0 return tensor_hat, U_plus, V_plus, X_new_plus, A_plus, Sigma_plus, tau_plus -def BTTF_forecast(dense_tensor, sparse_tensor, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter, gamma=10): +def BTTF_forecast( + dense_tensor, + sparse_tensor, + pred_step, + multi_step, + rank, + time_lags, + burn_iter, + gibbs_iter, + gamma=10, +): dim1, dim2, T = dense_tensor.shape start_time = T - pred_step max_count = int(np.ceil(pred_step / multi_step)) tensor_hat = np.zeros((dim1, dim2, max_count * multi_step)) for t in range(max_count): if t == 0: - init = {"U": 0.1 * np.random.randn(dim1, rank), - "V": 0.1 * np.random.randn(dim2, rank), - "X": 0.1 * np.random.randn(start_time, rank)} - tensor, U, V, X_new, A, Sigma, tau = _BTTF(dense_tensor[:, :, : start_time], - sparse_tensor[:, :, : start_time], - init, - rank, - time_lags, - burn_iter, - gibbs_iter, - multi_step) + init = { + "U": 0.1 * np.random.randn(dim1, rank), + "V": 0.1 * np.random.randn(dim2, rank), + "X": 0.1 * np.random.randn(start_time, rank), + } + tensor, U, V, X_new, A, Sigma, tau = _BTTF( + dense_tensor[:, :, :start_time], + sparse_tensor[:, :, :start_time], + init, + rank, + time_lags, + burn_iter, + gibbs_iter, + multi_step, + ) else: - init = {"U_plus": U, "V_plus": V, "X_plus": X_new, "A_plus": A, "Sigma_plus": Sigma, "tau_plus": tau} - tensor, U, V, X_new, A, Sigma, tau = _BTTF_partial(sparse_tensor[:, :, : start_time + t * multi_step], - init, - rank, - time_lags, - gibbs_iter, - multi_step, - gamma) - tensor_hat[:, :, t * multi_step: (t + 1) * multi_step] = tensor[:, :, - multi_step:] + init = { + "U_plus": U, + "V_plus": V, + "X_plus": X_new, + "A_plus": A, + "Sigma_plus": Sigma, + "tau_plus": tau, + } + tensor, U, V, X_new, A, Sigma, tau = _BTTF_partial( + sparse_tensor[:, :, : start_time + t * multi_step], + init, + rank, + time_lags, + gibbs_iter, + multi_step, + gamma, + ) + tensor_hat[:, :, t * multi_step : (t + 1) * multi_step] = tensor[ + :, :, -multi_step: + ] 
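    # After the rolling loop above finishes, tensor_hat holds max_count * multi_step
    # forecasted slices, assembled block-by-block from successive Gibbs runs.
    # A minimal illustrative call (a sketch only; the argument values below are
    # assumptions borrowed from the BTTF test case later in this series, with
    # dense_tensor of shape (dim1, dim2, T)):
    #
    #   time_lags = np.asarray([1, 2, 3, 10, 11, 12, 20, 21, 22])
    #   forecast = BTTF_forecast(
    #       dense_tensor=X, sparse_tensor=X.copy(),
    #       pred_step=20, multi_step=2, rank=10,
    #       time_lags=time_lags, burn_iter=5, gibbs_iter=5,
    #   )  # forecast.shape == (dim1, dim2, 20), i.e. max_count * multi_step steps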
return tensor_hat class BTTF(BaseForecaster): - def __init__(self, n_steps, n_features, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter, device=None): + def __init__( + self, + n_steps, + n_features, + pred_step, + multi_step, + rank, + time_lags, + burn_iter, + gibbs_iter, + device=None, + ): super().__init__(device) self.n_steps = n_steps self.n_features = n_features @@ -359,15 +458,21 @@ def __init__(self, n_steps, n_features, pred_step, multi_step, rank, time_lags, self.gibbs_iter = gibbs_iter def fit(self, train_X): - warnings.warn( - 'Please run func forecast(X) directly.' - ) + warnings.warn("Please run func forecast(X) directly.") def forecast(self, X): - self.check_input(self.n_steps, self.n_features, X, out_dtype='ndarray') + self.check_input(self.n_steps, self.n_features, X, out_dtype="ndarray") X = X.transpose((0, 2, 1)) - pred = BTTF_forecast(X, X.copy(), self.pred_step, self.multi_step, - self.rank, self.time_lags, self.burn_iter, self.gibbs_iter) + pred = BTTF_forecast( + X, + X.copy(), + self.pred_step, + self.multi_step, + self.rank, + self.time_lags, + self.burn_iter, + self.gibbs_iter, + ) pred = pred.transpose((0, 2, 1)) return pred diff --git a/pypots/imputation/__init__.py b/pypots/imputation/__init__.py index 3b274edb..bc176266 100644 --- a/pypots/imputation/__init__.py +++ b/pypots/imputation/__init__.py @@ -11,9 +11,8 @@ from pypots.imputation.transformer import Transformer __all__ = [ - 'BRITS', - 'Transformer', - 'SAITS', - 'LOCF', - + "BRITS", + "Transformer", + "SAITS", + "LOCF", ] diff --git a/pypots/imputation/base.py b/pypots/imputation/base.py index a7c2a626..c40c09d0 100644 --- a/pypots/imputation/base.py +++ b/pypots/imputation/base.py @@ -21,15 +21,14 @@ class BaseImputer(BaseModel): - """ Abstract class for all imputation models. - """ + """Abstract class for all imputation models.""" def __init__(self, device): super().__init__(device) @abstractmethod def fit(self, train_X, val_X=None): - """ Train the imputer. + """Train the imputer. Parameters ---------- @@ -47,7 +46,7 @@ def fit(self, train_X, val_X=None): @abstractmethod def impute(self, X): - """ Impute missing data with the trained model. + """Impute missing data with the trained model. Parameters ---------- @@ -63,15 +62,23 @@ def impute(self, X): class BaseNNImputer(BaseNNModel, BaseImputer): - def __init__(self, learning_rate, epochs, patience, batch_size, weight_decay, device): - super().__init__(learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, learning_rate, epochs, patience, batch_size, weight_decay, device + ): + super().__init__( + learning_rate, epochs, patience, batch_size, weight_decay, device + ) @abstractmethod def assemble_input_data(self, data): pass def _train_model( - self, training_loader, val_loader=None, val_X_intact=None, val_indicating_mask=None + self, + training_loader, + val_loader=None, + val_X_intact=None, + val_indicating_mask=None, ): self.optimizer = torch.optim.Adam( self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay @@ -135,7 +142,9 @@ def _train_model( nni.report_final_result(self.best_loss) if self.patience == 0: - print("Exceeded the training patience. Terminating the training procedure...") + print( + "Exceeded the training patience. Terminating the training procedure..." 
+ ) break except Exception as e: diff --git a/pypots/imputation/brits.py b/pypots/imputation/brits.py index 8f9317fb..46587d81 100644 --- a/pypots/imputation/brits.py +++ b/pypots/imputation/brits.py @@ -21,7 +21,7 @@ class FeatureRegression(nn.Module): - """ The module used to capture the correlation between features for imputation. + """The module used to capture the correlation between features for imputation. Attributes ---------- @@ -45,18 +45,18 @@ def __init__(self, input_size): self.b = Parameter(torch.Tensor(input_size)) m = torch.ones(input_size, input_size) - torch.eye(input_size, input_size) - self.register_buffer('m', m) + self.register_buffer("m", m) self.reset_parameters() def reset_parameters(self): - stdv = 1. / math.sqrt(self.W.size(0)) + stdv = 1.0 / math.sqrt(self.W.size(0)) self.W.data.uniform_(-stdv, stdv) if self.b is not None: self.b.data.uniform_(-stdv, stdv) def forward(self, x): - """ Forward processing of the NN module. + """Forward processing of the NN module. Parameters ---------- @@ -74,7 +74,7 @@ def forward(self, x): class TemporalDecay(nn.Module): - """ The module used to generate the temporal decay factor gamma in the original paper. + """The module used to generate the temporal decay factor gamma in the original paper. Attributes ---------- @@ -100,20 +100,20 @@ def __init__(self, input_size, output_size, diag=False): self.b = Parameter(torch.Tensor(output_size)) if self.diag: - assert (input_size == output_size) + assert input_size == output_size m = torch.eye(input_size, input_size) - self.register_buffer('m', m) + self.register_buffer("m", m) self.reset_parameters() def reset_parameters(self): - stdv = 1. / math.sqrt(self.W.size(0)) + stdv = 1.0 / math.sqrt(self.W.size(0)) self.W.data.uniform_(-stdv, stdv) if self.b is not None: self.b.data.uniform_(-stdv, stdv) def forward(self, delta): - """ Forward processing of the NN module. + """Forward processing of the NN module. Parameters ---------- @@ -134,7 +134,7 @@ def forward(self, delta): class RITS(nn.Module): - """ model RITS: Recurrent Imputation for Time Series + """model RITS: Recurrent Imputation for Time Series Attributes ---------- @@ -179,14 +179,18 @@ def __init__(self, n_steps, n_features, rnn_hidden_size, device=None): self.device = device self.rnn_cell = nn.LSTMCell(self.n_features * 2, self.rnn_hidden_size) - self.temp_decay_h = TemporalDecay(input_size=self.n_features, output_size=self.rnn_hidden_size, diag=False) - self.temp_decay_x = TemporalDecay(input_size=self.n_features, output_size=self.n_features, diag=True) + self.temp_decay_h = TemporalDecay( + input_size=self.n_features, output_size=self.rnn_hidden_size, diag=False + ) + self.temp_decay_x = TemporalDecay( + input_size=self.n_features, output_size=self.n_features, diag=True + ) self.hist_reg = nn.Linear(self.rnn_hidden_size, self.n_features) self.feat_reg = FeatureRegression(self.n_features) self.combining_weight = nn.Linear(self.n_features * 2, self.n_features) def impute(self, inputs, direction): - """ The imputation function. + """The imputation function. 
Parameters ---------- inputs : dict, @@ -203,13 +207,17 @@ def impute(self, inputs, direction): reconstruction_loss : float tensor, reconstruction loss """ - values = inputs[direction]['X'] # feature values - masks = inputs[direction]['missing_mask'] # missing masks - deltas = inputs[direction]['deltas'] # time-gap values + values = inputs[direction]["X"] # feature values + masks = inputs[direction]["missing_mask"] # missing masks + deltas = inputs[direction]["deltas"] # time-gap values # create hidden states and cell states for the lstm cell - hidden_states = torch.zeros((values.size()[0], self.rnn_hidden_size), device=self.device) - cell_states = torch.zeros((values.size()[0], self.rnn_hidden_size), device=self.device) + hidden_states = torch.zeros( + (values.size()[0], self.rnn_hidden_size), device=self.device + ) + cell_states = torch.zeros( + (values.size()[0], self.rnn_hidden_size), device=self.device + ) estimations = [] reconstruction_loss = 0.0 @@ -242,14 +250,16 @@ def impute(self, inputs, direction): estimations.append(c_h.unsqueeze(dim=1)) inputs = torch.cat([c_c, m], dim=1) - hidden_states, cell_states = self.rnn_cell(inputs, (hidden_states, cell_states)) + hidden_states, cell_states = self.rnn_cell( + inputs, (hidden_states, cell_states) + ) estimations = torch.cat(estimations, dim=1) imputed_data = masks * values + (1 - masks) * estimations return imputed_data, hidden_states, reconstruction_loss - def forward(self, inputs, direction='forward'): - """ Forward processing of the NN module. + def forward(self, inputs, direction="forward"): + """Forward processing of the NN module. Parameters ---------- inputs : dict, @@ -265,19 +275,21 @@ def forward(self, inputs, direction='forward'): """ imputed_data, hidden_state, reconstruction_loss = self.impute(inputs, direction) # for each iteration, reconstruction_loss increases its value for 3 times - reconstruction_loss /= (self.n_steps * 3) + reconstruction_loss /= self.n_steps * 3 ret_dict = { - 'consistency_loss': torch.tensor(0.0, device=self.device), # single direction, has no consistency loss - 'reconstruction_loss': reconstruction_loss, - 'imputed_data': imputed_data, - 'final_hidden_state': hidden_state + "consistency_loss": torch.tensor( + 0.0, device=self.device + ), # single direction, has no consistency loss + "reconstruction_loss": reconstruction_loss, + "imputed_data": imputed_data, + "final_hidden_state": hidden_state, } return ret_dict class _BRITS(nn.Module): - """ model BRITS: Bidirectional RITS + """model BRITS: Bidirectional RITS BRITS consists of two RITS, which take time-series data from two directions (forward/backward) respectively. Attributes @@ -306,7 +318,7 @@ def __init__(self, n_steps, n_features, rnn_hidden_size, device=None): self.rits_b = RITS(n_steps, n_features, rnn_hidden_size, device) def impute(self, inputs): - """ Impute the missing data. Only impute, this is for test stage. + """Impute the missing data. Only impute, this is for test stage. Parameters ---------- @@ -319,16 +331,16 @@ def impute(self, inputs): The feature vectors with missing part imputed. 
""" - imputed_data_f, _, _ = self.rits_f.impute(inputs, 'forward') - imputed_data_b, _, _ = self.rits_b.impute(inputs, 'backward') - imputed_data_b = {'imputed_data_b': imputed_data_b} - imputed_data_b = self.reverse(imputed_data_b)['imputed_data_b'] + imputed_data_f, _, _ = self.rits_f.impute(inputs, "forward") + imputed_data_b, _, _ = self.rits_b.impute(inputs, "backward") + imputed_data_b = {"imputed_data_b": imputed_data_b} + imputed_data_b = self.reverse(imputed_data_b)["imputed_data_b"] imputed_data = (imputed_data_f + imputed_data_b) / 2 return imputed_data @staticmethod def get_consistency_loss(pred_f, pred_b): - """ Calculate the consistency loss between the imputation from two RITS models. + """Calculate the consistency loss between the imputation from two RITS models. Parameters ---------- @@ -347,7 +359,7 @@ def get_consistency_loss(pred_f, pred_b): @staticmethod def reverse(ret): - """ Reverse the array values on the time dimension in the given dictionary. + """Reverse the array values on the time dimension in the given dictionary. Parameters ---------- @@ -363,7 +375,9 @@ def reverse_tensor(tensor_): if tensor_.dim() <= 1: return tensor_ indices = range(tensor_.size()[1])[::-1] - indices = torch.tensor(indices, dtype=torch.long, device=tensor_.device, requires_grad=False) + indices = torch.tensor( + indices, dtype=torch.long, device=tensor_.device, requires_grad=False + ) return tensor_.index_select(1, indices) for key in ret: @@ -372,7 +386,7 @@ def reverse_tensor(tensor_): return ret def merge_ret(self, ret_f, ret_b): - """ Merge (average) results from two RITS models into one. + """Merge (average) results from two RITS models into one. Parameters ---------- @@ -386,17 +400,21 @@ def merge_ret(self, ret_f, ret_b): dict, Merged results in a dictionary. """ - consistency_loss = self.get_consistency_loss(ret_f['imputed_data'], ret_b['imputed_data']) - ret_f['imputed_data'] = (ret_f['imputed_data'] + ret_b['imputed_data']) / 2 - ret_f['consistency_loss'] = consistency_loss - ret_f['loss'] = consistency_loss + \ - ret_f['reconstruction_loss'] + \ - ret_b['reconstruction_loss'] + consistency_loss = self.get_consistency_loss( + ret_f["imputed_data"], ret_b["imputed_data"] + ) + ret_f["imputed_data"] = (ret_f["imputed_data"] + ret_b["imputed_data"]) / 2 + ret_f["consistency_loss"] = consistency_loss + ret_f["loss"] = ( + consistency_loss + + ret_f["reconstruction_loss"] + + ret_b["reconstruction_loss"] + ) return ret_f def forward(self, inputs): - """ Forward processing of BRITS. + """Forward processing of BRITS. Parameters ---------- @@ -407,14 +425,14 @@ def forward(self, inputs): ------- dict, A dictionary includes all results. """ - ret_f = self.rits_f(inputs, 'forward') - ret_b = self.reverse(self.rits_b(inputs, 'backward')) + ret_f = self.rits_f(inputs, "forward") + ret_b = self.reverse(self.rits_b(inputs, "backward")) ret = self.merge_ret(ret_f, ret_b) return ret class BRITS(BaseNNImputer): - """ BRITS implementation + """BRITS implementation Attributes ---------- @@ -451,28 +469,34 @@ class BRITS(BaseNNImputer): Run the model on which device. 
""" - def __init__(self, - n_steps, - n_features, - rnn_hidden_size, - learning_rate=1e-3, - epochs=100, - patience=10, - batch_size=32, - weight_decay=1e-5, - device=None): - super().__init__(learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, + n_steps, + n_features, + rnn_hidden_size, + learning_rate=1e-3, + epochs=100, + patience=10, + batch_size=32, + weight_decay=1e-5, + device=None, + ): + super().__init__( + learning_rate, epochs, patience, batch_size, weight_decay, device + ) self.n_steps = n_steps self.n_features = n_features self.rnn_hidden_size = rnn_hidden_size - self.model = _BRITS(self.n_steps, self.n_features, self.rnn_hidden_size, self.device) + self.model = _BRITS( + self.n_steps, self.n_features, self.rnn_hidden_size, self.device + ) self.model = self.model.to(self.device) self._print_model_size() def fit(self, train_X, val_X=None): - """ Fit the model on the given training data. + """Fit the model on the given training data. Parameters ---------- @@ -492,23 +516,29 @@ def fit(self, train_X, val_X=None): val_X = self.check_input(self.n_steps, self.n_features, val_X) training_set = DatasetForBRITS(train_X) # time_gaps is necessary for BRITS - training_loader = DataLoader(training_set, batch_size=self.batch_size, shuffle=True) + training_loader = DataLoader( + training_set, batch_size=self.batch_size, shuffle=True + ) if val_X is None: self._train_model(training_loader) else: - val_X_intact, val_X, val_X_missing_mask, val_X_indicating_mask = mcar(val_X, 0.2) + val_X_intact, val_X, val_X_missing_mask, val_X_indicating_mask = mcar( + val_X, 0.2 + ) val_X = masked_fill(val_X, 1 - val_X_missing_mask, torch.nan) val_set = DatasetForBRITS(val_X) val_loader = DataLoader(val_set, batch_size=self.batch_size, shuffle=False) - self._train_model(training_loader, val_loader, val_X_intact, val_X_indicating_mask) + self._train_model( + training_loader, val_loader, val_X_intact, val_X_indicating_mask + ) self.model.load_state_dict(self.best_model_dict) self.model.eval() # set the model as eval status to freeze it. return self def assemble_input_data(self, data): - """ Assemble the input data into a dictionary. + """Assemble the input data into a dictionary. Parameters ---------- @@ -524,17 +554,13 @@ def assemble_input_data(self, data): indices, X, missing_mask, deltas, back_X, back_missing_mask, back_deltas = data # assemble input data inputs = { - 'indices': indices, - 'forward': { - 'X': X, - 'missing_mask': missing_mask, - 'deltas': deltas + "indices": indices, + "forward": {"X": X, "missing_mask": missing_mask, "deltas": deltas}, + "backward": { + "X": back_X, + "missing_mask": back_missing_mask, + "deltas": back_deltas, }, - 'backward': { - 'X': back_X, - 'missing_mask': back_missing_mask, - 'deltas': back_deltas - } } return inputs diff --git a/pypots/imputation/locf.py b/pypots/imputation/locf.py index 322e91af..2d391bb9 100644 --- a/pypots/imputation/locf.py +++ b/pypots/imputation/locf.py @@ -14,7 +14,7 @@ class LOCF(BaseImputer): - """ LOCF (Last Observed Carried Forward) imputation method. + """LOCF (Last Observed Carried Forward) imputation method. Attributes ---------- @@ -33,7 +33,7 @@ def fit(self, train_X, val_X=None): ) def locf_numpy(self, X): - """ Numpy implementation of LOCF. + """Numpy implementation of LOCF. Parameters ---------- @@ -71,7 +71,7 @@ def locf_numpy(self, X): return X_imputed def locf_torch(self, X): - """ Torch implementation of LOCF. + """Torch implementation of LOCF. 
Parameters ---------- @@ -104,7 +104,7 @@ def locf_torch(self, X): return X_imputed def impute(self, X): - """ Impute missing values + """Impute missing values Parameters ---------- diff --git a/pypots/imputation/saits.py b/pypots/imputation/saits.py index b521aa0c..3badbbbe 100644 --- a/pypots/imputation/saits.py +++ b/pypots/imputation/saits.py @@ -20,24 +20,61 @@ class _SAITS(nn.Module): - def __init__(self, n_layers, d_time, d_feature, d_model, d_inner, n_head, d_k, d_v, dropout, - diagonal_attention_mask=True, ORT_weight=1, MIT_weight=1): + def __init__( + self, + n_layers, + d_time, + d_feature, + d_model, + d_inner, + n_head, + d_k, + d_v, + dropout, + diagonal_attention_mask=True, + ORT_weight=1, + MIT_weight=1, + ): super().__init__() self.n_layers = n_layers actual_d_feature = d_feature * 2 self.ORT_weight = ORT_weight self.MIT_weight = MIT_weight - self.layer_stack_for_first_block = nn.ModuleList([ - EncoderLayer(d_time, actual_d_feature, d_model, d_inner, n_head, d_k, d_v, dropout, 0, - diagonal_attention_mask) - for _ in range(n_layers) - ]) - self.layer_stack_for_second_block = nn.ModuleList([ - EncoderLayer(d_time, actual_d_feature, d_model, d_inner, n_head, d_k, d_v, dropout, 0, - diagonal_attention_mask) - for _ in range(n_layers) - ]) + self.layer_stack_for_first_block = nn.ModuleList( + [ + EncoderLayer( + d_time, + actual_d_feature, + d_model, + d_inner, + n_head, + d_k, + d_v, + dropout, + 0, + diagonal_attention_mask, + ) + for _ in range(n_layers) + ] + ) + self.layer_stack_for_second_block = nn.ModuleList( + [ + EncoderLayer( + d_time, + actual_d_feature, + d_model, + d_inner, + n_head, + d_k, + d_v, + dropout, + 0, + diagonal_attention_mask, + ) + for _ in range(n_layers) + ] + ) self.dropout = nn.Dropout(p=dropout) self.position_enc = PositionalEncoding(d_model, n_position=d_time) @@ -52,11 +89,13 @@ def __init__(self, n_layers, d_time, d_feature, d_model, d_inner, n_head, d_k, d self.weight_combine = nn.Linear(d_feature + d_time, d_feature) def impute(self, inputs): - X, masks = inputs['X'], inputs['missing_mask'] + X, masks = inputs["X"], inputs["missing_mask"] # first DMSA block input_X_for_first = torch.cat([X, masks], dim=2) input_X_for_first = self.embedding_1(input_X_for_first) - enc_output = self.dropout(self.position_enc(input_X_for_first)) # namely, term e in the math equation + enc_output = self.dropout( + self.position_enc(input_X_for_first) + ) # namely, term e in the math equation for encoder_layer in self.layer_stack_for_first_block: enc_output, _ = encoder_layer(enc_output) @@ -66,7 +105,9 @@ def impute(self, inputs): # second DMSA block input_X_for_second = torch.cat([X_prime, masks], dim=2) input_X_for_second = self.embedding_2(input_X_for_second) - enc_output = self.position_enc(input_X_for_second) # namely term alpha in math algo + enc_output = self.position_enc( + input_X_for_second + ) # namely term alpha in math algo for encoder_layer in self.layer_stack_for_second_block: enc_output, attn_weights = encoder_layer(enc_output) @@ -85,11 +126,13 @@ def impute(self, inputs): ) # namely term eta # combine X_tilde_1 and X_tilde_2 X_tilde_3 = (1 - combining_weights) * X_tilde_2 + combining_weights * X_tilde_1 - X_c = masks * X + (1 - masks) * X_tilde_3 # replace non-missing part with original data + X_c = ( + masks * X + (1 - masks) * X_tilde_3 + ) # replace non-missing part with original data return X_c, [X_tilde_1, X_tilde_2, X_tilde_3] def forward(self, inputs): - X, masks = inputs['X'], inputs['missing_mask'] + X, masks = inputs["X"], 
inputs["missing_mask"] reconstruction_loss = 0 imputed_data, [X_tilde_1, X_tilde_2, X_tilde_3] = self.impute(inputs) @@ -100,38 +143,45 @@ def forward(self, inputs): reconstruction_loss /= 3 # have to cal imputation loss in the val stage; no need to cal imputation loss here in the tests stage - imputation_loss = cal_mae(X_tilde_3, inputs['X_intact'], inputs['indicating_mask']) + imputation_loss = cal_mae( + X_tilde_3, inputs["X_intact"], inputs["indicating_mask"] + ) loss = self.ORT_weight * reconstruction_loss + self.MIT_weight * imputation_loss return { - 'imputed_data': imputed_data, - 'reconstruction_loss': reconstruction_loss, 'imputation_loss': imputation_loss, - 'loss': loss + "imputed_data": imputed_data, + "reconstruction_loss": reconstruction_loss, + "imputation_loss": imputation_loss, + "loss": loss, } class SAITS(BaseNNImputer): - def __init__(self, - n_steps, - n_features, - n_layers, - d_model, - d_inner, - n_head, - d_k, - d_v, - dropout, - diagonal_attention_mask=True, - ORT_weight=1, - MIT_weight=1, - learning_rate=1e-3, - epochs=100, - patience=10, - batch_size=32, - weight_decay=1e-5, - device=None): - super().__init__(learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, + n_steps, + n_features, + n_layers, + d_model, + d_inner, + n_head, + d_k, + d_v, + dropout, + diagonal_attention_mask=True, + ORT_weight=1, + MIT_weight=1, + learning_rate=1e-3, + epochs=100, + patience=10, + batch_size=32, + weight_decay=1e-5, + device=None, + ): + super().__init__( + learning_rate, epochs, patience, batch_size, weight_decay, device + ) self.n_steps = n_steps self.n_features = n_features @@ -147,9 +197,20 @@ def __init__(self, self.ORT_weight = ORT_weight self.MIT_weight = MIT_weight - self.model = _SAITS(self.n_layers, self.n_steps, self.n_features, self.d_model, self.d_inner, self.n_head, - self.d_k, self.d_v, self.dropout, self.diagonal_attention_mask, - self.ORT_weight, self.MIT_weight) + self.model = _SAITS( + self.n_layers, + self.n_steps, + self.n_features, + self.d_model, + self.d_inner, + self.n_head, + self.d_k, + self.d_v, + self.dropout, + self.diagonal_attention_mask, + self.ORT_weight, + self.MIT_weight, + ) self.model = self.model.to(self.device) self._print_model_size() @@ -159,21 +220,27 @@ def fit(self, train_X, val_X=None): val_X = self.check_input(self.n_steps, self.n_features, val_X) training_set = DatasetForMIT(train_X) - training_loader = DataLoader(training_set, batch_size=self.batch_size, shuffle=True) + training_loader = DataLoader( + training_set, batch_size=self.batch_size, shuffle=True + ) if val_X is None: self._train_model(training_loader) else: - val_X_intact, val_X, val_X_missing_mask, val_X_indicating_mask = mcar(val_X, 0.2) + val_X_intact, val_X, val_X_missing_mask, val_X_indicating_mask = mcar( + val_X, 0.2 + ) val_X = masked_fill(val_X, 1 - val_X_missing_mask, torch.nan) val_set = DatasetForMIT(val_X) val_loader = DataLoader(val_set, batch_size=self.batch_size, shuffle=False) - self._train_model(training_loader, val_loader, val_X_intact, val_X_indicating_mask) + self._train_model( + training_loader, val_loader, val_X_intact, val_X_indicating_mask + ) self.model.load_state_dict(self.best_model_dict) self.model.eval() # set the model as eval status to freeze it. def assemble_input_data(self, data): - """ Assemble the input data into a dictionary. + """Assemble the input data into a dictionary. 
Parameters ---------- @@ -188,10 +255,10 @@ def assemble_input_data(self, data): indices, X_intact, X, missing_mask, indicating_mask = data inputs = { - 'X': X, - 'X_intact': X_intact, - 'missing_mask': missing_mask, - 'indicating_mask': indicating_mask + "X": X, + "X_intact": X_intact, + "missing_mask": missing_mask, + "indicating_mask": indicating_mask, } return inputs @@ -205,7 +272,7 @@ def impute(self, X): with torch.no_grad(): for idx, data in enumerate(test_loader): - inputs = {'X': data[1], 'missing_mask': data[2]} + inputs = {"X": data[1], "missing_mask": data[2]} imputed_data, _ = self.model.impute(inputs) imputation_collector.append(imputed_data) diff --git a/pypots/imputation/transformer.py b/pypots/imputation/transformer.py index 5f68fcdd..8146a266 100644 --- a/pypots/imputation/transformer.py +++ b/pypots/imputation/transformer.py @@ -50,7 +50,7 @@ def __init__(self, n_head, d_model, d_k, d_v, attn_dropout): self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False) self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False) - self.attention = ScaledDotProductAttention(d_k ** 0.5, attn_dropout) + self.attention = ScaledDotProductAttention(d_k**0.5, attn_dropout) self.fc = nn.Linear(n_head * d_v, d_model, bias=False) def forward(self, q, k, v, attn_mask=None): @@ -68,7 +68,9 @@ def forward(self, q, k, v, attn_mask=None): if attn_mask is not None: # this mask is imputation mask, which is not generated from each batch, so needs broadcasting on batch dim - attn_mask = attn_mask.unsqueeze(0).unsqueeze(1) # For batch and head axis broadcasting. + attn_mask = attn_mask.unsqueeze(0).unsqueeze( + 1 + ) # For batch and head axis broadcasting. v, attn_weights = self.attention(q, k, v, attn_mask) @@ -97,8 +99,19 @@ def forward(self, x): class EncoderLayer(nn.Module): - def __init__(self, d_time, d_feature, d_model, d_inner, n_head, d_k, d_v, dropout=0.1, attn_dropout=0.1, - diagonal_attention_mask=False): + def __init__( + self, + d_time, + d_feature, + d_model, + d_inner, + n_head, + d_k, + d_v, + dropout=0.1, + attn_dropout=0.1, + diagonal_attention_mask=False, + ): super().__init__() self.diagonal_attention_mask = diagonal_attention_mask @@ -119,7 +132,9 @@ def forward(self, enc_input): residual = enc_input # here we apply LN before attention cal, namely Pre-LN, refer paper https://arxiv.org/abs/2002.04745 enc_input = self.layer_norm(enc_input) - enc_output, attn_weights = self.slf_attn(enc_input, enc_input, enc_input, attn_mask=mask_time) + enc_output, attn_weights = self.slf_attn( + enc_input, enc_input, enc_input, attn_mask=mask_time + ) enc_output = self.dropout(enc_output) enc_output += residual @@ -131,38 +146,69 @@ class PositionalEncoding(nn.Module): def __init__(self, d_hid, n_position=200): super().__init__() # Not a parameter - self.register_buffer('pos_table', self._get_sinusoid_encoding_table(n_position, d_hid)) + self.register_buffer( + "pos_table", self._get_sinusoid_encoding_table(n_position, d_hid) + ) @staticmethod def _get_sinusoid_encoding_table(n_position, d_hid): - """ Sinusoid position encoding table """ + """Sinusoid position encoding table""" def get_position_angle_vec(position): - return [position / np.power(10000, 2 * (hid_j // 2) / d_hid) for hid_j in range(d_hid)] - - sinusoid_table = np.array([get_position_angle_vec(pos_i) for pos_i in range(n_position)]) + return [ + position / np.power(10000, 2 * (hid_j // 2) / d_hid) + for hid_j in range(d_hid) + ] + + sinusoid_table = np.array( + [get_position_angle_vec(pos_i) for pos_i in range(n_position)] + ) 
sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1 return torch.FloatTensor(sinusoid_table).unsqueeze(0) def forward(self, x): - return x + self.pos_table[:, :x.size(1)].clone().detach() + return x + self.pos_table[:, : x.size(1)].clone().detach() class _TransformerEncoder(nn.Module): - def __init__(self, n_layers, d_time, d_feature, d_model, d_inner, n_head, d_k, d_v, dropout, - ORT_weight=1, MIT_weight=1): + def __init__( + self, + n_layers, + d_time, + d_feature, + d_model, + d_inner, + n_head, + d_k, + d_v, + dropout, + ORT_weight=1, + MIT_weight=1, + ): super().__init__() self.n_layers = n_layers actual_d_feature = d_feature * 2 self.ORT_weight = ORT_weight self.MIT_weight = MIT_weight - self.layer_stack = nn.ModuleList([ - EncoderLayer(d_time, actual_d_feature, d_model, d_inner, n_head, d_k, d_v, dropout, 0, - False) - for _ in range(n_layers) - ]) + self.layer_stack = nn.ModuleList( + [ + EncoderLayer( + d_time, + actual_d_feature, + d_model, + d_inner, + n_head, + d_k, + d_v, + dropout, + 0, + False, + ) + for _ in range(n_layers) + ] + ) self.embedding = nn.Linear(actual_d_feature, d_model) self.position_enc = PositionalEncoding(d_model, n_position=d_time) @@ -170,7 +216,7 @@ def __init__(self, n_layers, d_time, d_feature, d_model, d_inner, n_head, d_k, d self.reduce_dim = nn.Linear(d_model, d_feature) def impute(self, inputs): - X, masks = inputs['X'], inputs['missing_mask'] + X, masks = inputs["X"], inputs["missing_mask"] input_X = torch.cat([X, masks], dim=2) input_X = self.embedding(input_X) enc_output = self.dropout(self.position_enc(input_X)) @@ -179,46 +225,55 @@ def impute(self, inputs): enc_output, _ = encoder_layer(enc_output) learned_presentation = self.reduce_dim(enc_output) - imputed_data = masks * X + (1 - masks) * learned_presentation # replace non-missing part with original data + imputed_data = ( + masks * X + (1 - masks) * learned_presentation + ) # replace non-missing part with original data return imputed_data, learned_presentation def forward(self, inputs): - X, masks = inputs['X'], inputs['missing_mask'] + X, masks = inputs["X"], inputs["missing_mask"] imputed_data, learned_presentation = self.impute(inputs) reconstruction_loss = cal_mae(learned_presentation, X, masks) # have to cal imputation loss in the val stage; no need to cal imputation loss here in the tests stage - imputation_loss = cal_mae(learned_presentation, inputs['X_intact'], inputs['indicating_mask']) + imputation_loss = cal_mae( + learned_presentation, inputs["X_intact"], inputs["indicating_mask"] + ) loss = self.ORT_weight * reconstruction_loss + self.MIT_weight * imputation_loss return { - 'imputed_data': imputed_data, - 'reconstruction_loss': reconstruction_loss, 'imputation_loss': imputation_loss, - 'loss': loss + "imputed_data": imputed_data, + "reconstruction_loss": reconstruction_loss, + "imputation_loss": imputation_loss, + "loss": loss, } class Transformer(BaseNNImputer): - def __init__(self, - n_steps, - n_features, - n_layers, - d_model, - d_inner, - n_head, - d_k, - d_v, - dropout, - ORT_weight=1, - MIT_weight=1, - learning_rate=1e-3, - epochs=100, - patience=10, - batch_size=32, - weight_decay=1e-5, - device=None): - super().__init__(learning_rate, epochs, patience, batch_size, weight_decay, device) + def __init__( + self, + n_steps, + n_features, + n_layers, + d_model, + d_inner, + n_head, + d_k, + d_v, + dropout, + ORT_weight=1, + MIT_weight=1, + learning_rate=1e-3, + epochs=100, + patience=10, + 
batch_size=32, + weight_decay=1e-5, + device=None, + ): + super().__init__( + learning_rate, epochs, patience, batch_size, weight_decay, device + ) self.n_steps = n_steps self.n_features = n_features @@ -233,9 +288,19 @@ def __init__(self, self.ORT_weight = ORT_weight self.MIT_weight = MIT_weight - self.model = _TransformerEncoder(self.n_layers, self.n_steps, self.n_features, self.d_model, self.d_inner, - self.n_head, self.d_k, self.d_v, self.dropout, - self.ORT_weight, self.MIT_weight) + self.model = _TransformerEncoder( + self.n_layers, + self.n_steps, + self.n_features, + self.d_model, + self.d_inner, + self.n_head, + self.d_k, + self.d_v, + self.dropout, + self.ORT_weight, + self.MIT_weight, + ) self.model = self.model.to(self.device) self._print_model_size() @@ -245,22 +310,28 @@ def fit(self, train_X, val_X=None): val_X = self.check_input(self.n_steps, self.n_features, val_X) training_set = DatasetForMIT(train_X) - training_loader = DataLoader(training_set, batch_size=self.batch_size, shuffle=True) + training_loader = DataLoader( + training_set, batch_size=self.batch_size, shuffle=True + ) if val_X is None: self._train_model(training_loader) else: - val_X_intact, val_X, val_X_missing_mask, val_X_indicating_mask = mcar(val_X, 0.2) + val_X_intact, val_X, val_X_missing_mask, val_X_indicating_mask = mcar( + val_X, 0.2 + ) val_X = masked_fill(val_X, 1 - val_X_missing_mask, np.nan) val_set = DatasetForMIT(val_X) val_loader = DataLoader(val_set, batch_size=self.batch_size, shuffle=False) - self._train_model(training_loader, val_loader, val_X_intact, val_X_indicating_mask) + self._train_model( + training_loader, val_loader, val_X_intact, val_X_indicating_mask + ) self.model.load_state_dict(self.best_model_dict) self.model.eval() # set the model as eval status to freeze it. return self def assemble_input_data(self, data): - """ Assemble the input data into a dictionary. + """Assemble the input data into a dictionary. 
Parameters ---------- @@ -276,10 +347,10 @@ def assemble_input_data(self, data): indices, X_intact, X, missing_mask, indicating_mask = data inputs = { - 'X': X, - 'X_intact': X_intact, - 'missing_mask': missing_mask, - 'indicating_mask': indicating_mask + "X": X, + "X_intact": X_intact, + "missing_mask": missing_mask, + "indicating_mask": indicating_mask, } return inputs @@ -293,7 +364,7 @@ def impute(self, X): with torch.no_grad(): for idx, data in enumerate(test_loader): - inputs = {'X': data[1], 'missing_mask': data[2]} + inputs = {"X": data[1], "missing_mask": data[2]} imputed_data, _ = self.model.impute(inputs) imputation_collector.append(imputed_data) diff --git a/pypots/tests/test_classification.py b/pypots/tests/test_classification.py index bcda07df..9f283ab2 100644 --- a/pypots/tests/test_classification.py +++ b/pypots/tests/test_classification.py @@ -16,113 +16,145 @@ class TestBRITS(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.train_y = DATA['train_y'] - self.val_X = DATA['val_X'] - self.val_y = DATA['val_y'] - self.test_X = DATA['test_X'] - self.test_y = DATA['test_y'] - print('Running test cases for BRITS...') - self.brits = BRITS(DATA['n_steps'], DATA['n_features'], 256, - n_classes=DATA['n_classes'], epochs=EPOCHS) + self.train_X = DATA["train_X"] + self.train_y = DATA["train_y"] + self.val_X = DATA["val_X"] + self.val_y = DATA["val_y"] + self.test_X = DATA["test_X"] + self.test_y = DATA["test_y"] + print("Running test cases for BRITS...") + self.brits = BRITS( + DATA["n_steps"], + DATA["n_features"], + 256, + n_classes=DATA["n_classes"], + epochs=EPOCHS, + ) self.brits.fit(self.train_X, self.train_y, self.val_X, self.val_y) def test_parameters(self): - assert (hasattr(self.brits, 'model') - and self.brits.model is not None) + assert hasattr(self.brits, "model") and self.brits.model is not None - assert (hasattr(self.brits, 'optimizer') - and self.brits.optimizer is not None) + assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None - assert hasattr(self.brits, 'best_loss') - self.assertNotEqual(self.brits.best_loss, float('inf')) + assert hasattr(self.brits, "best_loss") + self.assertNotEqual(self.brits.best_loss, float("inf")) - assert (hasattr(self.brits, 'best_model_dict') - and self.brits.best_model_dict is not None) + assert ( + hasattr(self.brits, "best_model_dict") + and self.brits.best_model_dict is not None + ) def test_classify(self): predictions = self.brits.classify(self.test_X) metrics = cal_binary_classification_metrics(predictions, self.test_y) - print(f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n') - assert metrics['roc_auc'] >= 0.5, 'ROC-AUC < 0.5' + print( + f'ROC_AUC: {metrics["roc_auc"]}, \n' + f'PR_AUC: {metrics["pr_auc"]},\n' + f'F1: {metrics["f1"]},\n' + f'Precision: {metrics["precision"]},\n' + f'Recall: {metrics["recall"]},\n' + ) + assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" class TestGRUD(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.train_y = DATA['train_y'] - self.val_X = DATA['val_X'] - self.val_y = DATA['val_y'] - self.test_X = DATA['test_X'] - self.test_y = DATA['test_y'] - print('Running test cases for GRUD...') - self.grud = GRUD(DATA['n_steps'], DATA['n_features'], 256, n_classes=DATA['n_classes'], epochs=EPOCHS) + self.train_X = DATA["train_X"] + self.train_y = DATA["train_y"] + self.val_X = DATA["val_X"] 
+ self.val_y = DATA["val_y"] + self.test_X = DATA["test_X"] + self.test_y = DATA["test_y"] + print("Running test cases for GRUD...") + self.grud = GRUD( + DATA["n_steps"], + DATA["n_features"], + 256, + n_classes=DATA["n_classes"], + epochs=EPOCHS, + ) self.grud.fit(self.train_X, self.train_y, self.val_X, self.val_y) def test_parameters(self): - assert (hasattr(self.grud, 'model') - and self.grud.model is not None) + assert hasattr(self.grud, "model") and self.grud.model is not None - assert (hasattr(self.grud, 'optimizer') - and self.grud.optimizer is not None) + assert hasattr(self.grud, "optimizer") and self.grud.optimizer is not None - assert hasattr(self.grud, 'best_loss') - self.assertNotEqual(self.grud.best_loss, float('inf')) + assert hasattr(self.grud, "best_loss") + self.assertNotEqual(self.grud.best_loss, float("inf")) - assert (hasattr(self.grud, 'best_model_dict') - and self.grud.best_model_dict is not None) + assert ( + hasattr(self.grud, "best_model_dict") + and self.grud.best_model_dict is not None + ) def test_classify(self): predictions = self.grud.classify(self.test_X) metrics = cal_binary_classification_metrics(predictions, self.test_y) - print(f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n') - assert metrics['roc_auc'] >= 0.5, 'ROC-AUC < 0.5' + print( + f'ROC_AUC: {metrics["roc_auc"]}, \n' + f'PR_AUC: {metrics["pr_auc"]},\n' + f'F1: {metrics["f1"]},\n' + f'Precision: {metrics["precision"]},\n' + f'Recall: {metrics["recall"]},\n' + ) + assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" class TestRaindrop(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.train_y = DATA['train_y'] - self.val_X = DATA['val_X'] - self.val_y = DATA['val_y'] - self.test_X = DATA['test_X'] - self.test_y = DATA['test_y'] - print('Running test cases for Raindrop...') - self.raindrop = Raindrop(DATA['n_features'], 2, DATA['n_features'] * 4, 256, 2, DATA['n_classes'], 0.3, - DATA['n_steps'], 0, 'mean', False, False, epochs=EPOCHS) + self.train_X = DATA["train_X"] + self.train_y = DATA["train_y"] + self.val_X = DATA["val_X"] + self.val_y = DATA["val_y"] + self.test_X = DATA["test_X"] + self.test_y = DATA["test_y"] + print("Running test cases for Raindrop...") + self.raindrop = Raindrop( + DATA["n_features"], + 2, + DATA["n_features"] * 4, + 256, + 2, + DATA["n_classes"], + 0.3, + DATA["n_steps"], + 0, + "mean", + False, + False, + epochs=EPOCHS, + ) self.raindrop.fit(self.train_X, self.train_y, self.val_X, self.val_y) def test_parameters(self): - assert (hasattr(self.raindrop, 'model') - and self.raindrop.model is not None) + assert hasattr(self.raindrop, "model") and self.raindrop.model is not None - assert (hasattr(self.raindrop, 'optimizer') - and self.raindrop.optimizer is not None) + assert ( + hasattr(self.raindrop, "optimizer") and self.raindrop.optimizer is not None + ) - assert hasattr(self.raindrop, 'best_loss') - self.assertNotEqual(self.raindrop.best_loss, float('inf')) + assert hasattr(self.raindrop, "best_loss") + self.assertNotEqual(self.raindrop.best_loss, float("inf")) - assert (hasattr(self.raindrop, 'best_model_dict') - and self.raindrop.best_model_dict is not None) + assert ( + hasattr(self.raindrop, "best_model_dict") + and self.raindrop.best_model_dict is not None + ) def test_classify(self): predictions = self.raindrop.classify(self.test_X) metrics = cal_binary_classification_metrics(predictions, self.test_y) - 
print(f'ROC_AUC: {metrics["roc_auc"]}, \n' - f'PR_AUC: {metrics["pr_auc"]},\n' - f'F1: {metrics["f1"]},\n' - f'Precision: {metrics["precision"]},\n' - f'Recall: {metrics["recall"]},\n') - assert metrics['roc_auc'] >= 0.5, 'ROC-AUC < 0.5' + print( + f'ROC_AUC: {metrics["roc_auc"]}, \n' + f'PR_AUC: {metrics["pr_auc"]},\n' + f'F1: {metrics["f1"]},\n' + f'Precision: {metrics["precision"]},\n' + f'Recall: {metrics["recall"]},\n' + ) + assert metrics["roc_auc"] >= 0.5, "ROC-AUC < 0.5" -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pypots/tests/test_clustering.py b/pypots/tests/test_clustering.py index 7dbdc6d2..52584e35 100644 --- a/pypots/tests/test_clustering.py +++ b/pypots/tests/test_clustering.py @@ -17,63 +17,75 @@ class TestCRLI(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.train_y = DATA['train_y'] - print('Running test cases for CRLI...') - self.crli = CRLI(n_steps=DATA['n_steps'], n_features=DATA['n_features'], n_clusters=DATA['n_classes'], - n_generator_layers=2, rnn_hidden_size=128, epochs=EPOCHS) + self.train_X = DATA["train_X"] + self.train_y = DATA["train_y"] + print("Running test cases for CRLI...") + self.crli = CRLI( + n_steps=DATA["n_steps"], + n_features=DATA["n_features"], + n_clusters=DATA["n_classes"], + n_generator_layers=2, + rnn_hidden_size=128, + epochs=EPOCHS, + ) self.crli.fit(self.train_X) def test_parameters(self): - assert (hasattr(self.crli, 'model') - and self.crli.model is not None) + assert hasattr(self.crli, "model") and self.crli.model is not None - assert (hasattr(self.crli, 'G_optimizer') - and self.crli.G_optimizer is not None) - assert (hasattr(self.crli, 'D_optimizer') - and self.crli.D_optimizer is not None) + assert hasattr(self.crli, "G_optimizer") and self.crli.G_optimizer is not None + assert hasattr(self.crli, "D_optimizer") and self.crli.D_optimizer is not None - assert hasattr(self.crli, 'best_loss') - self.assertNotEqual(self.crli.best_loss, float('inf')) + assert hasattr(self.crli, "best_loss") + self.assertNotEqual(self.crli.best_loss, float("inf")) - assert (hasattr(self.crli, 'best_model_dict') - and self.crli.best_model_dict is not None) + assert ( + hasattr(self.crli, "best_model_dict") + and self.crli.best_model_dict is not None + ) def test_cluster(self): clustering = self.crli.cluster(self.train_X) RI = cal_rand_index(clustering, self.train_y) CP = cal_cluster_purity(clustering, self.train_y) - print(f'RI: {RI}\nCP: {CP}') + print(f"RI: {RI}\nCP: {CP}") class TestVaDER(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.train_y = DATA['train_y'] - print('Running test cases for VaDER...') - self.vader = VaDER(n_steps=DATA['n_steps'], n_features=DATA['n_features'], n_clusters=DATA['n_classes'], - rnn_hidden_size=64, d_mu_stddev=5, pretrain_epochs=20, epochs=EPOCHS) + self.train_X = DATA["train_X"] + self.train_y = DATA["train_y"] + print("Running test cases for VaDER...") + self.vader = VaDER( + n_steps=DATA["n_steps"], + n_features=DATA["n_features"], + n_clusters=DATA["n_classes"], + rnn_hidden_size=64, + d_mu_stddev=5, + pretrain_epochs=20, + epochs=EPOCHS, + ) self.vader.fit(self.train_X) def test_parameters(self): - assert (hasattr(self.vader, 'model') - and self.vader.model is not None) + assert hasattr(self.vader, "model") and self.vader.model is not None - assert (hasattr(self.vader, 'optimizer') - and self.vader.optimizer is not None) + assert hasattr(self.vader, "optimizer") and self.vader.optimizer is not None - assert 
hasattr(self.vader, 'best_loss') - self.assertNotEqual(self.vader.best_loss, float('inf')) + assert hasattr(self.vader, "best_loss") + self.assertNotEqual(self.vader.best_loss, float("inf")) - assert (hasattr(self.vader, 'best_model_dict') - and self.vader.best_model_dict is not None) + assert ( + hasattr(self.vader, "best_model_dict") + and self.vader.best_model_dict is not None + ) def test_cluster(self): clustering = self.vader.cluster(self.train_X) RI = cal_rand_index(clustering, self.train_y) CP = cal_cluster_purity(clustering, self.train_y) - print(f'RI: {RI}\nCP: {CP}') + print(f"RI: {RI}\nCP: {CP}") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pypots/tests/test_forecasting.py b/pypots/tests/test_forecasting.py index 1956c1e8..74a9fb60 100644 --- a/pypots/tests/test_forecasting.py +++ b/pypots/tests/test_forecasting.py @@ -19,19 +19,25 @@ class TestBTTF(unittest.TestCase): def setUp(self) -> None: DATA = gene_random_walk_data(n_steps=120, n_features=10) - self.test_X = DATA['test_X'] - self.test_X_intact = DATA['test_X_intact'] + self.test_X = DATA["test_X"] + self.test_X_intact = DATA["test_X_intact"] self.test_X_for_input = self.test_X[:, :100] - print('Running test cases for BTTF...') - self.bttf = BTTF(100, 10, - 20, 2, 10, - np.asarray([1, 2, 3, 10, 10 + 1, 10 + 2, 20, 20 + 1, 20 + 2]), - 5, 5) + print("Running test cases for BTTF...") + self.bttf = BTTF( + 100, + 10, + 20, + 2, + 10, + np.asarray([1, 2, 3, 10, 10 + 1, 10 + 2, 20, 20 + 1, 20 + 2]), + 5, + 5, + ) def test_forecasting(self): predictions = self.bttf.forecast(self.test_X_for_input) mae = cal_mae(predictions, self.test_X_intact[:, 100:]) - print(f'prediction MAE: {mae}') + print(f"prediction MAE: {mae}") - if __name__ == '__main__': + if __name__ == "__main__": unittest.main() diff --git a/pypots/tests/test_imputation.py b/pypots/tests/test_imputation.py index 160d5e92..4219aa4a 100644 --- a/pypots/tests/test_imputation.py +++ b/pypots/tests/test_imputation.py @@ -24,119 +24,151 @@ class TestSAITS(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.val_X = DATA['val_X'] - self.test_X = DATA['test_X'] - self.test_X_intact = DATA['test_X_intact'] - self.test_X_indicating_mask = DATA['test_X_indicating_mask'] - print('Running test cases for SAITS...') - self.saits = SAITS(DATA['n_steps'], DATA['n_features'], n_layers=2, d_model=256, d_inner=128, n_head=4, - d_k=64, d_v=64, dropout=0.1, epochs=EPOCH) + self.train_X = DATA["train_X"] + self.val_X = DATA["val_X"] + self.test_X = DATA["test_X"] + self.test_X_intact = DATA["test_X_intact"] + self.test_X_indicating_mask = DATA["test_X_indicating_mask"] + print("Running test cases for SAITS...") + self.saits = SAITS( + DATA["n_steps"], + DATA["n_features"], + n_layers=2, + d_model=256, + d_inner=128, + n_head=4, + d_k=64, + d_v=64, + dropout=0.1, + epochs=EPOCH, + ) self.saits.fit(self.train_X, self.val_X) def test_parameters(self): - assert (hasattr(self.saits, 'model') - and self.saits.model is not None) + assert hasattr(self.saits, "model") and self.saits.model is not None - assert (hasattr(self.saits, 'optimizer') - and self.saits.optimizer is not None) + assert hasattr(self.saits, "optimizer") and self.saits.optimizer is not None - assert hasattr(self.saits, 'best_loss') - self.assertNotEqual(self.saits.best_loss, float('inf')) + assert hasattr(self.saits, "best_loss") + self.assertNotEqual(self.saits.best_loss, float("inf")) - assert (hasattr(self.saits, 'best_model_dict') - and 
self.saits.best_model_dict is not None) + assert ( + hasattr(self.saits, "best_model_dict") + and self.saits.best_model_dict is not None + ) def test_impute(self): imputed_X = self.saits.impute(self.test_X) - assert not np.isnan(imputed_X).any(), 'Output still has missing values after running impute().' + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." test_MAE = cal_mae(imputed_X, self.test_X_intact, self.test_X_indicating_mask) - print(f'SAITS test_MAE: {test_MAE}') + print(f"SAITS test_MAE: {test_MAE}") class TestTransformer(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.val_X = DATA['val_X'] - self.test_X = DATA['test_X'] - self.test_X_intact = DATA['test_X_intact'] - self.test_X_indicating_mask = DATA['test_X_indicating_mask'] - print('Running test cases for Transformer...') - self.transformer = Transformer(DATA['n_steps'], DATA['n_features'], n_layers=2, d_model=256, d_inner=128, - n_head=4, d_k=64, d_v=64, dropout=0.1, epochs=EPOCH) + self.train_X = DATA["train_X"] + self.val_X = DATA["val_X"] + self.test_X = DATA["test_X"] + self.test_X_intact = DATA["test_X_intact"] + self.test_X_indicating_mask = DATA["test_X_indicating_mask"] + print("Running test cases for Transformer...") + self.transformer = Transformer( + DATA["n_steps"], + DATA["n_features"], + n_layers=2, + d_model=256, + d_inner=128, + n_head=4, + d_k=64, + d_v=64, + dropout=0.1, + epochs=EPOCH, + ) self.transformer.fit(self.train_X, self.val_X) def test_parameters(self): - assert (hasattr(self.transformer, 'model') - and self.transformer.model is not None) + assert hasattr(self.transformer, "model") and self.transformer.model is not None - assert (hasattr(self.transformer, 'optimizer') - and self.transformer.optimizer is not None) + assert ( + hasattr(self.transformer, "optimizer") + and self.transformer.optimizer is not None + ) - assert hasattr(self.transformer, 'best_loss') - self.assertNotEqual(self.transformer.best_loss, float('inf')) + assert hasattr(self.transformer, "best_loss") + self.assertNotEqual(self.transformer.best_loss, float("inf")) - assert (hasattr(self.transformer, 'best_model_dict') - and self.transformer.best_model_dict is not None) + assert ( + hasattr(self.transformer, "best_model_dict") + and self.transformer.best_model_dict is not None + ) def test_impute(self): imputed_X = self.transformer.impute(self.test_X) - assert not np.isnan(imputed_X).any(), 'Output still has missing values after running impute().' + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." 
test_MAE = cal_mae(imputed_X, self.test_X_intact, self.test_X_indicating_mask) - print(f'Transformer test_MAE: {test_MAE}') + print(f"Transformer test_MAE: {test_MAE}") class TestBRITS(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.val_X = DATA['val_X'] - self.test_X = DATA['test_X'] - self.test_X_intact = DATA['test_X_intact'] - self.test_X_indicating_mask = DATA['test_X_indicating_mask'] - print('Running test cases for BRITS...') - self.brits = BRITS(DATA['n_steps'], DATA['n_features'], 256, epochs=EPOCH) + self.train_X = DATA["train_X"] + self.val_X = DATA["val_X"] + self.test_X = DATA["test_X"] + self.test_X_intact = DATA["test_X_intact"] + self.test_X_indicating_mask = DATA["test_X_indicating_mask"] + print("Running test cases for BRITS...") + self.brits = BRITS(DATA["n_steps"], DATA["n_features"], 256, epochs=EPOCH) self.brits.fit(self.train_X, self.val_X) def test_parameters(self): - assert (hasattr(self.brits, 'model') - and self.brits.model is not None) + assert hasattr(self.brits, "model") and self.brits.model is not None - assert (hasattr(self.brits, 'optimizer') - and self.brits.optimizer is not None) + assert hasattr(self.brits, "optimizer") and self.brits.optimizer is not None - assert hasattr(self.brits, 'best_loss') - self.assertNotEqual(self.brits.best_loss, float('inf')) + assert hasattr(self.brits, "best_loss") + self.assertNotEqual(self.brits.best_loss, float("inf")) - assert (hasattr(self.brits, 'best_model_dict') - and self.brits.best_model_dict is not None) + assert ( + hasattr(self.brits, "best_model_dict") + and self.brits.best_model_dict is not None + ) def test_impute(self): imputed_X = self.brits.impute(self.test_X) - assert not np.isnan(imputed_X).any(), 'Output still has missing values after running impute().' + assert not np.isnan( + imputed_X + ).any(), "Output still has missing values after running impute()." test_MAE = cal_mae(imputed_X, self.test_X_intact, self.test_X_indicating_mask) - print(f'BRITS test_MAE: {test_MAE}') + print(f"BRITS test_MAE: {test_MAE}") class TestLOCF(unittest.TestCase): def setUp(self) -> None: - self.train_X = DATA['train_X'] - self.val_X = DATA['val_X'] - self.test_X = DATA['test_X'] - self.test_X_intact = DATA['test_X_intact'] - self.test_X_indicating_mask = DATA['test_X_indicating_mask'] - print('Running test cases for LOCF...') + self.train_X = DATA["train_X"] + self.val_X = DATA["val_X"] + self.test_X = DATA["test_X"] + self.test_X_intact = DATA["test_X_intact"] + self.test_X_indicating_mask = DATA["test_X_indicating_mask"] + print("Running test cases for LOCF...") self.locf = LOCF(nan=0) def test_parameters(self): - assert (hasattr(self.locf, 'nan') - and self.locf.nan is not None) + assert hasattr(self.locf, "nan") and self.locf.nan is not None def test_impute(self): test_X_imputed = self.locf.impute(self.test_X) - assert not np.isnan(test_X_imputed).any(), 'Output still has missing values after running impute().' - test_MAE = cal_mae(test_X_imputed, self.test_X_intact, self.test_X_indicating_mask) - print(f'LOCF test_MAE: {test_MAE}') + assert not np.isnan( + test_X_imputed + ).any(), "Output still has missing values after running impute()." 
+ test_MAE = cal_mae( + test_X_imputed, self.test_X_intact, self.test_X_indicating_mask + ) + print(f"LOCF test_MAE: {test_MAE}") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pypots/tests/unified_data_for_test.py b/pypots/tests/unified_data_for_test.py index c90c45bb..2bdf89fc 100644 --- a/pypots/tests/unified_data_for_test.py +++ b/pypots/tests/unified_data_for_test.py @@ -14,14 +14,17 @@ from pypots.data import load_specific_dataset -def gene_random_walk_data(n_steps=24, n_features=10, n_classes=2, n_samples_each_class=1000): - """ Generate a random-walk dataset. - """ +def gene_random_walk_data( + n_steps=24, n_features=10, n_classes=2, n_samples_each_class=1000 +): + """Generate a random-walk dataset.""" # generate samples - X, y = generate_random_walk_for_classification(n_classes=n_classes, - n_samples_each_class=n_samples_each_class, - n_steps=n_steps, - n_features=n_features) + X, y = generate_random_walk_for_classification( + n_classes=n_classes, + n_samples_each_class=n_samples_each_class, + n_steps=n_steps, + n_features=n_features, + ) # split into train/val/test sets train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2) train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.2) @@ -46,54 +49,62 @@ def gene_random_walk_data(n_steps=24, n_features=10, n_classes=2, n_samples_each test_X = test_X.reshape(-1, n_steps, n_features) # mask values in the test set as ground truth - test_X_intact, test_X, test_X_missing_mask, test_X_indicating_mask = mcar(test_X, 0.3) + test_X_intact, test_X, test_X_missing_mask, test_X_indicating_mask = mcar( + test_X, 0.3 + ) test_X = masked_fill(test_X, 1 - test_X_missing_mask, torch.nan) data = { - 'n_classes': n_classes, - 'n_steps': n_steps, - 'n_features': n_features, - 'train_X': train_X, 'train_y': train_y, - 'val_X': val_X, 'val_y': val_y, - 'test_X': test_X, 'test_y': test_y, - 'test_X_intact': test_X_intact, - 'test_X_indicating_mask': test_X_indicating_mask + "n_classes": n_classes, + "n_steps": n_steps, + "n_features": n_features, + "train_X": train_X, + "train_y": train_y, + "val_X": val_X, + "val_y": val_y, + "test_X": test_X, + "test_y": test_y, + "test_X_intact": test_X_intact, + "test_X_indicating_mask": test_X_indicating_mask, } return data def gene_physionet2012(): - """ Generate PhysioNet2012. 
- """ + """Generate PhysioNet2012.""" # generate samples - df = load_specific_dataset('physionet_2012') - X = df['X'] - X = X.drop(df['static_features'], axis=1) + df = load_specific_dataset("physionet_2012") + X = df["X"] + X = X.drop(df["static_features"], axis=1) def apply_func(df_temp): - missing = list(set(range(0, 48)).difference(set(df_temp['Time']))) - missing_part = pd.DataFrame({'Time': missing}) + missing = list(set(range(0, 48)).difference(set(df_temp["Time"]))) + missing_part = pd.DataFrame({"Time": missing}) df_temp = df_temp.append(missing_part, ignore_index=False, sort=False) - df_temp = df_temp.set_index('Time').sort_index().reset_index() + df_temp = df_temp.set_index("Time").sort_index().reset_index() df_temp = df_temp.iloc[:48] return df_temp - X = X.groupby('RecordID').apply(apply_func) - X = X.drop('RecordID', axis=1) + X = X.groupby("RecordID").apply(apply_func) + X = X.drop("RecordID", axis=1) X = X.reset_index() - X = X.drop(['level_1', 'Time'], axis=1) + X = X.drop(["level_1", "Time"], axis=1) - y = df['y'] - all_recordID = X['RecordID'].unique() + y = df["y"] + all_recordID = X["RecordID"].unique() train_set_ids, test_set_ids = train_test_split(all_recordID, test_size=0.2) train_set_ids, val_set_ids = train_test_split(train_set_ids, test_size=0.2) - train_set = X[X['RecordID'].isin(train_set_ids)] - val_set = X[X['RecordID'].isin(val_set_ids)] - test_set = X[X['RecordID'].isin(test_set_ids)] - train_set = train_set.drop('RecordID', axis=1) - val_set = val_set.drop('RecordID', axis=1) - test_set = test_set.drop('RecordID', axis=1) - train_X, val_X, test_X = train_set.to_numpy(), val_set.to_numpy(), test_set.to_numpy() + train_set = X[X["RecordID"].isin(train_set_ids)] + val_set = X[X["RecordID"].isin(val_set_ids)] + test_set = X[X["RecordID"].isin(test_set_ids)] + train_set = train_set.drop("RecordID", axis=1) + val_set = val_set.drop("RecordID", axis=1) + test_set = test_set.drop("RecordID", axis=1) + train_X, val_X, test_X = ( + train_set.to_numpy(), + val_set.to_numpy(), + test_set.to_numpy(), + ) # normalization scaler = StandardScaler() train_X = scaler.fit_transform(train_X) @@ -109,18 +120,23 @@ def apply_func(df_temp): test_y = y[y.index.isin(test_set_ids)] train_y, val_y, test_y = train_y.to_numpy(), val_y.to_numpy(), test_y.to_numpy() - test_X_intact, test_X, test_X_missing_mask, test_X_indicating_mask = mcar(test_X, 0.1) + test_X_intact, test_X, test_X_missing_mask, test_X_indicating_mask = mcar( + test_X, 0.1 + ) test_X = masked_fill(test_X, 1 - test_X_missing_mask, torch.nan) data = { - 'n_classes': 2, - 'n_steps': 48, - 'n_features': train_X.shape[-1], - 'train_X': train_X, 'train_y': train_y.flatten(), - 'val_X': val_X, 'val_y': val_y.flatten(), - 'test_X': test_X, 'test_y': test_y.flatten(), - 'test_X_intact': test_X_intact, - 'test_X_indicating_mask': test_X_indicating_mask + "n_classes": 2, + "n_steps": 48, + "n_features": train_X.shape[-1], + "train_X": train_X, + "train_y": train_y.flatten(), + "val_X": val_X, + "val_y": val_y.flatten(), + "test_X": test_X, + "test_y": test_y.flatten(), + "test_X_intact": test_X_intact, + "test_X_indicating_mask": test_X_indicating_mask, } return data diff --git a/pypots/utils/__init__.py b/pypots/utils/__init__.py index 193e2be2..2d6f3394 100644 --- a/pypots/utils/__init__.py +++ b/pypots/utils/__init__.py @@ -1,5 +1,6 @@ """ """ + # Created by Wenjie Du -# License: GPL-v3 \ No newline at end of file +# License: GPL-v3 diff --git a/pypots/utils/metrics.py b/pypots/utils/metrics.py index 168f3af4..f47ac132 
100644 --- a/pypots/utils/metrics.py +++ b/pypots/utils/metrics.py @@ -11,9 +11,11 @@ def cal_mae(inputs, target, mask=None): - """ calculate Mean Absolute Error""" - assert type(inputs) == type(target), f'types of inputs and target must match, ' \ - f'type(inputs)={type(inputs)}, type(target)={type(target)}' + """calculate Mean Absolute Error""" + assert type(inputs) == type(target), ( + f"types of inputs and target must match, but got" + f"type(inputs)={type(inputs)}, type(target)={type(target)}" + ) lib = np if isinstance(inputs, np.ndarray) else torch if mask is not None: return lib.sum(lib.abs(inputs - target) * mask) / (lib.sum(mask) + 1e-9) @@ -22,9 +24,11 @@ def cal_mae(inputs, target, mask=None): def cal_mse(inputs, target, mask=None): - """ calculate Mean Square Error""" - assert type(inputs) == type(target), f'types of inputs and target must match, ' \ - f'type(inputs)={type(inputs)}, type(target)={type(target)}' + """calculate Mean Square Error""" + assert type(inputs) == type(target), ( + f"types of inputs and target must match, but got" + f"type(inputs)={type(inputs)}, type(target)={type(target)}" + ) lib = np if isinstance(inputs, np.ndarray) else torch if mask is not None: return lib.sum(lib.square(inputs - target) * mask) / (lib.sum(mask) + 1e-9) @@ -33,26 +37,32 @@ def cal_mse(inputs, target, mask=None): def cal_rmse(inputs, target, mask=None): - """ calculate Root Mean Square Error""" - assert type(inputs) == type(target), f'types of inputs and target must match, ' \ - f'type(inputs)={type(inputs)}, type(target)={type(target)}' + """calculate Root Mean Square Error""" + assert type(inputs) == type(target), ( + f"types of inputs and target must match, but got" + f"type(inputs)={type(inputs)}, type(target)={type(target)}" + ) lib = np if isinstance(inputs, np.ndarray) else torch return lib.sqrt(cal_mse(inputs, target, mask)) def cal_mre(inputs, target, mask=None): - """ calculate Mean Relative Error""" - assert type(inputs) == type(target), f'types of inputs and target must match, ' \ - f'type(inputs)={type(inputs)}, type(target)={type(target)}' + """calculate Mean Relative Error""" + assert type(inputs) == type(target), ( + f"types of inputs and target must match, but got" + f"type(inputs)={type(inputs)}, type(target)={type(target)}" + ) lib = np if isinstance(inputs, np.ndarray) else torch if mask is not None: - return lib.sum(lib.abs(inputs - target) * mask) / (lib.sum(lib.abs(target * mask)) + 1e-9) + return lib.sum(lib.abs(inputs - target) * mask) / ( + lib.sum(lib.abs(target * mask)) + 1e-9 + ) else: return lib.mean(lib.abs(inputs - target)) / (lib.sum(lib.abs(target)) + 1e-9) def cal_binary_classification_metrics(prob_predictions, targets, pos_label=1): - """ Calculate the evaluation metrics for the binary classification task, + """Calculate the evaluation metrics for the binary classification task, including accuracy, precision, recall, f1 score, area under ROC curve, and area under Precision-Recall curve. If targets contains multiple categories, please set the positive category as `pos_label`. 
@@ -89,10 +99,14 @@ def cal_binary_classification_metrics(prob_predictions, targets, pos_label=1): elif len(targets.shape) == 2 and targets.shape[1] == 1: targets = np.asarray(targets).flatten() else: - raise f'targets dimensions should be 1 or 2, but got targets.shape: {targets.shape}' - - if len(prob_predictions.shape) == 1 or (len(prob_predictions.shape) == 2 and prob_predictions.shape[1] == 1): - prob_predictions = np.asarray(prob_predictions).flatten() # turn the array shape into [n_samples] + raise f"targets dimensions should be 1 or 2, but got targets.shape: {targets.shape}" + + if len(prob_predictions.shape) == 1 or ( + len(prob_predictions.shape) == 2 and prob_predictions.shape[1] == 1 + ): + prob_predictions = np.asarray( + prob_predictions + ).flatten() # turn the array shape into [n_samples] binary_predictions = prob_predictions prediction_categories = (prob_predictions >= 0.5).astype(int) binary_prediction_categories = prediction_categories @@ -101,7 +115,7 @@ def cal_binary_classification_metrics(prob_predictions, targets, pos_label=1): binary_predictions = prob_predictions[:, pos_label] binary_prediction_categories = (prediction_categories == pos_label).astype(int) else: - raise f'predictions dimensions should be 1 or 2, but got predictions.shape: {prob_predictions.shape}' + raise f"predictions dimensions should be 1 or 2, but got predictions.shape: {prob_predictions.shape}" # accuracy score doesn't have to be of binary classification acc_score = cal_acc(prediction_categories, targets) @@ -112,28 +126,32 @@ def cal_binary_classification_metrics(prob_predictions, targets, pos_label=1): binary_targets = np.copy(targets) binary_targets[~mask] = mask_val - precision, recall, f1 = cal_precision_recall_f1(binary_prediction_categories, binary_targets, pos_label) - pr_auc, precisions, recalls, _ = cal_pr_auc(binary_predictions, binary_targets, pos_label) + precision, recall, f1 = cal_precision_recall_f1( + binary_prediction_categories, binary_targets, pos_label + ) + pr_auc, precisions, recalls, _ = cal_pr_auc( + binary_predictions, binary_targets, pos_label + ) ROC_AUC, fprs, tprs, _ = cal_roc_auc(binary_predictions, binary_targets, pos_label) PR_AUC = metrics.auc(recalls, precisions) classification_metrics = { - 'predictions': prediction_categories, - 'accuracy': acc_score, - 'precision': precision, - 'recall': recall, - 'f1': f1, - 'precisions': precisions, - 'recalls': recalls, - 'pr_auc': PR_AUC, - 'fprs': fprs, - 'tprs': tprs, - 'roc_auc': ROC_AUC, + "predictions": prediction_categories, + "accuracy": acc_score, + "precision": precision, + "recall": recall, + "f1": f1, + "precisions": precisions, + "recalls": recalls, + "pr_auc": PR_AUC, + "fprs": fprs, + "tprs": tprs, + "roc_auc": ROC_AUC, } return classification_metrics def cal_precision_recall_f1(prob_predictions, targets, pos_label=1): - """ Calculate precision, recall, and F1-score of model predictions. + """Calculate precision, recall, and F1-score of model predictions. Parameters ---------- @@ -154,14 +172,15 @@ def cal_precision_recall_f1(prob_predictions, targets, pos_label=1): The F1 score of model predictions. 
""" - precision, recall, f1, _ = metrics.precision_recall_fscore_support(targets, prob_predictions, - pos_label=pos_label) + precision, recall, f1, _ = metrics.precision_recall_fscore_support( + targets, prob_predictions, pos_label=pos_label + ) precision, recall, f1 = precision[pos_label], recall[pos_label], f1[pos_label] return precision, recall, f1 def cal_pr_auc(prob_predictions, targets, pos_label=1): - """ Calculate precisions, recalls, and area under PR curve of model predictions. + """Calculate precisions, recalls, and area under PR curve of model predictions. Parameters ---------- @@ -185,14 +204,15 @@ def cal_pr_auc(prob_predictions, targets, pos_label=1): """ - precisions, recalls, thresholds = metrics.precision_recall_curve(targets, prob_predictions, - pos_label=pos_label) + precisions, recalls, thresholds = metrics.precision_recall_curve( + targets, prob_predictions, pos_label=pos_label + ) pr_auc = metrics.auc(recalls, precisions) return pr_auc, precisions, recalls, thresholds def cal_roc_auc(prob_predictions, targets, pos_label=1): - """ Calculate false positive rates, true positive rates, and area under AUC curve of model predictions. + """Calculate false positive rates, true positive rates, and area under AUC curve of model predictions. Parameters ---------- @@ -215,14 +235,15 @@ def cal_roc_auc(prob_predictions, targets, pos_label=1): Increasing thresholds on the decision function used to compute FPR and TPR. """ - fprs, tprs, thresholds = metrics.roc_curve(y_true=targets, y_score=prob_predictions, - pos_label=pos_label) + fprs, tprs, thresholds = metrics.roc_curve( + y_true=targets, y_score=prob_predictions, pos_label=pos_label + ) roc_auc = metrics.auc(fprs, tprs) return roc_auc, fprs, tprs, thresholds def cal_acc(class_predictions, targets): - """ Calculate accuracy score of model predictions. + """Calculate accuracy score of model predictions. Parameters ---------- @@ -242,7 +263,7 @@ def cal_acc(class_predictions, targets): def cal_rand_index(class_predictions, targets): - """ Calculate Rand Index, a measure of the similarity between two data clusterings. + """Calculate Rand Index, a measure of the similarity between two data clusterings. Refer to :cite:`rand1971RandIndex`. Parameters @@ -279,7 +300,7 @@ def cal_rand_index(class_predictions, targets): def cal_adjusted_rand_index(class_predictions, targets): - """ Calculate adjusted Rand Index. + """Calculate adjusted Rand Index. Refer to :cite:`hubert1985AdjustedRI`. Parameters @@ -299,7 +320,7 @@ def cal_adjusted_rand_index(class_predictions, targets): def cal_cluster_purity(class_predictions, targets): - """ Calculate cluster purity. + """Calculate cluster purity. 
Parameters ---------- @@ -319,5 +340,7 @@ def cal_cluster_purity(class_predictions, targets): """ contingency_matrix = metrics.cluster.contingency_matrix(targets, class_predictions) - cluster_purity = np.sum(np.amax(contingency_matrix, axis=0)) / np.sum(contingency_matrix) + cluster_purity = np.sum(np.amax(contingency_matrix, axis=0)) / np.sum( + contingency_matrix + ) return cluster_purity diff --git a/setup.py b/setup.py index aecb0cb8..20cc6be4 100644 --- a/setup.py +++ b/setup.py @@ -2,39 +2,45 @@ from pypots.__version__ import version -with open('./README.md', encoding='utf-8') as f: +with open("./README.md", encoding="utf-8") as f: README = f.read() setup( - name='pypots', + name="pypots", version=version, - description='A Python Toolbox for Data Mining on Partially-Observed Time Series', + description="A Python Toolbox for Data Mining on Partially-Observed Time Series", long_description=README, - long_description_content_type='text/markdown', - license='GPL-3.0', - author='Wenjie Du', - author_email='wenjay.du@gmail.com', - url='https://github.com/WenjieDu/PyPOTS', - download_url='https://github.com/WenjieDu/PyPOTS/archive/master.zip', + long_description_content_type="text/markdown", + license="GPL-3.0", + author="Wenjie Du", + author_email="wenjay.du@gmail.com", + url="https://github.com/WenjieDu/PyPOTS", + download_url="https://github.com/WenjieDu/PyPOTS/archive/master.zip", keywords=[ - 'data mining', 'neural networks', 'machine learning', 'deep learning', - 'partially observed', 'time series', 'missing data', 'missing values', + "data mining", + "neural networks", + "machine learning", + "deep learning", + "partially observed", + "time series", + "missing data", + "missing values", ], - packages=find_packages(exclude=['tests']), + packages=find_packages(exclude=["tests"]), include_package_data=True, install_requires=[ - 'matplotlib', - 'numpy', - 'scikit_learn', - 'scipy', - 'torch>=1.10', # torch_sparse v0.6.12 requires 1.9<=torch<1.10, v0.6.13 needs torch>=1.10 - 'torch_sparse==0.6.13', - 'torch_scatter', - 'torch_geometric', - 'tensorboard', - 'pandas', - 'pycorruptor', - 'tsdb', + "matplotlib", + "numpy", + "scikit_learn", + "scipy", + "torch>=1.10", # torch_sparse v0.6.12 requires 1.9<=torch<1.10, v0.6.13 needs torch>=1.10 + "torch_sparse==0.6.13", + "torch_scatter", + "torch_geometric", + "tensorboard", + "pandas", + "pycorruptor", + "tsdb", ], - setup_requires=['setuptools>=38.6.0'], + setup_requires=["setuptools>=38.6.0"], )
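
The snippets below are small, self-contained sketches with made-up toy data; they illustrate the helpers touched in this series but are not code from PyPOTS itself.

The test fixtures build their ground truth by randomly holding out a fraction of the observed values (mcar(test_X, 0.3)), replacing them with NaN via masked_fill, and remembering where they were in test_X_indicating_mask. A rough NumPy equivalent of that idea (the variable names are illustrative, and this is not the pycorruptor implementation):

import numpy as np

rng = np.random.default_rng(0)
test_X_intact = rng.normal(size=(4, 24, 10))      # complete data kept as ground truth
drop = rng.random(test_X_intact.shape) < 0.3      # pick ~30% of entries completely at random
test_X = test_X_intact.copy()
test_X[drop] = np.nan                             # corrupted copy that is fed to the imputer
test_X_indicating_mask = drop.astype(float)       # 1 where a value was artificially removed

After imputation, the tests score only those held-out positions, e.g. cal_mae(imputed_X, test_X_intact, test_X_indicating_mask).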
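
The masked metrics in pypots/utils/metrics.py average the error only over positions where the mask is 1, with a 1e-9 term guarding against division by zero. A minimal sketch of the computation cal_mae performs on NumPy inputs (toy values chosen for illustration):

import numpy as np

target = np.array([1.0, 2.0, 3.0, 4.0])           # ground-truth values
imputed = np.array([1.0, 2.5, 3.0, 5.0])          # model output
mask = np.array([0.0, 1.0, 0.0, 1.0])             # 1 marks the held-out entries

masked_mae = np.sum(np.abs(imputed - target) * mask) / (np.sum(mask) + 1e-9)
print(masked_mae)                                 # ~0.75, i.e. (0.5 + 1.0) / 2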
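
cal_binary_classification_metrics accepts either a 1-D probability vector (thresholded at 0.5) or an n_samples x n_classes probability matrix, in which case the pos_label column is used to draw the ROC and precision-recall curves. The sklearn calls it wraps look roughly like this (toy arrays, not from the test suite):

import numpy as np
from sklearn import metrics

targets = np.array([0, 1, 1, 0, 1])
proba = np.array([[0.8, 0.2],                     # n_samples x n_classes class probabilities
                  [0.3, 0.7],
                  [0.4, 0.6],
                  [0.9, 0.1],
                  [0.2, 0.8]])

pos_label = 1
binary_predictions = proba[:, pos_label]          # probability of the positive class
fprs, tprs, _ = metrics.roc_curve(targets, binary_predictions, pos_label=pos_label)
precisions, recalls, _ = metrics.precision_recall_curve(targets, binary_predictions, pos_label=pos_label)
roc_auc = metrics.auc(fprs, tprs)                 # returned under "roc_auc"
pr_auc = metrics.auc(recalls, precisions)         # returned under "pr_auc"

One caveat in the error paths of that function: `raise f"..."` raises a bare string, which Python 3 rejects with "exceptions must derive from BaseException"; wrapping the message, e.g. raise ValueError(f"..."), would be needed for those branches to fail cleanly.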
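
Finally, cal_cluster_purity scores a clustering by taking, for each predicted cluster, the size of its best-matching true class and dividing the total by the number of samples. A toy example of the same computation:

import numpy as np
from sklearn.metrics.cluster import contingency_matrix

targets = np.array([0, 0, 1, 1, 1, 2])            # true labels
clustering = np.array([1, 1, 0, 0, 2, 2])         # predicted cluster assignments

cm = contingency_matrix(targets, clustering)      # rows: true classes, columns: clusters
purity = np.sum(np.amax(cm, axis=0)) / np.sum(cm) # best class per cluster, over n_samples
print(purity)                                     # 5 / 6, about 0.83

A purity of 1.0 means every cluster contains samples from a single class; like the Rand index computed alongside it in the clustering tests, it is only meaningful when true labels are available.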