加载中 .coveragerc 0 → 100644 +22 −0 原始行号 差异行号 差异行 [report] # Regexes for lines to exclude from consideration exclude_lines = # Have to re-enable the standard pragma pragma: no cover # Don't complain about missing debug-only code: if self\.debug # Don't complain if tests don't hit defensive assertion code: raise AssertionError raise NotImplementedError # Don't complain if non-runnable code isn't run: if 0: if __name__ == .__main__.: [run] omit = # test code need not coverage statistics tests/* .gitignore 0 → 100644 +8 −0 原始行号 差异行号 差异行 .idea .cache *.iml /debug.py /.coverage *.pyc .DS_Store *.*~ README.rst 0 → 100644 +134 −0 原始行号 差异行号 差异行 DONUT ===== Donut is an anomaly detection algorithm for periodic KPIs. Installation ------------ Checkout this repository and execute: .. code-block:: bash pip install git+https://github.com/thu-ml/zhusuan.git pip install git+https://github.com/korepwx/tfsnippet.git pip install . This will first install `ZhuSuan`_ and `TFSnippet`_, the two major dependencies of Donut, then install the Donut package itself. _`ZhuSuan`: https://github.com/thu-ml/zhusuan _`TFSnippet`: https://github.com/korepwx/tfsnippet API Usage --------- To prepare the data: .. code-block:: python import numpy as np from donut import complete_timestamp, standardize_kpi # Read the raw data. timestamp, values, labels = ... # If there is no label, simply use all zeros. labels = np.zeros_like(values, dtype=np.int32) # Complete the timestamp, and obtain the missing point indicators. timestamp, missing, (values, labels) = \ complete_timestamp(timestamp, (values, labels)) # Split the training and testing data. test_portion = 0.3 test_n = int(len(values) * test_portion) train_values, test_values = values[:-test_n], values[-test_n:] train_labels, test_labels = labels[:-test_n], labels[-test_n:] train_missing, test_missing = missing[:-test_n], missing[-test_n:] # Standardize the training and testing data. 
train_values, mean, std = standardize_kpi( train_values, excludes=np.logical_or(train_labels, train_missing)) test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std) To construct a Donut model: .. code-block:: python import tensorflow as tf from donut import Donut from tensorflow import keras as K from tfsnippet.modules import Sequential # We build the entire model within the scope of `model_vs`, # it should hold exactly all the variables of `model`, including # the variables created by Keras layers. with tf.variable_scope('model') as model_vs: model = Donut( h_for_p_x=Sequential([ K.layers.Dense(100, W_regularizer=K.regularizers.l2(0.001), activation=tf.nn.relu), K.layers.Dense(100, W_regularizer=K.regularizers.l2(0.001), activation=tf.nn.relu), ]), h_for_q_z=Sequential([ K.layers.Dense(100, W_regularizer=K.regularizers.l2(0.001), activation=tf.nn.relu), K.layers.Dense(100, W_regularizer=K.regularizers.l2(0.001), activation=tf.nn.relu), ]), x_dims=120, z_dims=5, ) To train the Donut model: .. code-block:: python from donut import DonutTrainer trainer = DonutTrainer(model=model, model_vs=model_vs) with tf.Session().as_default(): trainer.fit(train_values, train_labels, train_missing, mean, std) To use a trained Donut model for prediction: .. code-block:: python from donut import DonutPredictor predictor = DonutPredictor(model) with tf.Session().as_default(): # Remember to train the model before using the predictor, # or to restore the saved model. ... # Now we can use the predictor. test_score = predictor.get_score(test_values, test_missing) To save and restore a trained model: .. code-block:: python from tfsnippet.utils import get_variables_as_dict, VariableSaver with tf.Session().as_default(): # Train the model. ... # Remember to get the model variables after the birth of a # `predictor` or a `trainer`. 
The :class:`Donut` instances # does not build the graph until :meth:`Donut.get_score` or # :meth:`Donut.get_training_objective` is called, which is # done in the `predictor` or the `trainer`. var_dict = get_variables_as_dict(model_vs) # save variables to `save_dir` saver = VariableSaver(var_dict, save_dir) saver.save() with tf.Session().as_default(): # Restore variables from `save_dir`. saver = VariableSaver(get_variables_as_dict(model_vs), save_dir) saver.restore() donut/__init__.py 0 → 100644 +11 −0 原始行号 差异行号 差异行 __version__ = '0.1' from .augmentation import * from .model import * from .prediction import * from .preprocessing import * from .reconstruction import * from .training import * from .utils import * __all__ = ['Donut', 'DonutPredictor', 'DonutTrainer'] donut/augmentation.py 0 → 100644 +96 −0 原始行号 差异行号 差异行 import numpy as np from tfsnippet.utils import docstring_inherit __all__ = ['DataAugmentation', 'MissingDataInjection'] class DataAugmentation(object): """ Base class for data augmentation in training. Args: mean (float): Mean of the training data. std (float): Standard deviation of the training data. """ def __init__(self, mean, std): if std <= 0.: raise ValueError('`std` must be positive') self._mean = mean self._std = std def augment(self, values, labels, missing): """ Generate augmented data. Args: values (np.ndarray): 1-D float32 array of shape `(data_length,)`, the standardized KPI values. labels (np.ndarray): 1-D int32 array of shape `(data_length,)`, the anomaly labels for `values`. missing (np.ndarray): 1-D int32 array of shape `(data_length,)`, the indicator of missing points. Returns: np.ndarray: The augmented KPI values. np.ndarray: The augmented labels. np.ndarray: The augmented indicators of missing points. """ if len(values.shape) != 1: raise ValueError('`values` must be a 1-D array') if labels.shape != values.shape: raise ValueError('The shape of `labels` does not agree with the ' 'shape of `values` ({} vs {})'. 
format(labels.shape, values.shape)) if missing.shape != values.shape: raise ValueError('The shape of `missing` does not agree with the ' 'shape of `values` ({} vs {})'. format(missing.shape, values.shape)) return self._augment(values, labels, missing) def _augment(self, values, labels, missing): """ Derived classes should override this to actually implement the data augmentation algorithm. """ raise NotImplementedError() @property def mean(self): """Get the mean of the training data.""" return self._mean @property def std(self): """Get the standard deviation of training data.""" return self._std class MissingDataInjection(DataAugmentation): """ Data augmentation by injecting missing points into training data. Args: mean (float): Mean of the training data. std (float): Standard deviation of the training data. missing_rate (float): The ratio of missing points to inject. """ def __init__(self, mean, std, missing_rate): super(MissingDataInjection, self).__init__(mean, std) self._missing_rate = missing_rate @property def missing_rate(self): """Get the ratio of missing points to inject.""" return self._missing_rate @docstring_inherit(DataAugmentation.augment) def _augment(self, values, labels, missing): inject_y = np.random.binomial(1, self.missing_rate, size=values.shape) inject_idx = np.where(inject_y.astype(np.bool))[0] values = np.copy(values) values[inject_idx] = -self.mean / self.std missing = np.copy(missing) missing[inject_idx] = 1 return values, labels, missing 加载中
.coveragerc (new file, mode 100644, +22 −0)

[report]
# Regexes for lines to exclude from consideration
exclude_lines =
    # Have to re-enable the standard pragma
    pragma: no cover

    # Don't complain about missing debug-only code:
    if self\.debug

    # Don't complain if tests don't hit defensive assertion code:
    raise AssertionError
    raise NotImplementedError

    # Don't complain if non-runnable code isn't run:
    if 0:
    if __name__ == .__main__.:

[run]
omit =
    # test code need not coverage statistics
    tests/*
.gitignore (new file, mode 100644, +8 −0)

.idea
.cache
*.iml
/debug.py
/.coverage
*.pyc
.DS_Store
*.*~
README.rst (new file, mode 100644, +134 −0)

DONUT
=====

Donut is an anomaly detection algorithm for periodic KPIs.

Installation
------------

Checkout this repository and execute:

.. code-block:: bash

    pip install git+https://github.com/thu-ml/zhusuan.git
    pip install git+https://github.com/korepwx/tfsnippet.git
    pip install .

This will first install `ZhuSuan`_ and `TFSnippet`_, the two major
dependencies of Donut, then install the Donut package itself.

.. _ZhuSuan: https://github.com/thu-ml/zhusuan
.. _TFSnippet: https://github.com/korepwx/tfsnippet

API Usage
---------

To prepare the data:

.. code-block:: python

    import numpy as np
    from donut import complete_timestamp, standardize_kpi

    # Read the raw data.
    timestamp, values, labels = ...

    # If there is no label, simply use all zeros.
    labels = np.zeros_like(values, dtype=np.int32)

    # Complete the timestamp, and obtain the missing point indicators.
    timestamp, missing, (values, labels) = \
        complete_timestamp(timestamp, (values, labels))

    # Split the training and testing data.
    test_portion = 0.3
    test_n = int(len(values) * test_portion)
    train_values, test_values = values[:-test_n], values[-test_n:]
    train_labels, test_labels = labels[:-test_n], labels[-test_n:]
    train_missing, test_missing = missing[:-test_n], missing[-test_n:]

    # Standardize the training and testing data.
    train_values, mean, std = standardize_kpi(
        train_values, excludes=np.logical_or(train_labels, train_missing))
    test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std)

To construct a Donut model:

.. code-block:: python

    import tensorflow as tf
    from donut import Donut
    from tensorflow import keras as K
    from tfsnippet.modules import Sequential

    # We build the entire model within the scope of `model_vs`,
    # it should hold exactly all the variables of `model`, including
    # the variables created by Keras layers.
    with tf.variable_scope('model') as model_vs:
        model = Donut(
            h_for_p_x=Sequential([
                K.layers.Dense(100, W_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, W_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            h_for_q_z=Sequential([
                K.layers.Dense(100, W_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, W_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            x_dims=120,
            z_dims=5,
        )

To train the Donut model:

.. code-block:: python

    from donut import DonutTrainer

    trainer = DonutTrainer(model=model, model_vs=model_vs)
    with tf.Session().as_default():
        trainer.fit(train_values, train_labels, train_missing, mean, std)

To use a trained Donut model for prediction:

.. code-block:: python

    from donut import DonutPredictor

    predictor = DonutPredictor(model)
    with tf.Session().as_default():
        # Remember to train the model before using the predictor,
        # or to restore the saved model.
        ...
        # Now we can use the predictor.
        test_score = predictor.get_score(test_values, test_missing)

To save and restore a trained model:

.. code-block:: python

    from tfsnippet.utils import get_variables_as_dict, VariableSaver

    with tf.Session().as_default():
        # Train the model.
        ...
        # Remember to get the model variables after the birth of a
        # `predictor` or a `trainer`.  The :class:`Donut` instance
        # does not build the graph until :meth:`Donut.get_score` or
        # :meth:`Donut.get_training_objective` is called, which is
        # done in the `predictor` or the `trainer`.
        var_dict = get_variables_as_dict(model_vs)

        # Save variables to `save_dir`.
        saver = VariableSaver(var_dict, save_dir)
        saver.save()

    with tf.Session().as_default():
        # Restore variables from `save_dir`.
        saver = VariableSaver(get_variables_as_dict(model_vs), save_dir)
        saver.restore()
donut/__init__.py 0 → 100644 +11 −0 原始行号 差异行号 差异行 __version__ = '0.1' from .augmentation import * from .model import * from .prediction import * from .preprocessing import * from .reconstruction import * from .training import * from .utils import * __all__ = ['Donut', 'DonutPredictor', 'DonutTrainer']
donut/augmentation.py 0 → 100644 +96 −0 原始行号 差异行号 差异行 import numpy as np from tfsnippet.utils import docstring_inherit __all__ = ['DataAugmentation', 'MissingDataInjection'] class DataAugmentation(object): """ Base class for data augmentation in training. Args: mean (float): Mean of the training data. std (float): Standard deviation of the training data. """ def __init__(self, mean, std): if std <= 0.: raise ValueError('`std` must be positive') self._mean = mean self._std = std def augment(self, values, labels, missing): """ Generate augmented data. Args: values (np.ndarray): 1-D float32 array of shape `(data_length,)`, the standardized KPI values. labels (np.ndarray): 1-D int32 array of shape `(data_length,)`, the anomaly labels for `values`. missing (np.ndarray): 1-D int32 array of shape `(data_length,)`, the indicator of missing points. Returns: np.ndarray: The augmented KPI values. np.ndarray: The augmented labels. np.ndarray: The augmented indicators of missing points. """ if len(values.shape) != 1: raise ValueError('`values` must be a 1-D array') if labels.shape != values.shape: raise ValueError('The shape of `labels` does not agree with the ' 'shape of `values` ({} vs {})'. format(labels.shape, values.shape)) if missing.shape != values.shape: raise ValueError('The shape of `missing` does not agree with the ' 'shape of `values` ({} vs {})'. format(missing.shape, values.shape)) return self._augment(values, labels, missing) def _augment(self, values, labels, missing): """ Derived classes should override this to actually implement the data augmentation algorithm. """ raise NotImplementedError() @property def mean(self): """Get the mean of the training data.""" return self._mean @property def std(self): """Get the standard deviation of training data.""" return self._std class MissingDataInjection(DataAugmentation): """ Data augmentation by injecting missing points into training data. Args: mean (float): Mean of the training data. 
std (float): Standard deviation of the training data. missing_rate (float): The ratio of missing points to inject. """ def __init__(self, mean, std, missing_rate): super(MissingDataInjection, self).__init__(mean, std) self._missing_rate = missing_rate @property def missing_rate(self): """Get the ratio of missing points to inject.""" return self._missing_rate @docstring_inherit(DataAugmentation.augment) def _augment(self, values, labels, missing): inject_y = np.random.binomial(1, self.missing_rate, size=values.shape) inject_idx = np.where(inject_y.astype(np.bool))[0] values = np.copy(values) values[inject_idx] = -self.mean / self.std missing = np.copy(missing) missing[inject_idx] = 1 return values, labels, missing