提交 d2789463 编辑于 作者: Haowen Xu's avatar Haowen Xu
浏览文件

code is now well-organized

上级
加载中
加载中
加载中
加载中

.coveragerc

0 → 100644
+22 −0
原始行号 差异行号 差异行
[report]
# Regexes for lines to exclude from consideration
exclude_lines =
    # Have to re-enable the standard pragma
    pragma: no cover

    # Don't complain about missing debug-only code:
    if self\.debug

    # Don't complain if tests don't hit defensive assertion code:
    raise AssertionError
    raise NotImplementedError

    # Don't complain if non-runnable code isn't run:
    if 0:
    if __name__ == .__main__.:


[run]
omit =
    # test code need not coverage statistics
    tests/*

.gitignore

0 → 100644
+8 −0
原始行号 差异行号 差异行
.idea
.cache
*.iml
/debug.py
/.coverage
*.pyc
.DS_Store
*.*~

README.rst

0 → 100644
+134 −0
原始行号 差异行号 差异行
DONUT
=====

Donut is an anomaly detection algorithm for periodic KPIs.

Installation
------------

Check out this repository and execute:

.. code-block:: bash

    pip install git+https://github.com/thu-ml/zhusuan.git
    pip install git+https://github.com/korepwx/tfsnippet.git
    pip install .

This will first install `ZhuSuan`_ and `TFSnippet`_, the two major dependencies
of Donut, then install the Donut package itself.

.. _`ZhuSuan`: https://github.com/thu-ml/zhusuan
.. _`TFSnippet`: https://github.com/korepwx/tfsnippet

API Usage
---------

To prepare the data:

.. code-block:: python

    import numpy as np
    from donut import complete_timestamp, standardize_kpi

    # Read the raw data.
    timestamp, values, labels = ...
    # If there is no label, simply use all zeros.
    labels = np.zeros_like(values, dtype=np.int32)

    # Complete the timestamp, and obtain the missing point indicators.
    timestamp, missing, (values, labels) = \
        complete_timestamp(timestamp, (values, labels))

    # Split the training and testing data.
    test_portion = 0.3
    test_n = int(len(values) * test_portion)
    train_values, test_values = values[:-test_n], values[-test_n:]
    train_labels, test_labels = labels[:-test_n], labels[-test_n:]
    train_missing, test_missing = missing[:-test_n], missing[-test_n:]

    # Standardize the training and testing data.
    train_values, mean, std = standardize_kpi(
        train_values, excludes=np.logical_or(train_labels, train_missing))
    test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std)

To construct a Donut model:

.. code-block:: python

    import tensorflow as tf
    from donut import Donut
    from tensorflow import keras as K
    from tfsnippet.modules import Sequential

    # We build the entire model within the scope of `model_vs`,
    # it should hold exactly all the variables of `model`, including
    # the variables created by Keras layers.
    with tf.variable_scope('model') as model_vs:
        model = Donut(
            h_for_p_x=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            h_for_q_z=Sequential([
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
                K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001),
                               activation=tf.nn.relu),
            ]),
            x_dims=120,
            z_dims=5,
        )

To train the Donut model:

.. code-block:: python

    from donut import DonutTrainer

    trainer = DonutTrainer(model=model, model_vs=model_vs)
    with tf.Session().as_default():
        trainer.fit(train_values, train_labels, train_missing, mean, std)

To use a trained Donut model for prediction:

.. code-block:: python

    from donut import DonutPredictor

    predictor = DonutPredictor(model)
    with tf.Session().as_default():
        # Remember to train the model before using the predictor,
        # or to restore the saved model.
        ...

        # Now we can use the predictor.
        test_score = predictor.get_score(test_values, test_missing)

To save and restore a trained model:

.. code-block:: python

    from tfsnippet.utils import get_variables_as_dict, VariableSaver

    with tf.Session().as_default():
        # Train the model.
        ...

        # Remember to get the model variables after the birth of a
        # `predictor` or a `trainer`.  The :class:`Donut` instances
        # does not build the graph until :meth:`Donut.get_score` or
        # :meth:`Donut.get_training_objective` is called, which is
        # done in the `predictor` or the `trainer`.
        var_dict = get_variables_as_dict(model_vs)

        # save variables to `save_dir`
        saver = VariableSaver(var_dict, save_dir)
        saver.save()

    with tf.Session().as_default():
        # Restore variables from `save_dir`.
        saver = VariableSaver(get_variables_as_dict(model_vs), save_dir)
        saver.restore()

donut/__init__.py

0 → 100644
+11 −0
原始行号 差异行号 差异行
# Package version string, exposed for `pip` and runtime introspection.
__version__ = '0.1'

# Re-export every public name of the sub-modules at package level, so that
# users can write e.g. ``from donut import Donut``.
# NOTE(review): `__all__` below restricts ``from donut import *`` to three
# names, but the attributes pulled in by these wildcard imports remain
# accessible as ``donut.<name>``.
from .augmentation import *
from .model import *
from .prediction import *
from .preprocessing import *
from .reconstruction import *
from .training import *
from .utils import *

# The documented public API of the package.
__all__ = ['Donut', 'DonutPredictor', 'DonutTrainer']

donut/augmentation.py

0 → 100644
+96 −0
原始行号 差异行号 差异行
import numpy as np
from tfsnippet.utils import docstring_inherit

__all__ = ['DataAugmentation', 'MissingDataInjection']


class DataAugmentation(object):
    """
    Base class for data augmentation in training.

    Args:
        mean (float): Mean of the training data.
        std (float): Standard deviation of the training data.
    """

    def __init__(self, mean, std):
        # A non-positive standard deviation cannot describe real data.
        if std <= 0.:
            raise ValueError('`std` must be positive')
        self._mean = mean
        self._std = std

    @property
    def mean(self):
        """Get the mean of the training data."""
        return self._mean

    @property
    def std(self):
        """Get the standard deviation of training data."""
        return self._std

    def augment(self, values, labels, missing):
        """
        Generate augmented data.

        Args:
            values (np.ndarray): 1-D float32 array of shape `(data_length,)`,
                the standardized KPI values.
            labels (np.ndarray): 1-D int32 array of shape `(data_length,)`,
                the anomaly labels for `values`.
            missing (np.ndarray): 1-D int32 array of shape `(data_length,)`,
                the indicator of missing points.

        Returns:
            np.ndarray: The augmented KPI values.
            np.ndarray: The augmented labels.
            np.ndarray: The augmented indicators of missing points.
        """
        # Validate all shapes up-front, then delegate to the concrete
        # implementation supplied by the derived class.
        if len(values.shape) != 1:
            raise ValueError('`values` must be a 1-D array')
        if labels.shape != values.shape:
            raise ValueError('The shape of `labels` does not agree with the '
                             'shape of `values` ({} vs {})'.
                             format(labels.shape, values.shape))
        if missing.shape != values.shape:
            raise ValueError('The shape of `missing` does not agree with the '
                             'shape of `values` ({} vs {})'.
                             format(missing.shape, values.shape))
        return self._augment(values, labels, missing)

    def _augment(self, values, labels, missing):
        """
        Derived classes should override this to actually implement the
        data augmentation algorithm.
        """
        raise NotImplementedError()


class MissingDataInjection(DataAugmentation):
    """
    Data augmentation by injecting missing points into training data.

    Args:
        mean (float): Mean of the training data.
        std (float): Standard deviation of the training data.
        missing_rate (float): The ratio of missing points to inject,
            must be within the range ``[0, 1]``.

    Raises:
        ValueError: If `missing_rate` is not within ``[0, 1]``.
    """

    def __init__(self, mean, std, missing_rate):
        super(MissingDataInjection, self).__init__(mean, std)
        # A Bernoulli probability outside [0, 1] would make
        # `np.random.binomial` fail at augmentation time; fail fast here.
        if not 0. <= missing_rate <= 1.:
            raise ValueError('`missing_rate` must be within [0, 1]')
        self._missing_rate = missing_rate

    @property
    def missing_rate(self):
        """Get the ratio of missing points to inject."""
        return self._missing_rate

    @docstring_inherit(DataAugmentation.augment)
    def _augment(self, values, labels, missing):
        # Draw a Bernoulli mask over the series: 1 marks a point to be
        # turned into a "missing" point.
        inject_y = np.random.binomial(1, self.missing_rate, size=values.shape)
        # BUGFIX: `np.bool` was deprecated in NumPy 1.20 and removed in
        # NumPy >= 1.24; the builtin `bool` is the supported equivalent.
        inject_idx = np.where(inject_y.astype(bool))[0]
        # Missing points are filled with zero on the original scale, which
        # becomes `-mean / std` after standardization.
        values = np.copy(values)
        values[inject_idx] = -self.mean / self.std
        missing = np.copy(missing)
        missing[inject_idx] = 1
        return values, labels, missing