Product Features
...
Machine Learning Models
Model Types

Machine Learning Anomaly Detection

1min

This use case is a customized time-series adaptation of the anomaly detection example from the TensorFlow website, built here with a CNN model.

"""Temperature anomaly detection with a small 1D-CNN next-step forecaster.

The script reads a CSV with a 'Temperature (C)' column, standardizes it,
trains a CNN to predict the next sample from the previous TIME_STEPS
samples, and flags test samples whose absolute prediction error exceeds
THRESHOLD.
"""
import numpy as np
import pandas as pd

CSV_PATH = '20161003_085624.csv'
TEMPERATURE_COLUMN = 'Temperature (C)'
SCALED_COLUMN = 'scaled_temperature'
TRAIN_FRACTION = 0.90
TIME_STEPS = 30
EPOCHS = 2
BATCH_SIZE = 256
# Errors are computed on standardized values, so the threshold is expressed
# in units of the training set's standard deviation.
THRESHOLD = 0.65


def create_dataset(X, y, time_steps=1):
    """Slice a series into sliding windows and their next-step targets.

    Args:
        X: DataFrame of input features, one row per time step.
        y: Series of target values aligned row-for-row with ``X``.
        time_steps: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(Xs, ys)`` of NumPy arrays. ``Xs`` has shape
        ``[samples, time_steps, n_features]`` and ``ys[i]`` is the value of
        ``y`` immediately after window ``i``. Both arrays are empty when
        ``X`` has no more than ``time_steps`` rows.
    """
    n_windows = len(X) - time_steps
    windows = [X.iloc[i:i + time_steps].values for i in range(n_windows)]
    targets = [y.iloc[i + time_steps] for i in range(n_windows)]
    return np.array(windows), np.array(targets)


def main():
    """Train the forecaster on CSV_PATH and plot the detected anomalies."""
    # Heavy third-party packages are imported here so that create_dataset()
    # can be imported and reused without TensorFlow or seaborn installed.
    import matplotlib.pyplot as plt
    import seaborn as sns
    from pandas.plotting import register_matplotlib_converters
    from sklearn.preprocessing import StandardScaler
    from tensorflow import keras

    register_matplotlib_converters()
    sns.set(style='whitegrid', palette='muted')

    raw = pd.read_csv(CSV_PATH)
    print("\ncolumns: ", raw.columns, "Data Frame Length: ", len(raw), " rows\n")

    # Keep only the temperature column, indexed 0..n-1.
    df = raw[[TEMPERATURE_COLUMN]].reset_index(drop=True)
    print(df.head())

    # Chronological split; .copy() so that adding the scaled column below
    # writes to independent frames rather than to slices of df.
    train_size = int(len(df) * TRAIN_FRACTION)
    train = df.iloc[:train_size].copy()
    test = df.iloc[train_size:].copy()
    print(train.shape, test.shape)
    if min(len(train), len(test)) <= TIME_STEPS:
        raise ValueError(
            f"train and test splits must each contain more than {TIME_STEPS} rows"
        )

    # Fit the scaler on the training split only, then apply it to both.
    scaler = StandardScaler().fit(train[[TEMPERATURE_COLUMN]])
    train[SCALED_COLUMN] = scaler.transform(train[[TEMPERATURE_COLUMN]]).ravel()
    test[SCALED_COLUMN] = scaler.transform(test[[TEMPERATURE_COLUMN]]).ravel()

    # Windows are [samples, time_steps, n_features]; inputs and targets both
    # come from the scaled column so the network sees a single scale.
    X_train, y_train = create_dataset(train[[SCALED_COLUMN]], train[SCALED_COLUMN], TIME_STEPS)
    X_test, y_test = create_dataset(test[[SCALED_COLUMN]], test[SCALED_COLUMN], TIME_STEPS)
    print(X_train.shape)
    print(y_test.shape)

    n_timesteps, n_features = X_train.shape[1], X_train.shape[2]
    n_outputs = 1  # the model forecasts one scalar: the next scaled sample

    model = keras.Sequential([
        keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu',
                            input_shape=(n_timesteps, n_features)),
        keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.MaxPooling1D(pool_size=2),
        keras.layers.Flatten(),
        keras.layers.Dense(100, activation='relu'),
        # Linear output: this is a regression, and softmax over a single
        # unit would always emit 1.0.
        keras.layers.Dense(n_outputs),
    ])
    model.compile(loss='mae', optimizer='adam')

    # shuffle=False keeps the windows in chronological order.
    model.fit(
        X_train, y_train,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=0.1,
        shuffle=False,
    )
    model.summary()

    # Per-sample absolute error between the forecast and the true next value.
    train_abs_error = np.abs(model.predict(X_train).ravel() - y_train)
    test_abs_error = np.abs(model.predict(X_test).ravel() - y_test)
    print(test_abs_error)

    plt.figure()
    sns.histplot(train_abs_error, bins=50, kde=True)
    plt.figure()
    sns.histplot(test_abs_error, bins=50, kde=True)

    # The first TIME_STEPS test rows have no forecast (they only seed the
    # first window), so scores start at row TIME_STEPS of the test split.
    scored = test.iloc[TIME_STEPS:]
    test_score_df = pd.DataFrame(index=scored.index)
    test_score_df['loss'] = test_abs_error
    test_score_df['threshold'] = THRESHOLD
    test_score_df['anomaly'] = test_score_df['loss'] > test_score_df['threshold']
    # Raw temperatures are plotted directly; this equals inverse-transforming
    # the scaled column.
    test_score_df['temperature'] = scored[TEMPERATURE_COLUMN]

    plt.figure()
    plt.plot(test_score_df.index, test_score_df['loss'], label='loss')
    plt.plot(test_score_df.index, test_score_df['threshold'], label='threshold')
    plt.xticks(rotation=25)
    plt.legend()

    anomalies = test_score_df[test_score_df['anomaly']]
    print(anomalies.head())

    plt.figure()
    plt.plot(test_score_df.index, test_score_df['temperature'], label='temp')
    sns.scatterplot(
        x=anomalies.index,
        y=anomalies['temperature'],
        color=sns.color_palette()[3],
        s=52,
        label='anomaly',
    )
    plt.xticks(rotation=25)

    plt.show()


if __name__ == "__main__":
    main()