from IPython.display import Image


Image(filename ='images_iquantile/decisionTree.png')


Image("images_iquantile/Gradient-boosted-decision-tree.png")


dico = {"Description" : "Point Name",
        "MOTOR CURRENT" : "PARD@3C52@3C52-M@JT7099.CAL",
        "Température palier étage 1" : "PARD@3C52@3C52-M@TE7011A.PNT",
        "Température palier étage2" : "PARD@3C52@3C52-M@TE7021A.PNT",
        "Température palier étage 3" : "PARD@3C52@3C52-M@TE7031A.PNT",
        "Température palier étage 4" : "PARD@3C52@3C52-M@TE7041A.PNT",
        "Déplacement axiale 1/2" : "PARD@3C52@3C52-M@VT7001.PNT:RAW",
        "Déplacement axiale 3/4" : "PARD@3C52@3C52-M@VT7002.PNT:RAW",
        "1e stade vibration X" : "PARD@3C52@3C52-M@VT7011A.PNT:RAW",
        "1er stade vibration Y" : "PARD@3C52@3C52-M@VT7011B.PNT:RAW",
        "2e stade vibration X" : "PARD@3C52@3C52-M@VT7021A.PNT:RAW",
        "2e stade vibration Y" : "PARD@3C52@3C52-M@VT7021B.PNT:RAW",
        "3e stade vibration X" : "PARD@3C52@3C52-M@VT7031A.PNT:RAW",
        "3e stade vibration Y" : "PARD@3C52@3C52-M@VT7031B.PNT:RAW",
        "4e stade vibration X" : "PARD@3C52@3C52-M@VT7041A.PNT:RAW",
        "4e stade vibration Y" : "PARD@3C52@3C52-M@VT7041B.PNT:RAW",
        "Température huile sortie réfrigerant" : "PARD@3C52@3C52@TE7086.PNT",
        "labels" : "labels"}
inv_dico = {v: k for k, v in dico.items()}


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import datasets, ensemble
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
import pickle


df_gen =  pd.read_csv('data_gan/normal_0_14000_detrended.csv')

motor_n1 = np.array(df_gen[:10000]['PARD@3C52@3C52-M@JT7099.CAL'])
temp_n1 = np.array(df_gen[:10000][dico['Température palier étage 1']]) 
depl1_n1 = np.array(df_gen[:10000][dico['Déplacement axiale 1/2']]) 
depl2_n1 = np.array(df_gen[:10000][dico['Déplacement axiale 3/4']])
vib_n1 = np.array(df_gen[:10000][dico['1e stade vibration X']])
label_n1 = np.array(df_gen[:10000][dico['labels']])


df_gen1 =  pd.read_csv('data_gan/normal_66900_100032_detrended.csv')

motor_n2 = np.array(df_gen1[:]['PARD@3C52@3C52-M@JT7099.CAL'])
temp_n2 = np.array(df_gen1[:][dico['Température palier étage 1']]) 
depl1_n2 = np.array(df_gen1[:][dico['Déplacement axiale 1/2']]) 
depl2_n2 = np.array(df_gen1[:][dico['Déplacement axiale 3/4']])
vib_n2 = np.array(df_gen1[:][dico['1e stade vibration X']])
label_n2 = np.array(df_gen1[:][dico['labels']])


df_gena1 =  pd.read_csv('data_gan/n_anormal_43089_66900_detrended.csv')

motor_an1 = np.array(df_gena1[:]['PARD@3C52@3C52-M@JT7099.CAL'])
temp_an1 = np.array(df_gena1[:][dico['Température palier étage 1']]) 
depl1_an1 = np.array(df_gena1[:][dico['Déplacement axiale 1/2']]) 
depl2_an1 = np.array(df_gena1[:][dico['Déplacement axiale 3/4']])
vib_an1 = np.array(df_gena1[:][dico['1e stade vibration X']])
label_an1 = np.array(df_gena1[:][dico['labels']])


df_gena2 =  pd.read_csv('data_gan/n_anormal_14000_43089_detrended.csv')

motor_an2 = np.array(df_gena2[:]['PARD@3C52@3C52-M@JT7099.CAL'])
temp_an2 = np.array(df_gena2[:][dico['Température palier étage 1']]) 
depl1_an2 = np.array(df_gena2[:][dico['Déplacement axiale 1/2']]) 
depl2_an2 = np.array(df_gena2[:][dico['Déplacement axiale 3/4']])
vib_an2 = np.array(df_gena2[:][dico['1e stade vibration X']])
label_an2 = np.array(df_gena2[:][dico['labels']])


df_gena3 =  pd.read_csv('data_gan/n_anormal_100032_end_detrended.csv')

motor_an3 = np.array(df_gena3[:]['PARD@3C52@3C52-M@JT7099.CAL'])
temp_an3 = np.array(df_gena3[:][dico['Température palier étage 1']]) 
depl1_an3 = np.array(df_gena3[:][dico['Déplacement axiale 1/2']]) 
depl2_an3 = np.array(df_gena3[:][dico['Déplacement axiale 3/4']])
vib_an3 = np.array(df_gena3[:][dico['1e stade vibration X']])
label_an3 = np.array(df_gena3[:][dico['labels']])


X_train = np.array([motor_n1, temp_n1, depl1_n1, depl2_n1, vib_n1]).T
y_train = X_train[1:,1]
X_train = X_train[:-1]
label_train = label_n1

X_test = np.array([motor_an1, temp_an1, depl1_an1, depl2_an1, vib_an1]).T
y_test = X_test[1:,1]
X_test = X_test[:-1]
label_test = label_an1


alpha = 0.9
params = {'n_estimators': 350,
          'max_depth': 6,
          'alpha' : alpha,
          'min_samples_split': 5,
          'learning_rate': 0.1,
          'loss': 'quantile'}
buffer_time = 504
maj = 700   # justified later


reg_up = ensemble.GradientBoostingRegressor(**params)

reg_up = reg_up.fit(X_train, y_train)

y_upper_test = reg_up.predict(X_test)
y_upper_train = reg_up.predict(X_train)


reg_low = ensemble.GradientBoostingRegressor(**params)
reg_low.set_params(alpha=1.0 - alpha)
reg_low = reg_low.fit(X_train, y_train)

y_lower_test = reg_low.predict(X_test)
y_lower_train = reg_low.predict(X_train)


fig = plt.figure(figsize = (20,10))

line3 = plt.plot(y_train,'b',label=u'Observations')

line1 = plt.plot(y_upper_train, 'r--', label='Upper quartile')
line2 = plt.plot(y_lower_train, 'g--',label='Lower quartile')
"""plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label='95% prediction interval')"""
plt.xlabel('$t$')
plt.ylabel('Température')
plt.fill_between(range(0,len(y_upper_train)),y_upper_train, y_lower_train, alpha =0.1)

plt.legend()
plt.xlim(1000,5000)


plt.show()


fig = plt.figure(figsize = (20,10))

line3 = plt.plot(y_train,'b',label=u'Observations')
line1 = plt.plot(y_upper_train, 'r--', label='Upper quartile')
line2 = plt.plot(y_lower_train, 'g--',label='Lower quartile')

plt.xlabel('$t$')
plt.ylabel('Température')
plt.fill_between(range(0,len(y_upper_train)),y_upper_train, y_lower_train, alpha =0.1)

plt.legend()
plt.xlim(300,700)
plt.ylim(-0.05,0.01)

(-0.05, 0.01)


nb_up, nb_low = 0, 0 
for k,ele in enumerate(y_train):
    if ele > y_upper_train[k] : 
        nb_up += 1
    elif ele < y_lower_train[k]:
        nb_low -= 1
nb_tot = len(y_train)
above = nb_up /nb_tot
print(f'percent above :{nb_up/nb_tot}')   
print(f'percent below : {-nb_low/nb_tot}')

percent above :0.0997099709970997
percent below : 0.09760976097609761


fig = plt.figure(figsize = (20,10))

line1 = plt.plot(y_test,'b',label=u'Observations')

line2 = plt.plot(y_upper_test, 'r--', label = 'Upper quartile')
line3 = plt.plot(y_lower_test, 'g--', label='Lower quartile')
"""plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label='95% prediction interval')"""
plt.xlabel('$t$')
plt.ylabel('Température')
plt.fill_between(range(0,len(y_upper_test)),y_upper_test, y_lower_test, alpha =0.1)
plt.plot(label_test/3, label = 'labels         ')
plt.xlim(10000,25000)

plt.legend()
plt.show()


fig = plt.figure(figsize = (20,10))

line1 = plt.plot(y_test,'b',label=u'Observations')
line2 = plt.plot(y_upper_test, 'r--', label = 'Upper quartile')
line3 = plt.plot(y_lower_test, 'g--', label='Lower quartile')

plt.xlabel('$t$')
plt.ylabel('Température')
plt.fill_between(range(0,len(y_upper_test)),y_upper_test, y_lower_test, alpha =0.1)
plt.xlim(7000,10000)
#plt.ylim(-0.2,0.2)

plt.legend()
plt.show()


nb_up, nb_low = 0, 0 
for k,ele in enumerate(y_train):
    if ele > y_upper_test[k] : 
        nb_up += 1
    elif ele < y_lower_test[k]:
        nb_low -= 1
nb_tot = len(y_test)
above = nb_up /nb_tot
print(f'percent above :{nb_up/nb_tot}')   
print(f'percent below : {-nb_low/nb_tot}')

percent above :0.2127677446451071
percent below : 0.200839983200336


feature_importance = reg_low.feature_importances_
sorted_idx = np.argsort(feature_importance)
pos = np.arange(sorted_idx.shape[0]) + .5
fig = plt.figure(figsize=(12, 6))
plt.barh(pos, feature_importance[sorted_idx], align='center')
plt.yticks(pos, np.array(['motor', 'temp', 'depl1', 'depl2', 'vib'])[sorted_idx])
plt.title('Feature Importance (MDI)')

Text(0.5, 1.0, 'Feature Importance (MDI)')


from scipy.integrate import trapz


def inte(y, y_lower, y_upper):
    inte = 0
    for k in range(len(y)):
        if k >= buffer_time :
            ecart_up = y[k : k+288] - y_upper[k: k + 288]
            ecart_up[ecart_up < 0] = 0
            up = trapz(ecart_up)
            ecart_low = y_lower[k: k+288] - y[k : k+288]
            ecart_low[ecart_low < 0] = 0
            low = trapz(ecart_low)
            inte += up + low
            inte_mean = inte/(len(y)-buffer_time)
    return inte_mean
standard_area = inte(y_train,y_lower_train,y_upper_train)
print(standard_area)

0.010561052426917341


def error(y_test, y_lower_test, y_upper_test, begin,maj):
    ecart_up = y_test[begin : begin+288] - y_upper_test[begin: begin + 288]
    ecart_up[ecart_up <0] =0
    up = trapz(ecart_up)
    ecart_low = y_lower_test[begin: begin+288] - y_test[begin : begin+288]
    ecart_low[ecart_low < 0] = 0
    low = trapz(ecart_low)
    inte = up + low
    if inte > maj*standard_area :
        return 1   #error detected
    else :
        return 0   # normal situation


er = np.zeros(len(y_test))
for k in range(len(y_test)):
    if k >= buffer_time :
        er_loc = error(y_test, y_lower_test, y_upper_test, k, maj)
        if er_loc == 1 :
            er[k] = 1


plt.figure(figsize=(20,10))            
plt.plot(er, label = '1: found anomalies')
plt.plot(label_test/6, label = 'labels')
plt.plot(y_test, label = 'signal')
plt.legend();


positive_label = np.count_nonzero(label_test)
positive_predict = np.count_nonzero(er)
label_mask = label_test != 0
er_mask = er > 0
true_positive = label_mask[:-1]*er_mask
tp = np.count_nonzero(true_positive)

label_mask1 = label_test != 0
er_mask1 = er == 0
false_neg = label_mask1[:-1]*er_mask1
fn = np.count_nonzero(false_neg)

label_mask2 = label_test == 0
false_pos = label_mask2[:-1]*er_mask
fp = np.count_nonzero(false_pos)

true_neg = len(er) - np.count_nonzero(true_positive)-np.count_nonzero(false_neg)-np.count_nonzero(false_pos)

precision_before = tp /(tp+fp)
recall_before = tp/(tp+fn)
f_1_score_before = 2*(precision_before*recall_before)/(precision_before + recall_before)

print(f"precision= {precision_before}")
print(f"recall= {recall_before}")
print(f"f1 score= {f_1_score_before}")

precision= 0.6724587963308226
recall= 0.8497785317123739
f1 score= 0.7507910074937552


ER = np.zeros(len(y_test))
memory = False
for k in range(len(y_test)):
    if k >= buffer_time :
        if er[k] == 1 :
            memory = True
            ER[k] = 1
        elif memory and er[k-buffer_time:k].any() :
            ER[k] = 1
        elif not(er[k-buffer_time:k].any())   :
            memory = False


Colour = len(y_test) * [0]
for k, ele in enumerate(y_test):
    if er[k] == 1 and ER[k] == 1:
        Colour[k] = 2
    elif er[k] == 0 and ER[k] == 1:
        Colour[k] = 1


plt.figure(figsize=(20,10))            
plt.plot(Colour, label = '1: recent anomalies 2 : found anomalies')
plt.plot(label_test, label = 'labels')
plt.plot(y_test)
plt.legend();


positive_label = np.count_nonzero(label_test)
positive_predict = np.count_nonzero(ER)
label_mask = label_test != 0
ER_mask = ER > 0
true_positive = label_mask[:-1]*ER_mask
tp = np.count_nonzero(true_positive)

label_mask1 = label_test != 0
ER_mask1 = ER == 0
false_neg = label_mask1[:-1]*ER_mask1
fn = np.count_nonzero(false_neg)

label_mask2 = label_test == 0
false_pos = label_mask2[:-1]*ER_mask
fp = np.count_nonzero(false_pos)

true_neg = len(ER) - np.count_nonzero(true_positive)-np.count_nonzero(false_neg)-np.count_nonzero(false_pos)

precision_after = tp /(tp+fp)
recall_after = tp/(tp+fn)
f_1_score_after = 2*(precision_after*recall_after)/(precision_after + recall_after)
print(f"precision= {precision_after}")
print(f"recall= {recall_after}")
print(f"f1 score= {f_1_score_after}")

precision= 0.6444906444906445
recall= 0.9640938648572236
f1 score= 0.7725419120978704


Image(filename ='images_iquantile/learningrate.png')


Image(filename = 'images_iquantile/precision-recall.png')


X_train_final = np.array([motor_n2, temp_n2, depl1_n2, depl2_n2, vib_n2]).T
y_train_final = X_train[1:,1]
X_train_final = X_train[:-1]
label_train_final = label_n2


X_test_final = np.array([motor_an3, temp_an3, depl1_an3, depl2_an3, vib_an3]).T
y_test_final = X_test_final[1:,1]
X_test_final = X_test_final[:-1]

label_test_final = label_an3


params = {'n_estimators': 300,
          'max_depth': 4,
          'alpha' : 0.85,
          'min_samples_split': 3,
          'learning_rate': 0.1,
          'loss': 'quantile'}
reg_up.set_params(**params)
buffer_time = 504


params = {'n_estimators': 300,
          'max_depth': 4,
          'alpha' : 0.15,
          'min_samples_split': 3,
          'learning_rate': 0.1,
          'loss': 'quantile'}
reg_low.set_params(**params)

GradientBoostingRegressor(alpha=0.15, loss='quantile', max_depth=4,
                          min_samples_split=3, n_estimators=300)


reg_up.fit(X_train_final, y_train_final)
reg_up.fit(X_train, y_train)

reg_low.fit(X_train, y_train)
reg_low.fit(X_train_final, y_train_final)

GradientBoostingRegressor(alpha=0.15, loss='quantile', max_depth=4,
                          min_samples_split=3, n_estimators=300)


model1 = 'finalized_predictor_up.sav'
pickle.dump(reg_up, open(model1, 'wb'))

model2 = 'finalized_predictor_low.sav'
pickle.dump(reg_low, open(model2, 'wb'))


y_upper_test_f = reg_up.predict(X_test_final)
y_lower_test_f = reg_low.predict(X_test_final)


er_f = np.zeros(len(y_test_final))
for k in range(len(y_test_final)):
    if k >= buffer_time :
        er_loc = error(y_test_final, y_lower_test_f, y_upper_test_f, k, maj)
        if er_loc == 1 :
            er_f[k] = 1


ER_f = np.zeros(len(y_test_final))
memory_f = False
for k in range(len(y_test_final)):
    if k >= buffer_time :
        if er_f[k] == 1 :
            memory_f = True
            ER_f[k] = 1
        elif memory_f and er_f[k-buffer_time:k].any() :
            ER_f[k] = 1
        elif not(er_f[k-buffer_time:k].any())   :
            memory_f = False


positive_label_f = np.count_nonzero(label_test_final)
positive_predict_f = np.count_nonzero(er_f)
label_mask_f = label_test_final != 0
ER_mask_f = ER_f > 0

true_positive_f = label_mask_f[1:]*ER_mask_f
tp_f = np.count_nonzero(true_positive_f)
label_mask1_f = label_test_final != 0
ER_mask1_f = ER_f == 0

false_neg_f = label_mask1_f[1:]*ER_mask1_f
fn_f = np.count_nonzero(false_neg_f)

label_mask2_f = label_test_final == 0
ER_mask2_f = ER_f > 0
false_pos_f = label_mask2_f[1:]*ER_mask2_f
fp_f = np.count_nonzero(false_pos_f)

true_neg_f = len(ER_f) - tp_f-fn_f-fp_f

precision_f = tp_f /(tp_f+fp_f)
recall_f = tp_f/(tp_f+fn_f)
f_1_score_f = 2*(precision_f*recall_f)/(precision_f + recall_f)
print(f"precision= {precision_f}")
print(f"recall= {recall_f}")
print(f"f1 score= {f_1_score_f}")

precision= 0.575109282591926
recall= 0.9951058907278876
f1 score= 0.7289378483199167


Colour_f = np.zeros(len(y_test_final))
for k, ele in enumerate(y_test_final):
    if er_f[k] == 1 and ER_f[k] == 1:
        Colour_f[k] = 2
    elif er_f[k] == 0 and ER_f[k] == 1:
        Colour_f[k] = 1


fig = plt.figure(figsize = (20,10))

line1 = plt.plot(y_test_final,'b',label='Observations')
line2 = plt.plot(y_upper_test_f, 'r--', label = 'Upper quartile')
line3 = plt.plot(y_lower_test_f, 'g--', label='Lower quartile')
line4 = plt.plot(label_test_final/3,label = 'labels')

plt.xlabel('$t$')
plt.ylabel('Température')
plt.fill_between(range(0,len(y_upper_test_f)),y_upper_test_f, y_lower_test_f, alpha =0.1)
plt.xlim(20000,25000)

plt.legend()

<matplotlib.legend.Legend at 0x7fedd5752510>


plt.figure(figsize=(20,10))            
plt.plot(label_test_final, label = 'labels')
plt.plot(Colour_f, label='color label')
plt.legend();


def quantile_interval(file_data, model_up, model_low):
    
    df_data =  pd.read_csv(file_data)
    motor = np.array(df_data['PARD@3C52@3C52-M@JT7099.CAL'])
    temp = np.array(df_data[dico['Température palier étage 1']]) 
    depl1 = np.array(df_data[dico['Déplacement axiale 1/2']]) 
    depl2 = np.array(df_data[dico['Déplacement axiale 3/4']])
    vib = np.array(df_data[dico['1e stade vibration X']])
    time = np.array(df_data['index'])

    reg_up = pickle.load(open(model_up, 'rb'))
    reg_low = pickle.load(open(model_low, 'rb'))
    
    X = np.array([motor, temp, depl1, depl2, vib]).T
    y = X[1:,1]
    X = X[:-1]

    y_upper = reg_up.predict(X)
    y_lower = reg_low.predict(X)
    
    er_f = np.zeros(len(X))
    for k in range(len(X)):
        if k >= buffer_time :
            er_loc = error(y, y_lower, y_upper, k, maj)
            if er_loc == 1 :
                er_f[k] = 1
      
    ER_f = np.zeros(len(X))
    memory_f = False
    for k in range(len(X)):
        if k >= buffer_time :
            if er_f[k] == 1 :
                memory_f = True
                ER_f[k] = 1
            elif memory_f and er_f[k-buffer_time:k].any() :
                ER_f[k] = 1
            elif not(er_f[k-buffer_time:k].any())   :
                memory_f = False
            
    Colour_f = np.zeros(len(X))
    for k, ele in enumerate(X):
        if er_f[k] == 1 and ER_f[k] == 1:
            Colour_f[k] = 2
        elif er_f[k] == 0 and ER_f[k] == 1:
            Colour_f[k] = 1
            
    names = np.array(['timestamp', 'status', 'lower quantile', 'upper quantile'])
    features = np.array([time[-5000:], Colour_f[-5000:].astype(int), y_lower[-5000:], y_upper[-5000:]],dtype = object).T
    final = np.vstack((names,features))
       
    return   final,temp


plt.figure(figsize=(20,10))
ret, y = quantile_interval('data_gan/n_anormal_14000_43089_detrended.csv', 'finalized_predictor_up.sav', 'finalized_predictor_low.sav')

plt.plot(ret[1:,1], label = 'color')
plt.plot(ret[1:,3], label = 'y_upper')
plt.plot(ret[1:,2], label = 'y_lower')
plt.plot(y[-5000:], label = 'values')
plt.legend()

<matplotlib.legend.Legend at 0x7fedd5216690>


plt.figure(figsize=(20,10))

ret, y = quantile_interval('data_gan/n_anormal_43089_66900_detrended.csv', 'finalized_predictor_up.sav', 'finalized_predictor_low.sav')

plt.plot(ret[1:,1], label = 'color')
plt.plot(ret[1:,3], label = 'y_upper')
plt.plot(ret[1:,2], label = 'y_lower')
plt.plot(y[-5000:], label = 'values')
plt.legend()

<matplotlib.legend.Legend at 0x7fedd5278a50>


plt.figure(figsize=(20,10))

ret, y = quantile_interval('data_gan/n_anormal_100032_end_detrended.csv', 'finalized_predictor_up.sav', 'finalized_predictor_low.sav')

plt.plot(ret[1:,1], label = 'color')
plt.plot(ret[1:,3], label = 'y_upper')
plt.plot(ret[1:,2], label = 'y_lower')
plt.plot(y[-5000:], label = 'values')
plt.legend()

<matplotlib.legend.Legend at 0x7fedd5023710>

Quantile Regression¶

I. Theory¶

How does quantile regression basically work?¶

What are its avantages?¶

Deepest understanding of the method:¶

Introduction to our dataset :¶

Decision trees :¶

From a forest of trees to a robust prediction¶

Boosting¶

Gradient boosting¶

Theory¶

Algorithm¶

Quantile prediction¶

II. Imports¶

III. Function¶

IV. Plotting¶

V. Interpretation of feature importance¶

VI. Creating an alarm¶

VII. Quantifying the error and choosing the hyper-parameters¶

To visualize the error :¶

VIII. Final training of the predictor¶

IX. Final function¶

X. References¶