from IPython.display import Image


# Build the IPython image object for the first figure file. As a bare expression it
# is only rendered when it is the last statement of a notebook cell.
Image("images_emd/imageemd1.png")


# Same for the second figure file.
Image("images_emd/imageemd2.png")


import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from PyEMD import EMD

# Load the detrended data.
df = pd.read_csv('clean_data_detrended.csv')


# Analysis window: row labels `debut` to `fin`, both included.
debut = 50000
delta = 5000
fin = debut + delta

# We look at the temperature feature, where the anomalies are the most visible.
t = np.arange(debut, fin + 1)
s = np.array(df.loc[debut:fin, 'Température palier étage 1'].astype(float))

def generateIMF(t, s):
    """Decompose the signal ``s`` sampled at positions ``t`` with ``EMD().emd``.

    Returns whatever ``EMD().emd(s, t)`` returns; the rest of this file uses it
    as a 2-D array with one IMF per row.
    """
    decomposer = EMD()
    return decomposer.emd(s, t)

def displayIMF(imf):
    """Plot the raw temperature window on top and every IMF of ``imf`` below it.

    Reads the module-level ``df``, ``debut``, ``fin`` and ``t``.

    imf: 2-D array with one IMF per row.
    """
    plt.figure(figsize=(50, 20))
    # The grid has two more rows than there are IMFs: one for the raw signal,
    # and one that stays empty.
    n_rows = imf.shape[0] + 2

    plt.subplot(n_rows, 1, 1)
    plt.plot(df.loc[debut:fin, 'Température palier étage 1'].astype(float))
    plt.plot(df.loc[debut:fin, 'labels'])
    plt.title("Signal original")
    plt.xlabel("Time [s]")

    # Rows 2, 3, ... hold IMF 1, IMF 2, ...
    for row, component in enumerate(imf, start=2):
        plt.subplot(n_rows, 1, row)
        plt.title("IMF " + str(row - 1))
        plt.xlabel("Time [s]")
        plt.plot(t, component, 'g')


# Decompose the selected window and plot every IMF under the raw signal.
IMF = generateIMF(t,s)
displayIMF(IMF)


from scipy.misc import derivative
from scipy.signal import hilbert,chirp

def hilb_mod(signal):
    """Return the modulus of the analytic signal of ``signal``.

    The analytic signal comes from ``scipy.signal.hilbert``; its modulus is the
    instantaneous amplitude and has the same length as ``signal``.
    """
    return abs(hilbert(signal))

def hilb_freq(signal, dx):
    """Return the instantaneous angular frequency of ``signal``.

    The phase of the analytic signal is unwrapped, then differenced between
    consecutive samples and divided by the sampling step ``dx``. The result is
    one sample shorter than ``signal``.
    """
    phase = np.unwrap(np.angle(hilbert(signal)))
    return (phase[..., 1:] - phase[..., :-1]) / dx


def displaya(imf, nbre_imf=None):
    """Plot the raw temperature window and the Hilbert amplitude of the first IMFs.

    Reads the module-level ``df``, ``debut``, ``fin`` and ``t``.

    imf: 2-D array with one IMF per row.
    nbre_imf: number of IMFs to draw; the subplot grid has ``nbre_imf + 1``
        rows. When omitted it is ``imf.shape[0] + 1``, computed from the array
        actually passed in rather than frozen from a global at definition time.
    """
    if nbre_imf is None:
        nbre_imf = imf.shape[0] + 1

    plt.figure(figsize=(50,20))

    plt.subplot(nbre_imf+1,1,1)
    plt.plot(df.loc[debut:fin,'Température palier étage 1'].astype(float))
    plt.plot(df.loc[debut:fin,'labels'])
    plt.title("Signal x(t)")
    plt.xlabel("Time [s]")

    # Iterate over the argument (not the global IMF) and stop after nbre_imf rows.
    for n, component in enumerate(imf[:nbre_imf]):
        plt.subplot(nbre_imf+1,1,n+2)
        plt.title("IMF "+str(n+1))
        plt.xlabel("Time [s]")
        plt.plot(t, hilb_mod(component), 'g')


# Recompute the IMFs and plot their Hilbert amplitudes; `taille` is the IMF count plus one.
IMF = generateIMF(t,s)
taille = IMF.shape[0]+1
displaya(IMF, taille)


def displayf(imf, nbre_imf=None):
    """Plot the raw temperature window and the instantaneous frequency of the first IMFs.

    Reads the module-level ``df``, ``debut``, ``fin`` and ``t``.

    imf: 2-D array with one IMF per row.
    nbre_imf: number of IMFs to draw; the subplot grid has ``nbre_imf + 1``
        rows. When omitted it is ``imf.shape[0] + 1``, computed from the array
        actually passed in rather than frozen from a global at definition time.
    """
    if nbre_imf is None:
        nbre_imf = imf.shape[0] + 1

    plt.figure(figsize = (50,20))

    plt.subplot(nbre_imf+1,1,1)
    plt.plot(df.loc[debut:fin,'Température palier étage 1'].astype(float))
    plt.plot(df.loc[debut:fin,'labels'])
    plt.title("Signal x(t)")
    plt.xlabel("Time [s]")

    # Iterate over the argument (not the global IMF) and stop after nbre_imf rows.
    for n, component in enumerate(imf[:nbre_imf]):
        plt.subplot(nbre_imf+1, 1, n+2)
        plt.title("IMF " + str(n+1))
        plt.xlabel("Time [s]")
        # hilb_freq returns one sample fewer than its input, hence t[:-1].
        plt.plot(t[:-1], hilb_freq(component,1), 'g')


# Recompute the IMFs and plot their instantaneous frequencies; `taille` is the IMF count plus one.
IMF = generateIMF(t,s)
taille = IMF.shape[0]+1
displayf(IMF, taille)


# Helper that reverses the order of the rows of a matrix.
def inv_ligne(matrice):
    """Return a new array holding the rows of ``matrice`` in reverse order.

    Every row is read over the width of the first row. An empty ``matrice``
    gives an empty 1-D array.
    """
    flipped = [
        [matrice[i][j] for j in range(len(matrice[0]))]
        for i in reversed(range(len(matrice)))
    ]
    return np.array(flipped)

# Default size for the figures created from here on.
mpl.rcParams['figure.figsize']=[15,10]

def hilb_spec(s, t, img_height):
    """Rasterise the Hilbert amplitude and frequency of ``s`` into a period/time image.

    Each of the ``len(t) - 1`` samples falls in a time column of 20 samples and
    in the row ``int(log2(1 / frequency) * 10)``. It adds
    ``max(amplitude) - amplitude`` to that cell. Samples with a non-positive
    frequency, or whose row is outside ``[0, img_height)``, are skipped. The
    rows are finally reversed with ``inv_ligne``.

    Returns ``(image, freq, amp, mi_f, ma_f)``: the image, the instantaneous
    frequency, the amplitude without its last sample, and the frequency bounds
    widened to include 0.
    """
    n_steps = len(t) - 1
    grid = np.zeros((img_height, n_steps // 20 + 1))
    inst_freq = hilb_freq(s, 1)
    envelope = hilb_mod(s)[:-1]
    peak = np.max(envelope)
    lowest = min(0, np.min(inst_freq))
    highest = max(0, np.max(inst_freq))

    for k in range(n_steps):
        f = inst_freq[k]
        if f <= 0:
            continue
        row = int(np.log2(1 / f) * 10)
        if not 0 <= row < img_height:
            continue
        grid[row][k // 20] += peak - envelope[k]

    return inv_ligne(grid), inst_freq, envelope, lowest, highest

def hilb_spec_imf(s,t,img_height,imfs):
    """Draw the Hilbert spectrum obtained by summing the spectra of all ``imfs``.

    s: unused; kept so existing calls keep working.
    t: sample positions of the window. Its length sets the image width and its
        first and last values label the x axis.
    img_height: number of period rows of the image.
    imfs: iterable of IMFs, each one passed to ``hilb_spec``.
    """
    n = len(t) - 1
    # The 0.01 floor keeps the logarithm finite on cells that receive nothing.
    im_fin = np.zeros((img_height,n//20+1))+0.01
    for h in imfs:
        im_fin += hilb_spec(h,t,img_height)[0]

    plt.imshow(np.log(im_fin),cmap = plt.cm.plasma)
    plt.ylabel('period')
    plt.xlabel('time')

    y_ticks_location = np.arange(0,img_height,20)
    y_ticks_labels = np.arange(img_height,0,-20)
    plt.yticks(y_ticks_location,y_ticks_labels.astype(int))

    # Spread the ticks over the real image width and label them from the window
    # itself, instead of assuming 250 columns and the globals debut/fin.
    x_ticks_location = np.linspace(0,im_fin.shape[1]-1,10)
    plt.xticks(x_ticks_location,np.linspace(t[0],t[-1],10).astype(int))
    cbar=plt.colorbar()
    cbar.set_label('amplitude')


# Hilbert spectrum of the current window, with 300 period rows.
IMF = generateIMF(t,s)
hilb_spec_imf(s,t,300,IMF)


# Same spectrum for the window going from row label 0 to 5000.
debut = 0
fin = debut + delta


t = np.arange(debut, fin + 1)
s = np.array(df.loc[debut:fin, 'Température palier étage 1'].astype(float))

IMF = generateIMF(t, s)
hilb_spec_imf(s, t, 300, IMF)


# Raw temperature on row labels 0 to 5000.
debut = 0
delta = 5000
fin = debut + delta

t = np.arange(debut, fin + 1)
s = np.array(df.loc[debut:fin, 'Température palier étage 1'].astype(float))

plt.plot(t, s)

# Out: [<matplotlib.lines.Line2D at 0x7f4e2b30f090>]


# Hilbert amplitude restricted to one IMF (nbre_imf = 1).
IMF = generateIMF(t, s)
displaya(IMF, 1)

# Draw a constant threshold of 0.1 on the current axes.
plt.plot(t, 0.1 * np.ones(len(t)))

# Out: [<matplotlib.lines.Line2D at 0x7f4e2aeb2fd0>]


# Raw temperature on row labels 50000 to 55000.
debut = 50000
delta = 5000
fin = debut + delta

t = np.arange(debut, fin + 1)
s = np.array(df.loc[debut:fin, 'Température palier étage 1'].astype(float))

plt.plot(t, s)

# Out: [<matplotlib.lines.Line2D at 0x7f4e2b221890>]


# Hilbert amplitude restricted to one IMF (nbre_imf = 1).
IMF = generateIMF(t, s)
displaya(IMF, 1)

# Draw a constant threshold of 0.1 on the current axes.
plt.plot(t, 0.1 * np.ones(len(t)))

# Out: [<matplotlib.lines.Line2D at 0x7f4e2afcbc10>]


def mean_std_sign(s,t):
    """Return the mean and standard deviation of the amplitude of the first IMF.

    s: signal values of the window.
    t: sample positions of the window, same length as ``s``.

    Returns ``(mean, std)`` of the Hilbert amplitude of the first IMF after
    trimming both ends of the window.
    """
    imfs = EMD().emd(s,t)
    imf1 = imfs[0]
    # The slicing gets rid of edge effects. The upper bound is taken from the
    # window itself (len(t) - 1 is its span) rather than from the globals
    # fin/debut, which need not describe the window passed in.
    module = hilb_mod(imf1)[50:len(t)-1-50]
    return (np.mean(module),np.std(module))

def mean_std_donn(l, begin, end, delta, shift = 1000):
    """Collect the first-IMF amplitude statistics over sliding anomaly-free windows.

    l: DataFrame with the columns 'Température palier étage 1' and 'labels'.
    begin: row label where the first window starts.
    end: row label bounding the windows.
    delta: span of a window; a window covers row labels debut..debut + delta.
    shift: step between two consecutive windows.

    Returns ``(moyenne, ecart)``: the lists of means and standard deviations
    given by ``mean_std_sign`` for every window whose labels are all zero.
    """
    debut = begin
    fin = debut + delta
    moyenne = []
    ecart = []

    # Count the windows from `begin`, so a non-zero start does not run past `end`.
    for _ in range((end - begin - delta)//shift):
        # Here we only consider slots where there is no problem.
        if (l.loc[debut:fin,'labels'] == 0).all():
            t = np.arange(debut,fin+1)
            s = np.array(l.loc[debut:fin,'Température palier étage 1'].astype(float))
            mean, std = mean_std_sign(s,t)
            moyenne.append(mean)
            ecart.append(std)
        debut += shift
        fin += shift
    return moyenne, ecart


# Per-window mean and standard deviation of the first-IMF amplitude, on the
# anomaly-free windows of span 5000 between row labels 0 and 130000.
total_mean, total_std = mean_std_donn(df, 0, 130000, 5000)
plt.plot(total_mean)
plt.plot(total_std)

# Out: [<matplotlib.lines.Line2D at 0x7f4e2af3ff50>]


# Reference statistics read by alarme_EMD. Note that globalStd is the average of
# the per-window standard deviations, not the standard deviation of the means.
globalMean = np.mean(total_mean)
globalStd = np.mean(total_std)
print(globalMean, globalStd)

# Out: 0.02646221315125416 0.022649730749175017


def alarme_EMD(l,requested_column, begin, end, alpha, delta = 5000, shift = 144, N = 24,buffer = 500):
    """Simulate the sliding-window EMD alarm on ``l[requested_column]``.

    A window covering row labels debut..debut + delta slides by ``shift``. For
    each position, the Hilbert amplitude of the first IMF is compared, on the
    last ``shift`` samples of the window, with ``globalMean + alpha * globalStd``
    (module-level values).

    l: DataFrame holding the signal.
    requested_column: name of the column to monitor.
    begin, end: row labels bounding the simulation.
    alpha: number of ``globalStd`` above ``globalMean`` that triggers an alarm.
    delta: span of a window.
    shift: step between two windows, and number of new samples judged per window.
    N: an alarm is kept only if the N samples from its start are all above the threshold.
    buffer: number of samples after an alarm that are flagged 1.

    Returns a list of ints in which entry k describes row label ``begin + k``:
    2 for an alarm, 1 for a sample within ``buffer`` samples after an alarm,
    0 otherwise.
    """
    threshold = globalMean + alpha*globalStd

    debut = begin
    fin = delta + begin
    # The first delta-shift points are only used to predict the others, so they are labelled 0.
    predict = [0] * (delta - shift)

    for _ in range((end-delta-begin)//shift):
        t = np.arange(debut,fin + 1)
        s = np.array(l.loc[debut:fin,requested_column].astype(float))
        module = hilb_mod(EMD().emd(s,t)[0])
        predict.extend(2 if module[j] > threshold else 0 for j in range(delta-shift,delta))
        debut += shift
        fin += shift

    # Drop the alarms that do not stay above the threshold for N samples in a row.
    confirmed = [2] * N
    for k in range(1,len(predict)-N-1):
        if predict[k] == 2 and predict[k-1] == 0 and predict[k:k+N] != confirmed:
            predict[k:k+N] = [0] * N

    # Flag with 1 the quiet samples that follow an alarm by at most `buffer`
    # samples. Remembering the index of the latest alarm gives the same result
    # as testing `2 in predict[j-buffer:j]` without rescanning the slice.
    last_alarm = None
    for j, state in enumerate(predict):
        if state == 2:
            last_alarm = j
        elif state == 0 and j >= buffer and last_alarm is not None and j - last_alarm <= buffer:
            predict[j] = 1

    # In the real process we only return the last "delta" values, but here, to
    # visualize the alarm on the whole signal, we don't crop it.
    return predict


plt.figure(figsize=(50, 20))

debut = 0
fin = 135000

# The 'labels' column and the raw temperature over the whole range.
plt.plot(df.loc[debut:fin, 'labels'])
plt.plot(df.loc[debut:fin, 'Température palier étage 1'].astype(float))
# Alarm produced with alpha = 1.8 and N = 24.
l = alarme_EMD(
    df, 'Température palier étage 1',
    begin=debut, end=fin, alpha=1.8, delta=5000, shift=144, N=24, buffer=500,
)
plt.plot(l)
plt.xticks(np.arange(debut, fin, 2000.0))
plt.grid()


def choice_parameter(alpha, N):
    """Return the recall and precision of the alarm for the given ``alpha`` and ``N``.

    The alarm is simulated on row labels 0..135000 of the module-level ``df``
    and compared with its 'labels' column on the first 134000 samples. A
    prediction of 1 or 2 counts as an alarm; a strictly positive label counts as
    an anomaly (negative labels are treated as normal).

    Both ratios use counts taken on the evaluated samples only, so the
    un-normalised tail of the arrays no longer leaks into the denominators.
    A ratio whose denominator is zero is returned as NaN.
    """
    predict = alarme_EMD(df, 'Température palier étage 1', begin = 0, end = 135000, alpha = alpha, delta = 5000 , shift = 144, N = N, buffer = 500)
    labels = np.array(df.loc[0:135000,'labels'])

    n_eval = min(134000, len(predict), len(labels))
    alarm = np.asarray(predict[:n_eval]) != 0
    anomaly = labels[:n_eval] > 0

    vrais_pos = np.count_nonzero(alarm & anomaly)
    n_anomaly = np.count_nonzero(anomaly)
    n_alarm = np.count_nonzero(alarm)

    recall = vrais_pos / n_anomaly if n_anomaly else float('nan')
    prec = vrais_pos / n_alarm if n_alarm else float('nan')
    return recall, prec


#for i in range(10):
#    recall, prec = choice_parameter(1.5 + 0.1*i, 20)
#    print(recall, prec)


#for i in range(10):
#    recall, prec = choice_parameter(2.5 + 0.1*i, 20)
#    print(recall, prec)


#for i in range(10):
#    recall, prec = choice_parameter(2.8, 15 + 2*i)
#    print(recall, prec)


#for i in range(10):
#    recall, prec = choice_parameter(2.5 + 0.1*i, 27)
#    print(recall, prec)


# Recall and precision for alpha = 2.8 and N = 27.
choice_parameter(2.8, 27)

# Out: (0.7950337150808091, 0.5263108171941426)


plt.figure(figsize=(50, 20))

debut = 0
fin = 135000

# The 'labels' column and the raw temperature over the whole range.
plt.plot(df.loc[debut:fin, 'labels'])
plt.plot(df.loc[debut:fin, 'Température palier étage 1'].astype(float))
# Alarm produced with alpha = 2.8 and N = 27.
l = alarme_EMD(
    df, 'Température palier étage 1',
    begin=debut, end=fin, alpha=2.8, delta=5000, shift=144, N=27, buffer=500,
)
plt.plot(l)
plt.xticks(np.arange(debut, fin, 2000.0))
plt.grid()


### Hilbert spectrum modified to output the adequate red/orange/green zones
mpl.rcParams['figure.figsize']=[15,10]
def hilb_spec_imf(s,t,img_height):
    """Draw the Hilbert spectrum of ``s`` with the alarm zones overlaid, and save it.

    s: signal values of the window.
    t: sample positions (row labels) of the window. Its length sets the image
        width; its first and last values bound the alarm simulation and label
        the x axis.
    img_height: number of period rows of the image.

    The alarm is simulated with ``alarme_EMD`` on the module-level ``df``,
    starting 5000 rows before the window. Columns holding an alarm (2) are
    brightened by 0.05 per sample, columns in the post-alarm buffer (1) by
    0.005 per sample. The figure is saved with ``plt.savefig('try')``.
    """
    # Take the window bounds from `t` rather than from the globals debut/fin.
    start, stop = int(t[0]), int(t[-1])

    imfs=EMD().emd(s,t)
    n=len(t)-1
    # The 0.01 floor keeps the logarithm finite on cells that receive nothing.
    im_fin=np.zeros((img_height,n//20+1))+0.01
    for imf in imfs:
        im_fin += hilb_spec(imf,t,img_height)[0]

    # Entry k of the alarm list describes row label begin + k, so dropping the
    # first 5000 entries aligns index 0 with the start of the window.
    predict=alarme_EMD(df,'Température palier étage 1', begin = start-5000, end = stop, alpha = 2.2, delta =5000 , shift =144 , N =27, buffer=500 )[5000:]
    for i, state in enumerate(predict):
        if state == 2:
            im_fin[:, i//20]+=0.05
        elif state == 1:
            im_fin[:, i//20]+=0.005

    plt.imshow(np.log(im_fin),cmap=plt.cm.inferno)
    plt.ylabel('period')
    plt.xlabel('time')

    y_ticks_location = np.arange(0,img_height,20)
    y_ticks_labels = np.arange(img_height,0,-20)
    plt.yticks(y_ticks_location,y_ticks_labels.astype(int))

    # Spread the ticks over the real image width instead of assuming 250 columns.
    x_ticks_location = np.linspace(0,im_fin.shape[1]-1,10)
    plt.xticks(x_ticks_location,np.linspace(start,stop,10).astype(int))
    cbar=plt.colorbar()
    cbar.set_label('amplitude')
    plt.savefig('try')


# Spectrum with alarm zones for row labels 29000 to 34000, on 250 period rows.
debut = 29000
fin = 34000
t = np.arange(debut, fin + 1)
s = np.array(df.loc[debut:fin, 'Température palier étage 1'].astype(float))
hilb_spec_imf(s, t, 250)


plt.figure(figsize=(10,10))

debut = 29000
fin = 34000

plt.plot(df.loc[debut:fin,'labels'])
plt.plot(df.loc[debut:fin,'Température palier étage 1'].astype(float));
# Entry k of the alarm list describes row label begin + k, and begin is
# debut - 5000 here, so the entries for the plotted window start at index 5000.
# The list stops before `fin`, so taking its last 5000 entries and plotting them
# against the end of `t` would shift the alarm curve along the time axis.
l = alarme_EMD(df,'Température palier étage 1',  begin = debut-5000, end = fin, alpha = 1.8, delta = 5000 , shift = 144 , N = 24, buffer = 500)[5000:]
t=np.arange(debut,fin)
plt.plot(t[:len(l)],l)
plt.xticks(np.arange(debut, fin, 2000.0))
plt.grid()

# EMD

# I. Implementation and display

# Generate IMFs

# Hilbert transform on IMFs

# Hilbert spectrum

# II. Detection of the anomalies

# Using a threshold

# An improvement of the threshold method

# Simulation of real-time process

# III. Choice of parameters and the error