Ruptures¶
I. Introduction¶
Ruptures is a Python library used to detect regime changes in a signal by returning a list of breakpoints. This change point detection is performed offline: the algorithm doesn't work in real time; it needs the complete, fixed signal before running. We need to run the algorithm several times a day to monitor changes in the data Air Liquide sends us.
We are going to use the PELT algorithm: its main advantage is that it doesn't need to know the number of breakpoints in advance.
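For instance, a minimal sketch using the public ruptures API: Binseg or Dynp must be told how many breakpoints to look for, whereas Pelt only takes a penalty and infers the number itself.
import ruptures as rp
# toy signal with 2 true breakpoints
sig, true_bkps = rp.pw_constant(500, 1, 2, noise_std=1)
print(rp.Binseg(model="l2").fit(sig).predict(n_bkps=2))  # number of breakpoints given up front
print(rp.Pelt(model="l2").fit(sig).predict(pen=10))      # only a penalty is needed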
The main problem is the time PELT needs to compute the result. The faster our algorithm is, the more often we can run the monitoring, and thus the more reactive we are. Ruptures was developed for general practical applications, not for monitoring signals in near real time; computation time wasn't a priority to optimise for that use case, but it was for ours.
So we decided to optimise the core of the PELT algorithm. In the following notebook, we present the different steps of our optimisation, and then how we applied PELT to alert on possible anomalies.
II. Example of a PELT application to a random signal with several change points¶
import ruptures as rp
import matplotlib.pyplot as plt
We first generate a random signal containing different regimes and change points. We display the real breakpoints, i.e. the ones used to generate the signal.
n_samples, dim, sigma = 10000, 3, 2
n_bkps = 2
signal, bkps = rp.pw_constant(n_samples, dim, n_bkps, noise_std=sigma)
signal = signal[:,1]
rp.display(signal, bkps)
plt.show();
We then apply PELT to determine the change points.
pelt = rp.Pelt(jump = 20)
bkps = pelt.fit_predict(signal, 30)
rp.display(signal, bkps)
plt.show();
III. Optimisation of PELT¶
To present our modifications clearly, step by step, we extracted the functions of the Pelt class so that we can modify some of them directly in the notebook.
For each modification, we display the associated benchmark to show how much time we gained.
What do we have to optimise?¶
The following code is the original one, taken from the Ruptures library (https://github.com/deepcharles/ruptures).
from math import floor
from ruptures.costs import cost_factory
from ruptures.base import BaseCost, BaseEstimator
import numpy as np
"""
We take off the functions of Pelt class in order to
only modify the functions we are interested by (and not all of them)
"""
cost, min_size, jump, n_samples = cost_factory(model="l2"), None, None, None
def initPelt(model="l2", custom_cost=None, custom_min_size=2, custom_jump=5, params=None):
    """Initialize the module-level Pelt parameters.
    Args:
        model (str, optional): segment model, ["l1", "l2", "rbf"]. Not used if ``custom_cost`` is not None.
        custom_cost (BaseCost, optional): custom cost function. Defaults to None.
        custom_min_size (int, optional): minimum segment length.
        custom_jump (int, optional): subsample (one every *custom_jump* points).
        params (dict, optional): a dictionary of parameters for the cost instance.
    """
global cost, min_size, jump, n_samples
if custom_cost is not None and isinstance(custom_cost, BaseCost):
cost = custom_cost
else:
if params is None:
cost = cost_factory(model=model)
else:
cost = cost_factory(model=model, **params)
min_size = max(custom_min_size, cost.min_size)
jump = custom_jump
def segPelt(pen):
"""Computes the segmentation for a given penalty using PELT (or a list
of penalties).
Args:
penalty (float): penalty value
Returns:
dict: partition dict {(start, end): cost value,...}
"""
global cost
# initialization
# partitions[t] contains the optimal partition of signal[0:t]
partitions = dict() # this dict will be recursively filled
partitions[0] = {(0, 0): 0}
admissible = []
# Recursion
ind = [k for k in range(0, n_samples, jump) if k >= min_size]
ind += [n_samples]
for bkp in ind:
# adding a point to the admissible set from the previous loop.
new_adm_pt = floor((bkp - min_size) / jump)
new_adm_pt *= jump
admissible.append(new_adm_pt)
subproblems = list()
for t in admissible:
# left partition
try:
tmp_partition = partitions[t].copy()
except KeyError: # no partition of 0:t exists
continue
# we update with the right partition
tmp_partition.update({(t, bkp): cost.error(t, bkp) + pen})
subproblems.append(tmp_partition)
# finding the optimal partition
partitions[bkp] = min(subproblems, key=lambda d: sum(d.values()))
# trimming the admissible set
admissible = [
t
for t, partition in zip(admissible, subproblems)
if sum(partition.values()) <= sum(partitions[bkp].values()) + pen
]
best_partition = partitions[n_samples]
del best_partition[(0, 0)]
return best_partition
def fitPelt(signal):
"""Set params.
Args:
signal (array): signal to segment. Shape (n_samples, n_features) or (n_samples,).
Returns:
        None
"""
global cost, n_samples
# update params
cost.fit(signal)
if signal.ndim == 1:
(n_samples,) = signal.shape
else:
n_samples, _ = signal.shape
def predictPelt(pen):
"""Return the optimal breakpoints.
    Must be called after fitPelt. The breakpoints are associated with the signal
    passed to fitPelt.
Args:
pen (float): penalty value (>0)
Returns:
list: sorted list of breakpoints
"""
partition = segPelt(pen)
bkps = sorted(e for s, e in partition.keys())
return bkps
def fit_predictPelt(signal, pen):
"""Fit to the signal and return the optimal breakpoints.
Helper method to call fit and predict once
Args:
signal (array): signal. Shape (n_samples, n_features) or (n_samples,).
pen (float): penalty value (>0)
Returns:
list: sorted list of breakpoints
"""
fitPelt(signal)
return predictPelt(pen)
Let's try to compute this with hyper-parameters that give a reasonable computation time.
import pandas as pd
df = pd.read_csv("clean_data_detrended.csv")
data = df.iloc[:,3].to_numpy()
initPelt(custom_jump = 50)
%time bkps = fit_predictPelt(data, 30) # benchmark
rp.display(data,bkps) ;
CPU times: user 28.5 s, sys: 3.66 ms, total: 28.5 s Wall time: 28.5 s
Where does the algorithm spend its time?¶
We don't display the full profile here to avoid flooding the notebook, but about 90% of the time is spent computing the cost of each partition. That is what we have to optimise: our modifications will focus on this point, in the seg function.
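A quick way to confirm this in the notebook is the standard-library profiler (a sketch; nothing here is specific to ruptures):
import cProfile
# the cumulative time of the cost.error(...) calls dominates the report
cProfile.run("fit_predictPelt(data, 30)", sort="cumtime")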
1. Changes in the data structure and minimum calculation¶
We can already point out some obvious things to optimise:
- the dictionary data structure isn't efficient in terms of computation time, yet it is used almost everywhere: we have to change it. The easiest to replace are the dictionaries inside partitions (see the micro-benchmark sketch below)
- some calculations can be vectorised using NumPy arrays
- the exception raised in the admissible loop isn't useful, since a partition of 0:t obviously exists in each run: removing it will let us reorganise the algorithm structure further
- we can compute the minimum directly in the admissible loop, instead of waiting until the end; this gives us the associated index without calling argmin
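To illustrate the first and last points, here is a small self-contained micro-benchmark (illustrative sizes, not the real signal): re-summing each candidate dictionary at every comparison versus keeping running totals.
import timeit
# 50 candidate partitions, each a dict of 200 segments (illustrative)
parts = [{(i, i + 1): float(i + k) for i in range(200)} for k in range(50)]
def with_dict_sums():
    # original approach: sum the values of every dict for each comparison
    return min(parts, key=lambda d: sum(d.values()))
totals = [sum(d.values()) for d in parts]  # in PELT, this total can be carried along
def with_running_totals():
    return parts[min(range(len(parts)), key=totals.__getitem__)]
print(timeit.timeit(with_dict_sums, number=2000))
print(timeit.timeit(with_running_totals, number=2000))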
Let's modify the seg function, and also merge it with predict.
from copy import deepcopy
def predictPelt(pen):
global cost
# initialization
# partitions[t] contains the optimal partition of signal[0:t]
partitions = dict() # this dict will be recursively filled
partitions[0] = [[(0,0,0)], 0]
admissible = np.array([], dtype = int)
# Recursion
ind = np.array([k for k in range(0, n_samples, jump) if k >= min_size], dtype = int)
ind = np.concatenate((ind, [n_samples]))
for bkp in ind:
# adding a point to the admissible set from the previous loop.
new_adm_pt = floor((bkp - min_size) / jump)
new_adm_pt *= jump
admissible = np.concatenate((admissible, [new_adm_pt]))
sum_values = np.empty(len(admissible))
min_index, min_part = 0, {}
for index, t in enumerate(admissible):
# left partition
tmp_partition = deepcopy(partitions[t])
tmp_cost = cost.error(t, bkp) + pen
# we update with the right partition
tmp_partition[0].append((t, bkp, tmp_cost))
tmp_partition[1] += tmp_cost
sum_values[index] = tmp_partition[1]
# update the value of the minimum of the sum
if sum_values[index] <= sum_values[min_index]:
min_index = index
min_part = tmp_partition
# finding the optimal partition
partitions[bkp] = min_part
# trimming the admissible set
admissible = admissible[sum_values <= sum_values[min_index] + pen]
best_partition = partitions[n_samples]
bkps = sorted(b for a,b,c in best_partition[0][1:])
return bkps
%time bkps = fit_predictPelt(data, 30) # benchmark
rp.display(data,bkps) ;
CPU times: user 2min 58s, sys: 302 ms, total: 2min 58s Wall time: 2min 58s
[11200, 12100, 12600, 33500, 34450, 35450, 36450, 36800, 37450, 37800, 38150, 38800, 39200, 40100, 40850, 41050, 52700, 52950, 53650, 54700, 54950, 55500, 55850, 56200, 56550, 56900, 57250, 57550, 57900, 58600, 58900, 59450, 59650, 60450, 60700, 61650, 64400, 64700, 65700, 108600, 108850, 109400, 111500, 111750, 112800, 113450, 115050, 115800, 116500, 117050, 117500, 118050, 118550, 119000, 120600, 121650, 122050, 135287]
It takes more time than the original version, even though the data structure is no longer the problem. So what is?
2. Removing the copy¶
This version reveals another problem: the copy. With the new structure we were forced to use deepcopy, since we have lists inside lists, i.e. references stored in lists: modifying a value in a shallow copy would modify the original partition, which is not what we want. deepcopy fixes that by copying every element of the list, but it takes a lot of time.
Could we do without this copy altogether? Why is there a copy in the first place? We use partitions[t] for two reasons:
- to compute the cost to put in sum_values
- to define the partition associated with the minimum cost in sum_values
For the first point, we don't need to copy the partition, only to read the cost associated with the admissible point.
For the second point, since this partition extends the previous one, we don't need to copy the previous one, only to keep a reference to it and store that reference in a tuple together with the new element (and the total cost).
By doing this and slightly changing the partition structure (previously a dictionary of lists of tuples, now a dictionary of tuples), we can remove the copy operation entirely and thereby gain a lot of computation time.
Also, since the last partition refers to the second-to-last, and so on recursively, we obtain at the end the list of breakpoints already sorted but in reverse order. The final step is simply reversing the list; the sort implemented in the original library becomes superfluous.
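As a tiny illustration of this structure (made-up numbers): each partition is a tuple (parent_partition, last_breakpoint, total_cost), and unwinding the parent references yields the breakpoints in reverse order.
p0 = (None, None, 0)    # empty partition of signal[0:0]
p1 = (p0, 400, 12.5)    # best partition ending at breakpoint 400
p2 = (p1, 900, 20.1)    # best partition ending at breakpoint 900
node, bkps = p2, []
while node[0] is not None:  # walk back through the parent references
    bkps.append(node[1])
    node = node[0]
print(bkps[::-1])           # [400, 900]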
def predictPelt(pen):
# initialization
# partitions[t] contains the optimal partition of signal[0:t]
partitions = dict() # this dict will be recursively filled
partitions[0] = (None, None, 0)
admissible = np.array([], dtype = int)
# Recursion
ind = np.array([k for k in range(0, n_samples, jump)
if k >= min_size], dtype = int)
ind = np.concatenate((ind, [n_samples]))
for bkp in ind:
# adding a point to the admissible set from the previous loop.
new_adm_pt = floor((bkp - min_size) / jump)
new_adm_pt *= jump
admissible = np.concatenate((admissible, [new_adm_pt]))
sum_values = np.empty(len(admissible))
min_index, min_part = 0, None
for index, t in enumerate(admissible):
sum_values[index] = partitions[t][-1] + cost.error(t, bkp) + pen
#update the value of the minimum of the sum
if sum_values[index] <= sum_values[min_index]:
min_index = index
# finding the optimal partition
t = admissible[min_index]
partitions[bkp] = (partitions[t], bkp, sum_values[min_index])
# trimming the admissible set
admissible = admissible[sum_values <= sum_values[min_index] + pen]
best_partition = partitions[n_samples]
a,b,c = best_partition
bkps = []
while a is not None:
bkps.append(b)
a,b,c = a
bkps = bkps[::-1]
return bkps
%time bkps = fit_predictPelt(data, 30) # benchmark
rp.display(data,bkps) ;
CPU times: user 22.1 s, sys: 3.82 ms, total: 22.1 s Wall time: 22.1 s
These are the best optimisations we were able to achieve without parallelising the cost computation. Profiling the function again shows that the cost calculation now takes almost 100% of the time.
3. Other optimisations (which don't reduce the computation time)¶
We implemented other small optimisations to finally get rid of the dictionary structure and of some duplicated values.
def predictPelt(pen):
# initialization
# partitions[t] contains the optimal partition of signal[0:t]
    partitions = []  # will be recursively filled
partitions.append((0, 0))
admissible = np.array([[-1, -1]], dtype=int)
# Recursion
ind = np.array([k for k in range(0, n_samples, jump)
if k >= min_size], dtype = int)
ind = np.concatenate((ind, [n_samples]))
for index_bkp, bkp in enumerate(ind):
# adding a point to the admissible set from the previous loop.
new_adm_pt = [index_bkp, floor((bkp - min_size) / jump) * jump]
if admissible[0][0] == -1:
admissible[0] = new_adm_pt
else:
admissible = np.concatenate((admissible, [new_adm_pt]))
sum_values = np.empty(len(admissible))
min_index = 0
for index_adm, adm in enumerate(admissible):
i, t = adm
sum_values[index_adm] = partitions[i][1] + cost.error(t, bkp) + pen
#update the value of the minimum of the sum
if sum_values[index_adm] <= sum_values[min_index]:
min_index = index_adm
# finding the optimal partition
index_t = admissible[min_index][0]
partitions.append((index_t, sum_values[min_index]))
# trimming the admissible set
admissible = admissible[sum_values <= sum_values[min_index] + pen]
a = partitions[-1][0]
bkps = [n_samples]
while a != 0:
bkps.append(ind[a])
a = partitions[a][0]
bkps = bkps[::-1]
return bkps
%time bkps = fit_predictPelt(data, 30) # benchmark
rp.display(data,bkps) ;
CPU times: user 22.6 s, sys: 23.9 ms, total: 22.6 s Wall time: 22.6 s
IV. Parallelisation of the cost calculation¶
As we said, once we had gained computation time by changing the algorithm's structure, the aim was to find a way to reduce the cost-calculation time of each partition.
The first idea that came to us was: are these calculations independent from each other? They are indeed, so we can parallelise the computation of each cost. In theory, the computation time should be divided by approximately the number of workers created by the parallelisation.
But in practice, we have to take into account the overhead of creating the workers (in fact, we use multiprocessing rather than threads, because the GIL prevents Python threads from running CPU-bound code in parallel). Even when allocating a batch of tasks per process (not only one task per process), and thereby reducing the number of process creations, the results were clearly bad: we reduced the user computation time, but we lengthened the system time.
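For reference, here is a minimal sketch of the chunked approach we tried; the helper names are our own illustration, and it assumes the fitted global cost object is inherited by the worker processes (e.g. via fork on Linux).
from multiprocessing import Pool
import numpy as np
def chunk_costs(args):
    # evaluate a whole chunk of admissible points in one task,
    # to amortise the per-task process overhead
    # (cost is the fitted global cost object, assumed inherited by the workers)
    chunk, bkp, pen = args
    return [cost.error(t, bkp) + pen for t in chunk]
def parallel_costs(admissible, bkp, pen, n_workers=4):
    chunks = np.array_split(np.asarray(admissible), n_workers)
    with Pool(n_workers) as pool:
        parts = pool.map(chunk_costs, [(c, bkp, pen) for c in chunks])
    return np.concatenate(parts)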
One solution could have been to implement the PELT kernel in C, in order to better control how the processes are created, but we would have lost modularity with the rest of the library (mainly the ability to change the cost model).
V. Choice of the hyperparameters¶
Our algorithm needs two hyperparameters: a "jump" (the minimal distance between two breakpoints) and a "penalty" (which controls the trade-off between detection time and accuracy).
To choose them, we used the detection time and the number of false anomalies detected as criteria, and we tested 8 values of jump and 10 values of penalty.
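The search itself looked roughly like the sketch below; detection_delay and false_alert_count are hypothetical stand-ins for our evaluation against the labelled 5-week history, and the grids are illustrative.
import itertools
jumps = [10, 20, 30, 50, 70, 90, 100, 110]  # 8 illustrative jump values
penalties = range(1, 11)                    # 10 penalty values
results = []
for jump, pen in itertools.product(jumps, penalties):
    bkps = rp.Pelt(jump=jump).fit_predict(data, pen)
    # detection_delay / false_alert_count: hypothetical evaluation helpers
    results.append((jump, pen, detection_delay(bkps), false_alert_count(bkps)))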
We plotted the following graph, which shows the average number of false anomalies detected over 5 weeks against the average detection time of an anomaly.
As Air Liquide's priority is the best possible detection time (no matter how many false alerts we raise), we chose the hyperparameters with the shortest detection time, even though that implies 1.7 false alerts on average every 5 weeks (red point).
This point corresponds to a jump of 100 and a penalty of 5.
To find a better compromise, we repeated the tests with the chosen penalty of 5 and jumps between 90 and 110. We indeed found a better average precision with an unchanged average detection time (red point).
The default jump is set to 98 and the default penalty to 5.
With these values, we obtained a recall of 100% and an accuracy of 75%.
VI. Post-processing¶
The algorithm "pelt" in the library "ruptures" returns us the list of the breakpoints observed in the data's curve (typically, the beginning of an oscillation or the brutal fall at the end of an oscillation).
The detection of a new breakpoint since the previous execution of the function leads to the triggering of an alert (status = 2).
The alert will last for at least buffer_time.
If the algorithm doesn't detect a new breakpoint during this period of buffer_time, the alarm becomes an internal alarm (status = 1): the points are labeled as though the situation were normal but the algorithm keeps in memory the fact that the alert is maybe not totally over (specifically to count the number of ruptures).
If the algorithm doesn't detect a new breakpoint during this new period of buffer_time, we consider that the anomaly is over (status = 0).
Else, the alarm continues for at least buffer_time.
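A compact sketch of this state machine (our own summary of the rules above, not code taken from the class below):
def next_status(status, new_bkp, quiet_points, buffer_time):
    # status: 2 = alert, 1 = internal alert, 0 = normal
    # quiet_points: points elapsed since the last detected breakpoint
    if new_bkp:
        return 2   # any new breakpoint (re)triggers an alert
    if status == 2 and quiet_points >= buffer_time:
        return 1   # quiet for buffer_time: downgrade to internal alert
    if status == 1 and quiet_points >= 2 * buffer_time:
        return 0   # quiet for another buffer_time: back to normal
    return status  # otherwise the current phase continues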
import ruptures as rp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from Conversion_JSON import colorsToJson
from requests_interface import pull_data, push_data
from datetime import datetime as dt
from time import sleep
BUFFER_TIME = 504
class PeltAirLiquide:
def __init__(self, df, feature = "PARD@3C52@3C52-M@TE7011A.PNT", index = "TIMESTAMP", size_slice = 5000, jump = 98, pen = 5, update_freq = 98, buffer_time = BUFFER_TIME):
"""Initialization of the class PeltAirLiquide.
        The list of breakpoints and the labels are computed for the first size_slice values
Arguments:
df (pandas data frame): data frame
feature (string, optional) : name of the column we want to study
size_slice (int, opt) : nb of points where we search the breakpoints
jump (int, opt) : minimal distance between two breakpoints
            pen (int, opt) : penalty value for the pelt algorithm
update_freq (int, opt) : frequency of the execution of the algorithm (in points)
buffer_time (int, opt) : minimal duration of an alert or internal alert
Attributes:
self.data (np.array with floats) : the size_slice last values of the feature studied
self.labels (np.array with ints) : array containing a label for each point of the data.
If the point studied doesn't belong to an alert phase, the label is 0.
Otherwise, the absolute value of the label corresponds to the number of ruptures in this alert phase.
The label is positive for the current alert and negative otherwise.
self.bkp_histo (np.array with lists of 2 ints) : list of the breakpoints already detected
(since the beginning of the execution) with their labels
self.alert_level (np.array with lists of 1 timestamp and 1 int) : array containing for each point of the data its timestamp and a label associated :
0 = normal
1 = internal alert
2 = alert
Returns:
None"""
self.jump = jump
self.pen = pen
self.size_slice = size_slice
self.update_freq = update_freq
self.buffer_time = buffer_time
self.index = index
        self.data = (df[feature].to_numpy())[-self.size_slice:]  # last size_slice values (a .loc label slice with -size_slice would return the whole frame)
self.labels = np.zeros(self.size_slice, dtype = int)
self.alert_level = np.zeros(self.size_slice, dtype = object)
self.timestamp = (df[index].to_numpy())[-self.size_slice:]
pelt = rp.Pelt(jump = self.jump)
bkps = np.array(pelt.fit_predict(self.data, self.pen))[:-1]
if len(bkps) == 0:
self.bkp_histo = np.array([[0, None]])
else:
self.bkp_histo = np.zeros((len(bkps),2), dtype = int)
self.bkp_histo[:,0] = bkps
label = 1
for ind, bkp in enumerate(self.bkp_histo[:,0]):
self.alert_level[bkp:bkp + self.buffer_time] = 2
self.labels[bkp: bkp + self.buffer_time] = -label
self.bkp_histo[ind, 1] = label
label += 1
if bkp == self.bkp_histo[-1,0]:
self.alert_level[bkp + self.buffer_time: min(bkp + 2*self.buffer_time, self.size_slice)] = 1
elif self.bkp_histo[ind+1,0] > bkp + 2*self.buffer_time:
self.alert_level[bkp + self.buffer_time:bkp + 2*self.buffer_time] = 1
label = 1
elif self.bkp_histo[ind+1,0] > bkp + self.buffer_time:
self.alert_level[bkp + self.buffer_time:bkp + 2*self.buffer_time] = 1
def endAlert(self, internal_alert = True):
"""Change the label value to
end the alert phase and begin an internal alert
or
end the internal alert and begin a new normal phase.
Argument:
internal_alert (bool, opt) : are we already in internal alert ? (alert phase ended for less than buffer_time points)
Returns:
None"""
self.labels = -np.abs(self.labels)
if internal_alert:
self.alert_level[-self.update_freq:] = 1
else:
self.alert_level[-self.update_freq:] = 0
self.bkp_histo[-1][1] = 0
def run(self):
#calculate the new breakpoints
pelt = rp.Pelt(jump = self.jump)
bkps = np.array(pelt.fit_predict(self.data,self.pen))[:-1]
new_bkps = bkps[bkps >= self.size_slice - self.update_freq]
#update arrays
self.labels = np.append(self.labels[self.update_freq:], self.update_freq*[max(self.labels[-1],0)])
self.alert_level = np.append(self.alert_level[self.update_freq:], self.update_freq*[self.alert_level[-1]])
        # update bkp_histo: subtract update_freq from each value
self.bkp_histo[:,0] -= self.update_freq
if len(new_bkps) > 0: #new breakpoints detected
if self.alert_level[-1] == 1: #internal alert
i = 1
while self.bkp_histo.shape[0] > i and self.bkp_histo[-i,1] != 1:
i += 1
beg_oscil = max(0, int(self.bkp_histo[-i][0]+1))
self.labels[beg_oscil:] = np.negative(self.labels[beg_oscil:])
self.labels[max(0, int(self.bkp_histo[-1][0]+1)):] = self.bkp_histo[-1][1]
self.bkp_histo[-1][1] = self.labels[int(self.bkp_histo[-1][0])]
else: #real alert
self.alert_level[-1] = 2
label_bkp = self.labels[-1] + 1
for bkp in new_bkps:
# update labels
self.labels[bkp:] += 1
# update bkp_histo
if self.bkp_histo[0][1] is None: #no breakpoint found before
self.bkp_histo[0] = (bkp, label_bkp)
else:
self.bkp_histo = np.concatenate((self.bkp_histo, [[bkp, label_bkp]]))
label_bkp += 1
nb_ruptures = self.labels[-1]
elif self.alert_level[-1] == 2 or self.alert_level[-1] == 1: #already in alert or internal alert
if self.bkp_histo[-1][0] + 2*self.buffer_time <= self.size_slice: #end of the internal alert
self.endAlert(False)
elif self.bkp_histo[-1][0] + self.buffer_time <= self.size_slice: #end of the real alert
self.endAlert(True)
else: #alert continues
nb_ruptures = self.labels[-1]
else: #no alert
pass
status = np.hstack((self.timestamp.reshape((-1,1)), self.alert_level.reshape((-1,1))))
status = status[status[:,1] != 0]
status = np.vstack((['timestamp', 'status'], status))
nb_rupt = np.hstack((self.timestamp.reshape((-1,1)), np.abs(self.labels.reshape((self.size_slice,1)))))
nb_rupt = np.vstack((['timestamp', 'nb_ruptures'], nb_rupt))
return status, nb_rupt
def updateData(self, df, feature, index):
self.data = (df[feature].to_numpy())[-self.size_slice:]
self.timestamp = (df[index].to_numpy())[-self.size_slice:]
def fonction_ruptures(feature = "PARD@3C52@3C52-M@TE7011A.PNT", index = "TIMESTAMP", size_slice = 5000, jump = 98, pen = 5, update_freq = 98, buffer_time = BUFFER_TIME):
    # Initialization
    # df = pull_data()  # in production; here df is the CSV loaded earlier in the notebook
peltAL = PeltAirLiquide(df, feature, index, size_slice, jump, pen, update_freq, buffer_time)
status = np.hstack((peltAL.timestamp.reshape((-1,1)), peltAL.alert_level.reshape((-1,1))))
status = status[status[:,1] != 0]
status = np.vstack((['timestamp', 'status'], status))
nb_rupt = np.hstack((peltAL.timestamp.reshape((-1,1)), np.abs(peltAL.labels.reshape((peltAL.size_slice,1)))))
nb_rupt = np.vstack((['timestamp', 'nb_ruptures'], nb_rupt))
colorsToJson(status, "Ruptures.json")
#push_data("Ruptures.json", push_local_path = "http://localhost:5000/api/team_rupture")
colorsToJson(nb_rupt, "Nb_rupt.json")
#push_data("Nb_rupt.json", push_local_path = "http://localhost:5000/api/team_rupture_extra")
time_stamp_prec = dt.strptime(peltAL.timestamp[-1], '%Y-%m-%d %H:%M:%S')
# Loop
while True:
#df = pull_data()
time_stamp = dt.strptime(df.loc[df.shape[0] - 1,index], '%Y-%m-%d %H:%M:%S')
if (time_stamp - time_stamp_prec).total_seconds() >= update_freq * 600:
peltAL.updateData(df, feature, index)
status, nb_rupt = peltAL.run()
colorsToJson(status, "Ruptures.json")
#push_data("Ruptures.json", push_local_path = "http://localhost:5000/api/team_rupture")
colorsToJson(nb_rupt, "Nb_rupt.json")
#push_data("Nb_rupt.json", push_local_path = "http://localhost:5000/api/team_rupture_extra")
time_stamp_prec = time_stamp
print("Sleeping...")
sleep(update_freq*600 - 30)
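Finally, a minimal way to launch the monitoring loop, assuming the commented pull_data/push_data endpoints are live (with the defaults chosen in section V):
if __name__ == "__main__":
    fonction_ruptures(jump=98, pen=5, update_freq=98)  # runs until interrupted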