update model analysis

2393a0cf · Dilawar Mahmood · c533912b · 2393a0cf
Commit 2393a0cf authored 4 years ago by Dilawar Mahmood
--- a/notebooks/ModelAnalysis.ipynb
+++ b/notebooks/ModelAnalysis.ipynb
@@ -302,7 +302,6 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "\n",
    "try:\n",
    "    metrics_df = plot_graph(\"moments_accountant\", moments=True)\n",
    "    metrics_df = metrics_df.copy()\n",

 %% Cell type:code id: tags:

 ``` 
 # Root folder of the experiment being analysed. The saved model is loaded from
 # this folder, event logs are read from its sub-folders (train, validation,
 # moments_accountant) and figures are written to PATH/images.
 PATH = "/home/federated/history/logdir/experiment_name" # Absolute path to the experiment folder
 ```

 %% Cell type:code id: tags:

 ``` 
 import tensorflow as tf
 import sys
 import os
 sys.path.append("../")

 from federated.data.data_preprocessing import load_data
 import numpy as np
 from matplotlib import pyplot as plt
 import itertools
 from sklearn.metrics import classification_report, roc_auc_score, roc_curve, auc, confusion_matrix
 from tensorflow.python.summary.summary_iterator import summary_iterator
 from collections import defaultdict
 import pandas as pd

 # Use plotly as the pandas plotting backend: the plotting cells below call
 # .show() and .write_image() on the object returned by DataFrame.plot().
 pd.options.plotting.backend = "plotly"

 %matplotlib inline
 ```

 %% Cell type:code id: tags:

 ``` 
 # NOTE(review): presumably required so that code which starts its own asyncio
 # event loop can run inside the notebook's already-running loop -- confirm.
 import nest_asyncio
 nest_asyncio.apply()
 ```

 %% Cell type:code id: tags:

 ``` 
 """
 Function that evaluates the model.
 Prints accuracy and loss value
 """
 def evaluation(X, y, model):
    """
    Evaluate `model` on (X, y) and print the result.

    Calls model.evaluate(X, y, verbose=0) and reads the first returned entry
    as the loss and the second as the accuracy, which is printed as a
    percentage. Nothing is returned.
    """
    results = model.evaluate(X, y, verbose=0)
    accuracy_pct = results[1] * 100
    loss = results[0]
    print(f"Accuracy: {accuracy_pct}%, Loss: {loss}")
 ```

 %% Cell type:code id: tags:

 ``` 
 # Display names of the classes, in class-index order. They are used as tick
 # labels of the confusion matrix and as target names of the classification
 # report, so they must not carry stray whitespace.
 LABELS = ["N", "S", "V", "F", "U"]
 ```

 %% Cell type:code id: tags:

 ``` 
 """
 Function that creates confusion matrix based upon the dataset and the model.
 Displays the confusion matrix.
 """

 def make_confusion_matrix(X, y, model):
    """
    Plot the row-normalised confusion matrix of `model` on (X, y).

    The figure is saved as {PATH}/images/confusion_matrix.pdf (the images
    folder is created when missing) and then displayed.

    Args:
        X: Inputs passed unchanged to model.predict.
        y: Per-sample score rows; the argmax over axis 1 is taken as the
            true class index (presumably one-hot labels -- confirm).
        model: Object exposing predict(X) that returns per-sample score rows.

    Returns:
        Tuple (y_test, y_pred) with the true and the predicted class indices.
    """
    y_pred = model.predict(X)
    y_test = np.argmax(y, axis=1)
    y_pred = np.argmax(y_pred, axis=1)

    # Pin the label set so the matrix is always len(LABELS) x len(LABELS).
    # Without it a class absent from both y_test and y_pred is dropped and the
    # tick labels set below no longer line up with the cells.
    cnf_matrix = confusion_matrix(y_test, y_pred, labels=np.arange(len(LABELS)))

    # Normalise every row by its number of true samples. Rows without any
    # sample stay at 0 instead of turning into NaN through 0/0.
    row_totals = cnf_matrix.sum(axis=1)[:, np.newaxis]
    cnf_matrix = np.divide(
        cnf_matrix.astype('float'),
        row_totals,
        out=np.zeros(cnf_matrix.shape, dtype=float),
        where=row_totals != 0,
    )

    plt.figure(figsize=(5,5))
    plt.imshow(cnf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.colorbar()
    ticks = np.arange(len(LABELS))
    plt.xticks(ticks, LABELS, rotation=45)
    plt.yticks(ticks, LABELS)

    # Cells above half of the maximum get white text, the others black text.
    threshold = cnf_matrix.max()/2
    for i, j in itertools.product(range(cnf_matrix.shape[0]), range(cnf_matrix.shape[1])):
        plt.text(j,i,format(cnf_matrix[i,j], '.2f'), horizontalalignment="center",
                    color="white" if cnf_matrix[i, j] > threshold else "black")

    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(f"{PATH}/images", exist_ok=True)
    plt.savefig(f"{PATH}/images/confusion_matrix.pdf", bbox_inches='tight')

    plt.show()

    return y_test, y_pred
 ```

 %% Cell type:code id: tags:

 ``` 
 """
 Function that loads model and dataset for analysis
 Returns x_test, y_test and model
 """
 def load(name):
    """
    Load the saved model and the analysis dataset, and print the evaluation.

    The model is always read from the module-level PATH; the `name` argument
    is accepted but not used by the body. The features returned by
    load_data(data_analysis=True) are reshaped to
    (len(features), features.shape[1], 1) before they are evaluated.

    Returns:
        Tuple (X_test, y_test, model) holding the reshaped features, the
        labels as returned by load_data and the loaded model.
    """
    model = tf.keras.models.load_model(PATH)
    features, labels = load_data(data_analysis=True)

    # Append a trailing axis of size 1 (presumably the channel axis the model
    # expects -- confirm against the model definition).
    sample_count = len(features)
    width = features.shape[1]
    features = features.reshape(sample_count, width, 1)

    evaluation(features, labels, model)

    return features, labels, model
 ```

 %% Cell type:code id: tags:

 ``` 
 def dataframe_from_event(path, type, moments=False):
    """
    Read metrics from one event file in {path}/{type}/ into a DataFrame.

    Among the regular files of the folder whose name does not contain
    "empty", the file whose second-to-last dot-separated name field is the
    largest integer is read.

    Args:
        path: Folder that contains the `type` sub-folder.
        type: Sub-folder name; also appended to every column name as
            "{tag}_{type}".
        moments: If True keep only tags containing "privacy_loss", otherwise
            keep tags containing "loss" or "accuracy".

    Returns:
        DataFrame with one column per kept tag, in file order.
    """
    path += f"/{type}/"
    candidates = [
        entry for entry in os.listdir(path)
        if os.path.isfile(os.path.join(path, entry)) and "empty" not in entry
    ]

    # NOTE(review): the parsed field is presumably the creation timestamp in
    # the event file name, making this "the newest file" -- confirm.
    path += max(candidates, key=lambda entry: int(entry.split(".")[-2]))

    if moments:
        def wanted(tag):
            return "privacy_loss" in tag
    else:
        def wanted(tag):
            return "loss" in tag or "accuracy" in tag

    metrics = defaultdict(list)
    for event in summary_iterator(path):
        for value in event.summary.value:
            if not (isinstance(value.simple_value, float) and wanted(value.tag)):
                continue
            column = f"{value.tag}_{type}"
            if value.simple_value == 0.0:
                # A simple_value of exactly 0.0 is taken to mean that the
                # number lives in the tensor field instead (presumably how
                # newer summaries are written -- confirm). A metric that is
                # genuinely 0.0 takes this path as well.
                metrics[column].append(tf.make_ndarray(value.tensor))
            else:
                metrics[column].append(value.simple_value)

    return pd.DataFrame(dict(metrics))
 ```

 %% Cell type:code id: tags:

 ``` 
 def plot_graph(type, moments=False):
    """
    Build the DataFrame of metrics to plot for the experiment stored in PATH.

    Args:
        type: One of "accuracy", "loss" or "moments_accountant".
        moments: If True, read the privacy-loss series from the PATH/{type}
            event folder instead of the train/validation metrics.

    Returns:
        With moments=True, the DataFrame read from PATH/{type} with the
        column "cumulative_privacy_loss_moments_accountant" renamed to
        "cumulative_privacy_loss". Otherwise the train and validation frames
        concatenated column-wise and restricted to the columns whose name
        contains `type`.

    Raises:
        ValueError: If `type` is not one of the accepted values.
    """
    valid_types = ("accuracy", "loss", "moments_accountant")
    if type not in valid_types:
        # The message lists every accepted value, moments_accountant included.
        raise ValueError(f"type must be accuracy, loss or moments_accountant, not {type}")

    if moments:
        # Forward the flag so that only the privacy-loss tags are collected;
        # without it the generic loss/accuracy filter would be applied.
        moments_df = dataframe_from_event(PATH, type, moments=True)
        moments_df = moments_df.rename(columns={"cumulative_privacy_loss_moments_accountant": "cumulative_privacy_loss"})
        return moments_df

    train_df = dataframe_from_event(PATH, "train")
    validation_df = dataframe_from_event(PATH, "validation")

    cols = list(train_df.columns) + list(validation_df.columns)

    return pd.concat([train_df, validation_df], axis=1)[[col for col in cols if type in col]]
 ```

 %% Cell type:code id: tags:

 ``` 
 # Load the saved model and the analysis dataset; load() also prints the
 # accuracy and loss of the model on that data.
 X_test, y_test, model_centralized = load(PATH)
 ```

 %% Cell type:code id: tags:

 ``` 
 # Print the summary of the loaded model.
 model_centralized.summary()
 ```

 %% Cell type:code id: tags:

 ``` 
 # Plot and save the confusion matrix. Note that y_test is rebound here: from
 # now on it holds class indices (argmax over axis 1), as does y_pred.
 y_test, y_pred = make_confusion_matrix(X_test, y_test, model_centralized)
 ```

 %% Cell type:code id: tags:

 ``` 
 # Print the classification report for the class indices computed above, with
 # LABELS as the display names of the classes.
 print(classification_report(y_test, y_pred, target_names=LABELS))
 ```

 %% Cell type:code id: tags:

 ``` 
 # Plot training and validation accuracy per epoch and save the figure.
 metrics_df = plot_graph("accuracy")
 metrics_df = metrics_df.copy()

 # Columns whose name contains "validation" are labelled as validation
 # metrics; every other column is treated as a training metric.
 for col in metrics_df.columns:
    if "validation" in col:
        metrics_df.rename(columns={col: 'Validation Accuracy'}, inplace=True)
    else:
        metrics_df.rename(columns={col: 'Training Accuracy'}, inplace=True)

 # Number the epochs from 1 and blank the legend title so that this figure
 # uses the same axis and legend conventions as the loss figure.
 metrics_df.index += 1

 fig = metrics_df.plot(labels=dict(index="Epoch", value="Accuracy", variable=""))
 fig.show()

 fig.write_image(f"{PATH}/images/accuracy.pdf")
 ```

 %% Cell type:code id: tags:

 ``` 
 # Plot training and validation loss per epoch and save the figure.
 metrics_df = plot_graph("loss").copy()

 # Columns whose name contains "validation" are labelled as validation
 # metrics; every other column is treated as a training metric.
 loss_names = {
    col: ('Validation Loss' if "validation" in col else 'Training Loss')
    for col in metrics_df.columns
 }
 metrics_df.rename(columns=loss_names, inplace=True)

 # Number the epochs from 1 instead of 0.
 metrics_df.index += 1

 # variable="" blanks the legend title.
 axis_labels = dict(index="Epoch", value="Loss", variable="")
 fig = metrics_df.plot(labels=axis_labels)
 fig.show()

 fig.write_image(f"{PATH}/images/loss.pdf")
 ```

 %% Cell type:code id: tags:

 ``` 
-
 # Plot the cumulative privacy loss when the experiment logged one. The whole
 # cell is best effort: experiments without a moments accountant have no such
 # event folder, so any failure is reported instead of raised.
 try:
    metrics_df = plot_graph("moments_accountant", moments=True)
    metrics_df = metrics_df.copy()

    metrics_df.rename(columns={"cumulative_privacy_loss": "Privacy Loss"}, inplace=True)

    fig = metrics_df.plot(labels=dict(index="Epoch", value="ε", variable=""))
    fig.show()

    fig.write_image(f"{PATH}/images/moments_accountant.pdf")
 except Exception as err:
    # Exception (not a bare except) keeps KeyboardInterrupt and SystemExit
    # working; the reason is printed so that an unrelated failure, such as
    # the image export, is not mistaken for missing data.
    print("No moments accountant.")
    print(f"Reason: {err!r}")
 ```