Jorge III Altamirano Astorga, Luz Aurora Hernández Martínez, Ita-Andehui Santiago Castillejos.
Prof. Dr. Edgar Francisco Román-Rangel.
We trained our models using Vertex AI in order to have enough computational resources to load as much history as possible within our budget.
import re, os, sys, shelve, time, dill, io
from pickle import PicklingError
from dill import Pickler, Unpickler
shelve.Pickler = Pickler
shelve.Unpickler = Unpickler
from IPython.display import display, Markdown, Math, clear_output, Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotnine import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
def render_mpl_table(data, col_width=3.0, row_height=0.625, font_size=14,
                     header_color='#40466e', row_colors=['#f1f1f2', 'w'], edge_color='w',
                     bbox=[0, 0, 1, 1], header_columns=0,
                     ax=None, **kwargs):
    """
    Draw a DataFrame as a styled matplotlib table and show the figure.

    Taken from https://stackoverflow.com/a/39358722/7323086

    Inputs:
        data: DataFrame to render (its `values`, `columns` and `shape` are used).
        col_width, row_height: per-cell size used to compute the figure
            size when no axes are supplied.
        font_size: font size of the body cells; header text is 5% larger.
        header_color: face color of the header row / header columns.
        row_colors: face colors cycled over the body rows.
        edge_color: edge color of every cell.
        bbox: bounding box forwarded to `ax.table`.
        header_columns: number of leading columns styled like the header.
        ax: axes to draw on; a new figure is created when None.
        **kwargs: forwarded to `ax.table`.

    The list defaults are only read, never mutated, so sharing them
    between calls is harmless.
    """
    if ax is None:
        # (n_cols, n_rows + 1 header row) scaled by the cell size.
        size = (np.array(data.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
        _, ax = plt.subplots(figsize=size)
        ax.axis('off')
    mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, **kwargs)
    mpl_table.auto_set_font_size(False)
    mpl_table.set_fontsize(font_size)
    # get_celld() is the public accessor for the {(row, col): cell} mapping;
    # the previous six.iteritems(...) call failed because six is not imported.
    for (row, col), cell in mpl_table.get_celld().items():
        cell.set_edgecolor(edge_color)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='bold', color='w', size=font_size*1.05)
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[row % len(row_colors)])
    plt.show()
# Disabled leftover from an earlier draft (no `df` is defined in this part of the file):
#df.dropna(inplace=True)
# Wipe whatever the notebook cell has printed so far (IPython.display helper).
clear_output()
def performance_plot(history, a=None, b=None,
                     metrics=["accuracy", "val_accuracy"],
                     plot_validation=True,
                     title="Gráficas de Desempeño."):
    """
    Plot the loss and the requested metrics of a training history.

    Inputs:
        history: dict of per-epoch sequences. Needs "loss" and one entry
            per name in `metrics`; "val_loss" / "val_<metric>" are drawn
            as validation curves when they exist.
        a: first epoch index to plot (defaults to 0).
        b: end of the epoch range, exclusive (defaults to the number of
            entries in history["loss"]).
        metrics: history keys to plot, one panel each.
        plot_validation: whether the validation curves are drawn.
        title: figure title.

    `a` and `b` may be passed in either order.
    """
    if a is None:
        a = 0
    if b is None:
        b = len(history['loss'])
    # Order both bounds in a single step. Assigning them one after the
    # other used the already-updated `a`, so reversed bounds collapsed
    # into an empty range.
    a, b = min(a, b), max(a, b)
    # One panel for the loss plus one per metric, two panels per row.
    # Integer ceiling: np.round rounds halves to even and came up one row
    # short for four metrics.
    imgrows = (len(metrics) + 2) // 2

    plt.figure(figsize=(14, 5 * imgrows))
    plt.suptitle(title)

    # Tick labels are epoch indices; positions are relative to the slice start.
    quantiles = np.quantile(range(a, b),
                            [.2, .4, .6, .8]).round(0).astype(int)
    quantiles = np.insert(quantiles, 0, [a])
    quantiles += 1
    quantiles = np.append(quantiles, [b-1])

    # The loss panel comes first, then one panel per metric.
    panels = [("loss", "Loss")] + [(metric, metric) for metric in metrics]
    for position, (key, label) in enumerate(panels, start=1):
        plt.subplot(imgrows, 2, position)
        plt.title(label)
        plt.plot(history[key][a:b], label='Training', linewidth=2)
        val_key = "val_" + key
        # Skip the validation curve when the history has no such entry
        # (e.g. the default metric "val_accuracy" has no "val_val_accuracy").
        if plot_validation and val_key in history:
            plt.plot(history[val_key][a:b], label='Validation', linewidth=2)
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel(label)
        plt.xticks(ticks=quantiles - a, labels=quantiles)
        plt.grid(True)
    plt.show()
def model_performance_df(models_path=""):
    """
    Build a performance table from the dill files stored in `models_path`.

    Every file whose name ends in "dill" is loaded. Its object name is the
    file name with dots replaced by underscores and the trailing "_dill"
    dropped. For each "<model>_time" object the table gets one row built
    from "<model>_time", "<model>_hist" (needs "loss", "val_loss" and
    "val_mae") and "<model>_tsparams" (needs "window_size_days", "stride",
    "sampling_rate" and "batch_size"). The MAE column is mapped back
    through the "scaler-iaq" object.

    Inputs:
        models_path: directory to scan; "" means the current directory.
    Returns:
        DataFrame sorted by MSE (ascending) with a fresh index. It is
        empty, with the usual columns, when no model is found.
    Raises:
        KeyError: a model lacks its "_hist" or "_tsparams" object, or
            "scaler-iaq" is missing while there are rows to rescale.
    """
    # NOTE: dill.load can execute arbitrary code stored in the file; only
    # point this function at directories you trust.
    objects = {}
    # sorted() makes the pre-sort row order independent of the file system.
    for filename in sorted(os.listdir(models_path or ".")):
        if not filename.endswith("dill"):
            continue
        object_name = re.sub(r"_dill$", "", re.sub(r"\.", "_", filename))
        with open(os.path.join(models_path, filename), "rb") as file:
            objects[object_name] = dill.load(file)

    # Join the three objects of each model by name. Filtering three
    # independent lists and aligning them by position could pair one
    # model's time with another model's metrics.
    model_names = [re.sub("_time$", "", name)
                   for name in objects if name.endswith("_time")]
    missing = [model + suffix
               for model in model_names
               for suffix in ("_hist", "_tsparams")
               if model + suffix not in objects]
    if missing:
        raise KeyError(f"Objects missing from {models_path!r}: {missing}")
    hists = [objects[model + "_hist"] for model in model_names]
    tsparams = [objects[model + "_tsparams"] for model in model_names]

    perf_table = pd.DataFrame({
        "Model": model_names,
        "Time": [objects[model + "_time"] for model in model_names],
        "Epochs": [len(hist["loss"]) for hist in hists],
        "Window Size Days": [params["window_size_days"] for params in tsparams],
        "Stride": [params["stride"] for params in tsparams],
        "Sampling Rate": [params["sampling_rate"] for params in tsparams],
        "Batch Size": [params["batch_size"] for params in tsparams],
        "MSE": [hist["val_loss"][-1] for hist in hists],
        "MAE": [hist["val_mae"][-1] for hist in hists],
    })
    if not perf_table.empty:
        ### Remove MinMaxScaler from MAE: ie IAQ scale.
        # The scaler expects (n_samples, n_features); one MAE per row.
        # NOTE(review): inverse_transform also adds the scaler's fitted
        # offset; for an error magnitude only the scale factor should
        # apply. Confirm the fitted minimum is 0.
        perf_table["MAE"] = (
            objects["scaler-iaq"]
            .inverse_transform(perf_table["MAE"].to_numpy().reshape(-1, 1))
            .reshape(-1)
        )
    perf_table.sort_values("MSE", inplace=True)
    perf_table.reset_index(drop=True, inplace=True)
    return perf_table
# Results directory of the "hyper5min" experiment; the table is echoed as
# the cell output.
base_path = "data/vertex-models/output-hyper5min/"
perf_table = model_performance_df(models_path=base_path)
perf_table
/opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 0.22.2.post1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk. /opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 1.0.1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk.
| | Model | Time | Epochs | Window Size Days | Stride | Sampling Rate | Batch Size | MSE | MAE |
---|---|---|---|---|---|---|---|---|---|
0 | model_dnn00 | 217.878893 | 31 | 8 | 1 | 2 | 128 | 0.022354 | 65.318006 |
1 | model_conv00 | 304.261804 | 31 | 8 | 1 | 2 | 128 | 0.022398 | 65.414109 |
2 | model_conv02 | 588.811753 | 31 | 8 | 1 | 2 | 128 | 0.022474 | 65.567544 |
3 | model_rnn00 | 1594.912729 | 31 | 8 | 1 | 2 | 128 | 0.023116 | 66.682703 |
4 | model_best01a | 669.725358 | 31 | 8 | 1 | 2 | 128 | 0.023511 | 67.104972 |
5 | model_lstm00 | 266.832087 | 31 | 8 | 1 | 2 | 128 | 0.024388 | 65.582181 |
6 | model_lstm02 | 323.540315 | 31 | 8 | 1 | 2 | 128 | 0.025541 | 69.957182 |
7 | model_dnn01 | 217.440506 | 31 | 8 | 1 | 2 | 128 | 0.026252 | 70.855299 |
8 | model_dnn02 | 293.978746 | 31 | 8 | 1 | 2 | 128 | 0.034087 | 79.413494 |
9 | model_rnn02 | 1600.787760 | 31 | 8 | 1 | 2 | 128 | 0.042607 | 87.835063 |
10 | model_best03a | 665.194710 | 31 | 8 | 1 | 2 | 128 | 0.043218 | 79.838635 |
11 | model_best03b | 416.240680 | 31 | 8 | 1 | 2 | 128 | 0.047076 | 92.035175 |
# Categorical copy of the model names whose category order is the table's
# current row order; it is used as the x axis and fill of the bar chart.
Models_list = list(perf_table["Model"])
Models = pd.Categorical(perf_table["Model"], categories=Models_list)
perf_table["Modelo2"] = Models
# Snapshot kept for the cross-experiment comparisons further down.
perf_table_5min = perf_table.copy()
(
    ggplot(perf_table.head(5), aes(x="Modelo2", y="MAE", fill="Modelo2"))
    + geom_bar(stat="identity")
    + labs(
        y="Mean Absolute Error",
        x="Models",
        title="Plot for Comparing the Models\nPerformance on IAQ Scale.",
    )
    + theme(legend_position="none", axis_text_x=element_text(rotation=45))
)
<ggplot: (8786590715385)>
# Numbered markdown entry per model, each followed by an empty markdown cell.
for position, model in enumerate(perf_table["Model"], start=1):
    display(Markdown(f"{position}. Model \"{model}\""))
    display(Markdown(f""))
# Results directory of the "hyper2min" experiment; the table is echoed as
# the cell output.
base_path = "data/vertex-models/output-hyper2min/"
perf_table = model_performance_df(models_path=base_path)
perf_table
/opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 1.0.1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk. /opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 0.22.2.post1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk.
| | Model | Time | Epochs | Window Size Days | Stride | Sampling Rate | Batch Size | MSE | MAE |
---|---|---|---|---|---|---|---|---|---|
0 | model_dnn02 | 3842.451737 | 77 | 8 | 1 | 2 | 128 | 0.021211 | 63.329056 |
1 | model_lstm00 | 3538.611173 | 77 | 8 | 1 | 2 | 128 | 0.021481 | 63.077525 |
2 | model_dnn00 | 2902.502979 | 77 | 8 | 1 | 2 | 128 | 0.022219 | 65.089605 |
3 | model_best03b | 5510.362569 | 77 | 8 | 1 | 2 | 128 | 0.024159 | 68.114908 |
4 | model_rnn00 | 18605.987639 | 77 | 8 | 1 | 2 | 128 | 0.024258 | 68.245714 |
5 | model_best01a | 9027.691227 | 77 | 8 | 1 | 2 | 128 | 0.024798 | 68.948664 |
6 | model_conv02 | 8152.342661 | 77 | 8 | 1 | 2 | 128 | 0.026240 | 70.762903 |
7 | model_dnn01 | 2908.461928 | 77 | 8 | 1 | 2 | 128 | 0.026618 | 71.207453 |
8 | model_rnn02 | 19003.077316 | 77 | 8 | 1 | 2 | 128 | 0.030488 | 75.748702 |
9 | model_best03a | 8879.601115 | 77 | 8 | 1 | 2 | 128 | 0.036016 | 81.302087 |
10 | model_lstm02 | 3530.718313 | 77 | 8 | 1 | 2 | 128 | 0.156364 | 182.239953 |
11 | model_conv00 | 4126.700369 | 77 | 8 | 1 | 2 | 128 | 0.919341 | 465.832692 |
# Categorical copy of the model names whose category order is the table's
# current row order; it is used as the x axis and fill of the bar chart.
Models_list = list(perf_table["Model"])
Models = pd.Categorical(perf_table["Model"], categories=Models_list)
perf_table["Modelo2"] = Models
# Snapshot kept for the cross-experiment comparisons further down.
perf_table_2min = perf_table.copy()
(
    ggplot(perf_table.head(5), aes(x="Modelo2", y="MAE", fill="Modelo2"))
    + geom_bar(stat="identity")
    + labs(
        y="Mean Absolute Error",
        x="Models",
        title="Plot for Comparing the Models\nPerformance on IAQ Scale.",
    )
    + theme(legend_position="none", axis_text_x=element_text(rotation=45))
)
<ggplot: (8786590684605)>
# Results directory of the "hyper1min" experiment; the table is echoed as
# the cell output.
base_path = "data/vertex-models/output-hyper1min/"
perf_table = model_performance_df(models_path=base_path)
perf_table
/opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 1.0.1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk. /opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 0.22.2.post1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk.
| | Model | Time | Epochs | Window Size Days | Stride | Sampling Rate | Batch Size | MSE | MAE |
---|---|---|---|---|---|---|---|---|---|
0 | model_rnn02 | 87184.824203 | 100 | 8 | 1 | 2 | 128 | 0.160145 | 178.394232 |
# Categorical copy of the model names whose category order is the table's
# current row order; it is used as the x axis and fill of the bar chart.
Models_list = list(perf_table["Model"])
Models = pd.Categorical(perf_table["Model"], categories=Models_list)
perf_table["Modelo2"] = Models
# Snapshot kept for the cross-experiment comparisons further down.
perf_table_1min = perf_table.copy()
(
    ggplot(perf_table.head(5), aes(x="Modelo2", y="MAE", fill="Modelo2"))
    + geom_bar(stat="identity")
    + labs(
        y="Mean Absolute Error",
        x="Models",
        title="Plot for Comparing the Models\nPerformance on IAQ Scale.",
    )
    + theme(legend_position="none", axis_text_x=element_text(rotation=45))
)
<ggplot: (8786590715329)>
# MAE-vs-Time scatter of the 5-minute results (red) with the 2-minute
# results (blue) overlaid on the same axes.
# NOTE(review): the blue layers pass Series taken from perf_table_2min while
# the plot's data is perf_table_5min; presumably this only lines up because
# both tables have the same number of rows. Confirm.
# NOTE(review): the +5 / -200 / -150 terms move the blue points and labels
# away from their measured values, presumably to reduce overlap. Confirm
# this is intended before reading values off the chart.
# NOTE(review): size=9 sits inside aes(), so it is treated as a mapped
# aesthetic rather than a fixed text size. Confirm.
(
ggplot(perf_table_5min, aes(x="MAE", y="Time")) +
# 5-minute experiment: points plus model-name labels shifted right by 35.
geom_point(color="red") +
geom_text(aes(x=perf_table_5min["MAE"]+35, label="Model",
size=9), color="red") +
# 2-minute experiment: points plus model-name labels.
geom_point(aes(x=perf_table_2min["MAE"]+5,
y=perf_table_2min["Time"]-200),
color="blue") +
geom_text(aes(x=perf_table_2min["MAE"]+5+30,
y=perf_table_2min["Time"]-200-150,
label="Model",
size=9), color="blue") +
labs(x="MAE", y="Time",
title="Comparison of Models and Data Sampling."
) +
theme(legend_position="none", axis_text_x=element_text(rotation=45))
).draw(return_ggplot=False)
# Trailing empty string: the cell's last expression is "" instead of the
# value returned by draw().
""
''
# Stack the three experiments (2 Min first, then 5 Min, then 1 Min) into one
# table, tagging every row with the experiment it came from, then renumber.
perf_table_plotly = pd.concat([
    perf_table_2min.assign(Resample="2 Min"),
    perf_table_5min.assign(Resample="5 Min"),
    perf_table_1min.assign(Resample="1 Min"),
])
perf_table_plotly = perf_table_plotly.reset_index(drop=True)
perf_table_plotly
| | Model | Time | Epochs | Window Size Days | Stride | Sampling Rate | Batch Size | MSE | MAE | Modelo2 | Resample |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | model_dnn02 | 3842.451737 | 77 | 8 | 1 | 2 | 128 | 0.021211 | 63.329056 | model_dnn02 | 2 Min |
1 | model_lstm00 | 3538.611173 | 77 | 8 | 1 | 2 | 128 | 0.021481 | 63.077525 | model_lstm00 | 2 Min |
2 | model_dnn00 | 2902.502979 | 77 | 8 | 1 | 2 | 128 | 0.022219 | 65.089605 | model_dnn00 | 2 Min |
3 | model_best03b | 5510.362569 | 77 | 8 | 1 | 2 | 128 | 0.024159 | 68.114908 | model_best03b | 2 Min |
4 | model_rnn00 | 18605.987639 | 77 | 8 | 1 | 2 | 128 | 0.024258 | 68.245714 | model_rnn00 | 2 Min |
5 | model_best01a | 9027.691227 | 77 | 8 | 1 | 2 | 128 | 0.024798 | 68.948664 | model_best01a | 2 Min |
6 | model_conv02 | 8152.342661 | 77 | 8 | 1 | 2 | 128 | 0.026240 | 70.762903 | model_conv02 | 2 Min |
7 | model_dnn01 | 2908.461928 | 77 | 8 | 1 | 2 | 128 | 0.026618 | 71.207453 | model_dnn01 | 2 Min |
8 | model_rnn02 | 19003.077316 | 77 | 8 | 1 | 2 | 128 | 0.030488 | 75.748702 | model_rnn02 | 2 Min |
9 | model_best03a | 8879.601115 | 77 | 8 | 1 | 2 | 128 | 0.036016 | 81.302087 | model_best03a | 2 Min |
10 | model_lstm02 | 3530.718313 | 77 | 8 | 1 | 2 | 128 | 0.156364 | 182.239953 | model_lstm02 | 2 Min |
11 | model_conv00 | 4126.700369 | 77 | 8 | 1 | 2 | 128 | 0.919341 | 465.832692 | model_conv00 | 2 Min |
12 | model_dnn00 | 217.878893 | 31 | 8 | 1 | 2 | 128 | 0.022354 | 65.318006 | model_dnn00 | 5 Min |
13 | model_conv00 | 304.261804 | 31 | 8 | 1 | 2 | 128 | 0.022398 | 65.414109 | model_conv00 | 5 Min |
14 | model_conv02 | 588.811753 | 31 | 8 | 1 | 2 | 128 | 0.022474 | 65.567544 | model_conv02 | 5 Min |
15 | model_rnn00 | 1594.912729 | 31 | 8 | 1 | 2 | 128 | 0.023116 | 66.682703 | model_rnn00 | 5 Min |
16 | model_best01a | 669.725358 | 31 | 8 | 1 | 2 | 128 | 0.023511 | 67.104972 | model_best01a | 5 Min |
17 | model_lstm00 | 266.832087 | 31 | 8 | 1 | 2 | 128 | 0.024388 | 65.582181 | model_lstm00 | 5 Min |
18 | model_lstm02 | 323.540315 | 31 | 8 | 1 | 2 | 128 | 0.025541 | 69.957182 | model_lstm02 | 5 Min |
19 | model_dnn01 | 217.440506 | 31 | 8 | 1 | 2 | 128 | 0.026252 | 70.855299 | model_dnn01 | 5 Min |
20 | model_dnn02 | 293.978746 | 31 | 8 | 1 | 2 | 128 | 0.034087 | 79.413494 | model_dnn02 | 5 Min |
21 | model_rnn02 | 1600.787760 | 31 | 8 | 1 | 2 | 128 | 0.042607 | 87.835063 | model_rnn02 | 5 Min |
22 | model_best03a | 665.194710 | 31 | 8 | 1 | 2 | 128 | 0.043218 | 79.838635 | model_best03a | 5 Min |
23 | model_best03b | 416.240680 | 31 | 8 | 1 | 2 | 128 | 0.047076 | 92.035175 | model_best03b | 5 Min |
24 | model_rnn02 | 87184.824203 | 100 | 8 | 1 | 2 | 128 | 0.160145 | 178.394232 | model_rnn02 | 1 Min |
import plotly.express as px

# Interactive MAE-vs-Time scatter, one color per "Resample" value and the
# model name in the hover box.
fig = px.scatter(
    perf_table_plotly,
    x="MAE",
    y="Time",
    color="Resample",
    hover_data=["Model"],
    title="Comparison of Models and Data Sampling.",
)
# Mode-bar options: https://plotly.com/python/configuration-options/
# Buttons not named below (e.g. resetScale2d, the drawing tools,
# hoverClosestGl2d) are not passed for removal.
fig.write_html(
    "paper07_model_comparison.comp.html",
    config=dict(
        displaylogo=False,
        modeBarButtonsToRemove=[
            "pan2d", "select2d", "lasso2d",
            "zoomIn2d", "zoomOut2d", "autoScale2d",
            "toImage",
            "toggleSpikelines", "toggleHover",
            "hoverClosestCartesian", "hoverCompareCartesian",
        ],
    ),
)
# Load the "hyper5min-w15-stride1-samplingrate2" results and chart the MAE
# of the first five rows.
base_path = "data/vertex-models/output-hyper5min-w15-stride1-samplingrate2/"
perf_table_5min_w15_stride1_samplerate2 = model_performance_df(models_path=base_path)
# Categorical copy of the model names, categories in the table's row order.
Models_list = list(perf_table_5min_w15_stride1_samplerate2["Model"])
Models = pd.Categorical(
    perf_table_5min_w15_stride1_samplerate2["Model"],
    categories=Models_list,
)
perf_table_5min_w15_stride1_samplerate2["Modelo2"] = Models
perf_table_5min_w15_stride1_samplerate2 = perf_table_5min_w15_stride1_samplerate2.copy()
(
    ggplot(
        perf_table_5min_w15_stride1_samplerate2.head(5),
        aes(x="Modelo2", y="MAE", fill="Modelo2"),
    )
    + geom_bar(stat="identity")
    + labs(
        y="Mean Absolute Error",
        x="Models",
        title="Plot for Comparing the Models\nPerformance on IAQ Scale.",
    )
    + theme(legend_position="none", axis_text_x=element_text(rotation=45))
)
/opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 1.0.1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk.
<ggplot: (8786589409769)>
# Load the "hyper5min-w15-stride2-samplingrate2" results and chart the MAE
# of the first five rows.
base_path = "data/vertex-models/output-hyper5min-w15-stride2-samplingrate2/"
perf_table_5min_w15_stride2_samplerate2 = model_performance_df(models_path=base_path)
# Categorical copy of the model names, categories in the table's row order.
Models_list = list(perf_table_5min_w15_stride2_samplerate2["Model"])
Models = pd.Categorical(
    perf_table_5min_w15_stride2_samplerate2["Model"],
    categories=Models_list,
)
perf_table_5min_w15_stride2_samplerate2["Modelo2"] = Models
perf_table_5min_w15_stride2_samplerate2 = perf_table_5min_w15_stride2_samplerate2.copy()
(
    ggplot(
        perf_table_5min_w15_stride2_samplerate2.head(5),
        aes(x="Modelo2", y="MAE", fill="Modelo2"),
    )
    + geom_bar(stat="identity")
    + labs(
        y="Mean Absolute Error",
        x="Models",
        title="Plot for Comparing the Models\nPerformance on IAQ Scale.",
    )
    + theme(legend_position="none", axis_text_x=element_text(rotation=45))
)
/opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 1.0.1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk.
<ggplot: (8786588456561)>
# Stack the three 5-minute experiments, tagging every row with its
# configuration, then rank all rows by MSE and renumber.
perf_table_plotly = pd.concat([
    perf_table_5min.assign(
        Resample="5 Min, 1 week history, Stride 1, Sample Rate 2"),
    perf_table_5min_w15_stride1_samplerate2.assign(
        Resample="5 Min, 2 week history, Stride 1, Sample Rate 2"),
    perf_table_5min_w15_stride2_samplerate2.assign(
        Resample="5 Min, 2 week history, Stride 2, Sample Rate 2"),
])
perf_table_plotly = perf_table_plotly.sort_values("MSE").reset_index(drop=True)
perf_table_plotly
| | Model | Time | Epochs | Window Size Days | Stride | Sampling Rate | Batch Size | MSE | MAE | Modelo2 | Resample |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | model_best01a | 289.835371 | 14 | 15 | 2 | 2 | 128 | 0.019915 | 59.518684 | model_best01a | 5 Min, 2 week history, Stride 2, Sample Rate 2 |
1 | model_conv00 | 416.815034 | 28 | 15 | 1 | 2 | 128 | 0.020201 | 60.608897 | model_conv00 | 5 Min, 2 week history, Stride 1, Sample Rate 2 |
2 | model_conv00 | 145.192476 | 14 | 15 | 2 | 2 | 128 | 0.020284 | 59.472804 | model_conv00 | 5 Min, 2 week history, Stride 2, Sample Rate 2 |
3 | model_conv02 | 799.290095 | 28 | 15 | 1 | 2 | 128 | 0.020658 | 61.578502 | model_conv02 | 5 Min, 2 week history, Stride 1, Sample Rate 2 |
4 | model_dnn00 | 261.228681 | 28 | 15 | 1 | 2 | 128 | 0.021193 | 63.077436 | model_dnn00 | 5 Min, 2 week history, Stride 1, Sample Rate 2 |
5 | model_dnn00 | 217.878893 | 31 | 8 | 1 | 2 | 128 | 0.022354 | 65.318006 | model_dnn00 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
6 | model_conv00 | 304.261804 | 31 | 8 | 1 | 2 | 128 | 0.022398 | 65.414109 | model_conv00 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
7 | model_lstm00 | 338.718267 | 28 | 15 | 1 | 2 | 128 | 0.022461 | 57.847824 | model_lstm00 | 5 Min, 2 week history, Stride 1, Sample Rate 2 |
8 | model_conv02 | 588.811753 | 31 | 8 | 1 | 2 | 128 | 0.022474 | 65.567544 | model_conv02 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
9 | model_rnn00 | 1594.912729 | 31 | 8 | 1 | 2 | 128 | 0.023116 | 66.682703 | model_rnn00 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
10 | model_dnn00 | 76.777939 | 14 | 15 | 2 | 2 | 128 | 0.023332 | 58.157114 | model_dnn00 | 5 Min, 2 week history, Stride 2, Sample Rate 2 |
11 | model_best01a | 669.725358 | 31 | 8 | 1 | 2 | 128 | 0.023511 | 67.104972 | model_best01a | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
12 | model_lstm00 | 266.832087 | 31 | 8 | 1 | 2 | 128 | 0.024388 | 65.582181 | model_lstm00 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
13 | model_best01a | 890.749112 | 28 | 15 | 1 | 2 | 128 | 0.024402 | 59.030443 | model_best01a | 5 Min, 2 week history, Stride 1, Sample Rate 2 |
14 | model_lstm02 | 323.540315 | 31 | 8 | 1 | 2 | 128 | 0.025541 | 69.957182 | model_lstm02 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
15 | model_dnn01 | 217.440506 | 31 | 8 | 1 | 2 | 128 | 0.026252 | 70.855299 | model_dnn01 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
16 | model_lstm00 | 114.022465 | 14 | 15 | 2 | 2 | 128 | 0.032799 | 79.699521 | model_lstm00 | 5 Min, 2 week history, Stride 2, Sample Rate 2 |
17 | model_dnn02 | 293.978746 | 31 | 8 | 1 | 2 | 128 | 0.034087 | 79.413494 | model_dnn02 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
18 | model_conv02 | 270.753144 | 14 | 15 | 2 | 2 | 128 | 0.034809 | 82.422247 | model_conv02 | 5 Min, 2 week history, Stride 2, Sample Rate 2 |
19 | model_rnn02 | 1600.787760 | 31 | 8 | 1 | 2 | 128 | 0.042607 | 87.835063 | model_rnn02 | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
20 | model_best03a | 665.194710 | 31 | 8 | 1 | 2 | 128 | 0.043218 | 79.838635 | model_best03a | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
21 | model_best03b | 416.240680 | 31 | 8 | 1 | 2 | 128 | 0.047076 | 92.035175 | model_best03b | 5 Min, 1 week history, Stride 1, Sample Rate 2 |
# Interactive MAE-vs-Time scatter, one color per "Resample" value and the
# model name in the hover box.
fig = px.scatter(
    perf_table_plotly,
    x="MAE",
    y="Time",
    color="Resample",
    hover_data=["Model"],
    title="Comparison of Models and Data Sampling.",
)
# Mode-bar options: https://plotly.com/python/configuration-options/
# Buttons not named below (e.g. resetScale2d, the drawing tools,
# hoverClosestGl2d) are not passed for removal.
fig.write_html(
    "paper07_model_comparison.comp-1w-vs-2w.html",
    config=dict(
        displaylogo=False,
        modeBarButtonsToRemove=[
            "pan2d", "select2d", "lasso2d",
            "zoomIn2d", "zoomOut2d", "autoScale2d",
            "toImage",
            "toggleSpikelines", "toggleHover",
            "hoverClosestCartesian", "hoverCompareCartesian",
        ],
    ),
)
def model_performance_df(models_path=""):
    """
    Build a performance table from the dill files stored in `models_path`.

    Every file whose name ends in "dill" is loaded. Its object name is the
    file name with dots replaced by underscores and the trailing "_dill"
    dropped. For each "<model>_time" object the table gets one row built
    from "<model>_time", "<model>_hist" (needs "loss", "val_loss" and
    "val_mae") and "<model>_tsparams" (needs "window_size_days", "stride",
    "sampling_rate" and "batch_size"). The MAE column is mapped back
    through the "scaler-iaq" object, which is also printed (None when it
    was not found).

    Inputs:
        models_path: directory to scan; "" means the current directory.
    Returns:
        DataFrame sorted by MAE (ascending) with a fresh index. It is
        empty, with the usual columns, when no model is found.
    Raises:
        KeyError: a model lacks its "_hist" or "_tsparams" object, or
            "scaler-iaq" is missing while there are rows to rescale.
    """
    # NOTE: dill.load can execute arbitrary code stored in the file; only
    # point this function at directories you trust.
    objects = {}
    # sorted() makes the pre-sort row order independent of the file system.
    for filename in sorted(os.listdir(models_path or ".")):
        if not filename.endswith("dill"):
            continue
        object_name = re.sub(r"_dill$", "", re.sub(r"\.", "_", filename))
        with open(os.path.join(models_path, filename), "rb") as file:
            objects[object_name] = dill.load(file)

    scaler = objects.get("scaler-iaq")
    print(scaler)

    # Join the three objects of each model by name. Filtering three
    # independent lists and aligning them by position could pair one
    # model's time with another model's metrics.
    model_names = [re.sub("_time$", "", name)
                   for name in objects if name.endswith("_time")]
    missing = [model + suffix
               for model in model_names
               for suffix in ("_hist", "_tsparams")
               if model + suffix not in objects]
    if missing:
        raise KeyError(f"Objects missing from {models_path!r}: {missing}")
    hists = [objects[model + "_hist"] for model in model_names]
    tsparams = [objects[model + "_tsparams"] for model in model_names]

    perf_table = pd.DataFrame({
        "Model": model_names,
        "Time": [objects[model + "_time"] for model in model_names],
        "Epochs": [len(hist["loss"]) for hist in hists],
        "Window Size Days": [params["window_size_days"] for params in tsparams],
        "Stride": [params["stride"] for params in tsparams],
        "Sampling Rate": [params["sampling_rate"] for params in tsparams],
        "Batch Size": [params["batch_size"] for params in tsparams],
        "MSE": [hist["val_loss"][-1] for hist in hists],
        "MAE": [hist["val_mae"][-1] for hist in hists],
    })
    if not perf_table.empty:
        if scaler is None:
            raise KeyError("scaler-iaq")
        ### Remove MinMaxScaler from MAE: ie IAQ scale.
        # The scaler expects (n_samples, n_features); one MAE per row.
        # NOTE(review): inverse_transform also adds the scaler's fitted
        # offset; for an error magnitude only the scale factor should
        # apply. Confirm the fitted minimum is 0.
        perf_table["MAE"] = (
            scaler
            .inverse_transform(perf_table["MAE"].to_numpy().reshape(-1, 1))
            .reshape(-1)
        )
    perf_table.sort_values("MAE", inplace=True)
    perf_table.reset_index(drop=True, inplace=True)
    return perf_table
# Results directory of the "output-2w" experiment; the table is echoed as
# the cell output.
base_path = "data/vertex-models/output-2w/"
perf_table = model_performance_df(models_path=base_path)
perf_table
MinMaxScaler()
/opt/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/sklearn/base.py:334: UserWarning: Trying to unpickle estimator MinMaxScaler from version 0.22.2.post1 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk.
| | Model | Time | Epochs | Window Size Days | Stride | Sampling Rate | Batch Size | MSE | MAE |
---|---|---|---|---|---|---|---|---|---|
0 | model_best03b | 629.414618 | 28 | 15 | 1 | 2 | 128 | 0.019970 | 57.656134 |
1 | model_best01a | 865.906092 | 28 | 15 | 1 | 2 | 128 | 0.019819 | 58.824076 |
2 | model_best03a | 864.207575 | 28 | 15 | 1 | 2 | 128 | 0.020162 | 60.483345 |
3 | model_dnn01 | 223.165007 | 28 | 15 | 1 | 2 | 128 | 0.020358 | 60.661538 |
# Categorical copy of the model names whose category order is the table's
# current row order; it is used as the x axis and fill of the bar chart.
Models_list = list(perf_table["Model"])
Models = pd.Categorical(perf_table["Model"], categories=Models_list)
perf_table["Modelo2"] = Models
(
    ggplot(perf_table.head(5), aes(x="Modelo2", y="MAE", fill="Modelo2"))
    + geom_bar(stat="identity")
    + labs(
        y="Mean Absolute Error",
        x="Models",
        title="Plot for Comparing the Models\nPerformance on IAQ Scale.",
    )
    + theme(legend_position="none", axis_text_x=element_text(rotation=45))
)
<ggplot: (8786600537517)>
# Numbered markdown entry per model, each followed by an empty markdown cell.
for position, model in enumerate(perf_table["Model"], start=1):
    display(Markdown(f"{position}. Model \"{model}\""))
    display(Markdown(f""))
Keras contributors et al. Keras / Code examples / Timeseries / Timeseries forecasting for weather prediction. 2021.
TensorFlow Contributors. TensorFlow: Tutorial on Time series forecasting. 2021.
Román-Rangel, Francisco. Notas y Código del Curso de Aprendizaje Profundo. 2021.
González-Pérez, Felipe. Notas de aprendizaje de máquina (2020)