In [1]:
import Orange
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os

The code below was used to generate the plots in the paper. If you are replicating our plots, please make sure to use the correct path to the raw result files.

In [2]:
# Display names for the algorithms (this is just for nicer-looking results).
# Keys are the first underscore-separated token of a raw result filename, as
# parsed by summarise_results; values are the labels written to the CSV.
# Note: summarise_results skips "optimisation" files, so "O-MRP" is never
# actually emitted into the aggregated results.

names = {
    "ordinary":"Ordinary Kriging",
    "universal":"Universal Kriging",
    "simple":"Basic regression",
    "SAR":"SAR",
    "MA":"MA",
    "ARMA":"ARMA",
    "CNN":"CNN",
    "static":"SD-MRP",
    "weights":"WP-MRP",
    "optimisation":"O-MRP"
}

Aggregate raw results into a more convenient single CSV

In [3]:
def summarise_results(dataset):
    """Aggregate the raw result files of one dataset into CSV-style lines.

    Every regular, non-hidden file in ``"path/to/results" + dataset + "/"`` is
    read as a CSV with ``proportion_test`` and ``value`` columns. For each file
    the mean ``value`` per ``proportion_test`` is computed and one line is
    produced per proportion.

    Parameters
    ----------
    dataset : str
        Dataset name; it is appended to the results path and used as the
        prefix of every condition label.

    Returns
    -------
    str
        Header-less lines of the form
        ``<algorithm>,<condition><proportion>,<mean loss>\\n``. The proportion
        is appended to the condition without a separator on purpose: the
        plotting functions slice the last three characters off again.

    Raises
    ------
    KeyError
        If a filename starts with an algorithm token missing from ``names``.
    IndexError
        If a filename has fewer underscore-separated tokens than expected.
    """
    results_dir = "path/to/results" + dataset + "/"
    lines = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the output (and thus downstream plot ordering) reproducible.
    for filename in sorted(os.listdir(results_dir)):
        full_path = os.path.join(results_dir, filename)

        # Hidden files and sub-directories (.DS_Store, .ipynb_checkpoints, ...)
        # are not result files and would otherwise crash the filename parsing.
        if filename.startswith(".") or not os.path.isfile(full_path):
            continue

        name_split = filename.split('.')[0].split('_')
        algorithm = name_split[0]
        if algorithm == "optimisation":
            continue

        # These three prefixes take the condition from tokens 2 and 3 instead
        # of 1 and 2 -- presumably their filenames carry one extra token after
        # the algorithm name (TODO confirm against the raw result files).
        if algorithm in ("ordinary", "static", "universal"):
            condition = dataset + "_" + name_split[2] + "_" + name_split[3]
        else:
            condition = dataset + "_" + name_split[1] + "_" + name_split[2]

        df = pd.read_csv(full_path)
        # Rows with value >= 999999 are discarded (presumably a sentinel for
        # failed runs -- TODO confirm).
        df = df.loc[df['value'] < 999999]

        # groupby sorts by proportion_test, so lines come out in ascending order
        means = df.groupby('proportion_test', as_index=False)['value'].mean()

        name = names[algorithm]
        for prop, mean_loss in zip(means['proportion_test'].values, means['value'].values):
            lines.append(name + "," + condition + str(prop) + "," + str(mean_loss) + "\n")

    return "".join(lines)

# Aggregate both datasets under a single CSV header and write the result to disk
s = "".join([
    "algorithm,condition,loss\n",
    summarise_results("GDP"),
    summarise_results("Covid"),
])

with open("results.csv","w") as fp:
    fp.write(s)

Generate critical difference diagram

In [4]:
def create_cd(path,save_fig=False,save_name=None):
    """Plot a critical difference diagram from the aggregated results CSV.

    Within every condition the algorithms are ranked by loss (lowest loss gets
    rank 1), the ranks are averaged over all conditions, and the averages are
    passed to Orange's Nemenyi critical-difference routines for plotting.

    Parameters
    ----------
    path : str
        CSV file with ``algorithm``, ``condition`` and ``loss`` columns.
    save_fig : bool
        If true, the current figure is saved to ``save_name`` before showing.
    save_name : str or None
        Output file for the figure; required when ``save_fig`` is true.

    Raises
    ------
    ValueError
        If ``save_fig`` is true but ``save_name`` is None, or if the CSV
        contains no rows.
    KeyError
        If the CSV contains an algorithm that is not in the ranking table.
    """
    # Fail before doing any work rather than after the diagram has been drawn.
    if save_fig and save_name is None:
        raise ValueError("save_name must be provided when save_fig is True")

    df = pd.read_csv(path)

    # condition -> list of (algorithm, loss) pairs observed for it
    conditions = {}
    for _, row in df.iterrows():
        conditions.setdefault(row['condition'], []).append((row['algorithm'], row['loss']))

    if not conditions:
        raise ValueError("no results found in " + str(path))

    # Summed ranks per algorithm; the key order fixes the order of the names
    # and average ranks handed to Orange below.
    rankings = {
        "Ordinary Kriging":0,
        "Universal Kriging":0,
        "Basic regression":0,
        "SAR":0,
        "MA":0,
        "ARMA":0,
        "CNN":0,
        "SD-MRP":0,
        "WP-MRP":0
    }

    for entries in conditions.values():
        ordered_losses = sorted(loss for _, loss in entries)
        for algorithm, loss in entries:
            # Position in the sorted losses, 1-based. Equal losses all receive
            # the lowest of their ranks (ties are not averaged).
            rankings[algorithm] += ordered_losses.index(loss) + 1

    n_conditions = len(conditions)
    names_sd = list(rankings)
    avranks = [total / n_conditions for total in rankings.values()]

    # Orange's compute_CD takes alpha as a string.
    cd = Orange.evaluation.compute_CD(avranks, n_conditions,alpha="0.05", test="nemenyi")
    Orange.evaluation.graph_ranks(avranks, names_sd, cd=cd, width=5, textspace=1.5)

    if save_fig:
        plt.savefig(save_name)
    plt.show()
In [5]:
# Draw the critical difference diagram over all aggregated results and save it
# as a PDF (assumes the "figures/" directory already exists -- TODO confirm).
create_cd("results.csv",save_fig=True,save_name="figures/CD_all_nemenyi.pdf")

Generate parallel coordinate plot

In [6]:
def create_parallel_coordinates(path,save_fig=False,save_name=None):
    """Draw a parallel-coordinates plot of mean loss per algorithm and condition.

    Reads the aggregated results CSV (``algorithm``, ``condition``, ``loss``),
    strips the three-character proportion suffix from each condition, averages
    the losses per algorithm and condition, and plots one line per algorithm.

    Parameters
    ----------
    path : str
        Aggregated results CSV.
    save_fig : bool
        Save the figure only when this is true and ``save_name`` is given.
    save_name : str or None
        Output file for the figure.
    """
    # Short axis labels for every known condition
    cond_names = {
        'GDP_Daegu_Daegu':"GDP DA-DA",
        'GDP_Seoul_Daegu':"GDP SE-DA",
        'GDP_Seoul_Taipei':"GDP SE-TP",
        'GDP_Taichung_Daegu':"GDP TC-DA",
        'GDP_Taichung_Taipei':"GDP TC-TP",
        'GDP_Taipei_Taipei':"GDP TP-TP",
        'Covid_Daegu_Daegu':"CoV DA-DA",
        'Covid_Seoul_Daegu':"CoV SE-DA"
    }
    covid_labels = (cond_names["Covid_Daegu_Daegu"], cond_names["Covid_Seoul_Daegu"])

    results = pd.read_csv(path)

    # algorithm -> condition label -> list of losses (insertion order is kept)
    losses = {}
    seen_labels = []
    for _, row in results.iterrows():
        algorithm = row['algorithm']
        # The last three characters of the condition are the proportion
        label = cond_names[row['condition'][:-3]]
        loss = row['loss']

        losses.setdefault(algorithm, {}).setdefault(label, []).append(loss)
        if label not in seen_labels:
            seen_labels.append(label)

    # One wide record per algorithm holding the mean loss of each condition
    records = [
        {"Algorithm": algorithm,
         **{label: np.mean(values) for label, values in per_label.items()}}
        for algorithm, per_label in losses.items()
    ]
    df_new = pd.DataFrame(records)

    # Every non-Covid column is divided by 15000 (presumably to bring the GDP
    # losses onto a scale comparable with the Covid ones -- TODO confirm).
    for label in seen_labels:
        if label not in covid_labels:
            df_new[label] = df_new[label] / 15000

    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(111)
    plt.title("Performance per condition",fontsize=20)
    plt.ylabel("MAE", fontsize=14)
    plt.ylim(0,9)

    # Colour blindness-proof colour scheme
    colors = [
        "#222255", "#E69F00", "#56B4E9",
        "#009E73", "#F0E442", "#D55E00",
        "#0072B2", "#CC79A7", "#000000",
    ]
    plotted_axes = pd.plotting.parallel_coordinates(df_new,'Algorithm',ax=ax,color=colors)
    # Legend goes outside the axes, to the right
    plotted_axes.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

    if save_fig and save_name is not None:
        plt.savefig(save_name,bbox_inches = 'tight')

    
    

    
In [7]:
# save_fig is False here, so the plot is drawn but not written to disk and
# save_name is unused; set save_fig=True to write the PDF.
create_parallel_coordinates("results.csv",save_fig=False,save_name="figures/PC_all_colours.pdf")

Generate stacked bar plot

In [8]:
def create_prop_bar(path,save_fig=False,save_name=None):
    """Draw a stacked horizontal bar plot of loss share per proportion setting.

    Reads the aggregated results CSV (``algorithm``, ``condition``, ``loss``),
    takes the last three characters of each condition as the proportion,
    averages the losses per algorithm and proportion, and normalises each
    algorithm's five means so that they sum to one.

    Parameters
    ----------
    path : str
        Aggregated results CSV.
    save_fig : bool
        Save the figure only when this is true and ``save_name`` is given.
    save_name : str or None
        Output file for the figure.
    """
    results = pd.read_csv(path)

    # algorithm -> proportion string -> list of losses (insertion order is kept)
    losses = {}
    for _, row in results.iterrows():
        algorithm = row['algorithm']
        proportion = row['condition'][-3:]
        losses.setdefault(algorithm, {}).setdefault(proportion, []).append(row['loss'])

    # One wide record per algorithm holding the mean loss of each proportion
    records = [
        {"Algorithm": algorithm,
         **{proportion: np.mean(values) for proportion, values in per_prop.items()}}
        for algorithm, per_prop in losses.items()
    ]
    df_new = pd.DataFrame(records).sort_values("Algorithm",ascending=False)

    # Turn the five mean losses of each algorithm into fractions of their total
    proportions = ["0.1", "0.3", "0.5", "0.7", "0.9"]
    total = df_new[proportions[0]]
    for proportion in proportions[1:]:
        total = total + df_new[proportion]
    for proportion in proportions:
        df_new[proportion] = df_new[proportion] / total

    axes = df_new.plot(
        x = 'Algorithm',
        kind = 'barh',
        stacked = True,
        title = 'Loss per proportion of hidden values',
        mark_right = True
    )
    fig = axes.get_figure()

    if save_fig and save_name is not None:
        fig.savefig(save_name,bbox_inches = 'tight')
In [9]:
# save_fig is False here, so the plot is drawn but not written to disk and
# save_name is unused; set save_fig=True to write the PDF.
create_prop_bar("results.csv",save_fig=False,save_name="figures/prop_bar.pdf")