In [ ]:
from preprocessing.load_spatial_data import *
from preprocessing.region_graphs import *
from preprocessing.regions import *
from preprocessing.types import *
from preprocessing.label import *

from mrp.mrp_misc import *
from mrp.mrp import *
from baselines.general import *
from baselines.CNN import *

import pandas as pd
import numpy as np
import networkx as nx

from sklearn.metrics import mean_absolute_error as mae
import autokeras as ak
from tensorflow.keras.models import load_model
import tensorflow as tf

import resource
import os

Parameters. This part of the code is the same as in the run files.

In [ ]:
# Global parameters


experiment_name = "experiment_name"

# File paths


label_paths = ["/path/to/gdp/raster",
               "/path/to/legacy/dataset/not/used",
               "/path/to/legacy/dataset/not/used",
               "/path/to/legacy/dataset/not/used",
               "/path/to/legacy/dataset/not/used",
               "/path/to/covid/dataset/patientroute/file"]

temp_file_path = "/path/to/temp/files"

optimisation_path = "/path/to/legacy/files/not/used"
working_path = "/path/to/working/directory"

taxonomy_filename = "type_taxonomy_v1.tsv"



# Label parameters

label_bands = [1,1,1,1,1,None]

# GDP
label_ind = 0
label_from_grid = True

# COVID
#label_ind = 5
#label_from_grid = False

# Use the label file's base name (without extension) as the label set name
label_set = os.path.basename(label_paths[label_ind]).split(".")[0]


# Spatial parameters

region_size_lat = 0.1 # Only location size used for interpolation; region size is legacy
region_size_lon = 0.1 # Only location size used for interpolation; region size is legacy
location_size_lat = 0.02 # Degrees on map
location_size_lon = 0.02 # Degrees on map

region_params = [region_size_lat,region_size_lon,location_size_lat,location_size_lon]
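
# Illustrative arithmetic: at 0.02 degrees per location, the Taipei bbox defined
# below spans roughly (121.7760-121.3485)/0.02 ≈ 21 locations in longitude and
# (25.2465-24.8192)/0.02 ≈ 21 in latitude.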


# MRP parameters

hidden_proportion = 0.8


# Optimisation parameters

MRP_iter = 100
optimisation_epochs = 100
average_loss = True
mutation_rate = 0.2
mutation_intensity = 0.5
train_proportion = 0.8
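
# Presumably: each candidate MRP weight is mutated with probability mutation_rate,
# with the perturbation scale set by mutation_intensity (see the mrp module).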


# CNN parameters

nn_window_height = 17
nn_window_width = 18
nn_validation_split = 0.33467051772273004


##################################
# Bounding boxes #################
##################################

# Taipei bboxes

bbox_taipei_bl = (121.3485,24.8192)
bbox_taipei_tr = (121.7760,25.2465)
shp = "/path/to/taiwan/shapefiles"
taipei_dict = {"bbox_bl":bbox_taipei_bl,"bbox_tr":bbox_taipei_tr,"shp":shp}

# Taichung bboxes

bbox_taichung_bl = (120.3854,23.9724)
bbox_taichung_tr = (120.9129,24.3997)
shp = "/path/to/taiwan/shapefiles"
taichung_dict = {"bbox_bl":bbox_taichung_bl,"bbox_tr":bbox_taichung_tr,"shp":shp}

# Seoul bboxes

bbox_seoul_bl = (126.7938,37.4378)
bbox_seoul_tr = (127.3454,37.7072)
shp = "/path/to/korea/shapefiles"
seoul_dict = {"bbox_bl":bbox_seoul_bl,"bbox_tr":bbox_seoul_tr,"shp":shp}

# Daegu bboxes

bbox_daegu_bl = (128.4298,35.7642)
bbox_daegu_tr = (128.7956,35.9772)
shp = "/path/to/korea/shapefiles"
daegu_dict = {"bbox_bl":bbox_daegu_bl,"bbox_tr":bbox_daegu_tr,"shp":shp}


# Add Busan and Amsterdam here once their bboxes are defined above
regions = [taipei_dict,taichung_dict,seoul_dict,daegu_dict]


# Set train/test here
shapefile_path_train = taichung_dict['shp']
bbox_train_bl = taichung_dict['bbox_bl']
bbox_train_tr = taichung_dict['bbox_tr']

shapefile_path_test = daegu_dict['shp']
bbox_test_bl = daegu_dict['bbox_bl']
bbox_test_tr = daegu_dict['bbox_tr']



# Resolve human-readable names for the chosen train/test regions
# (extend this mapping when adding Busan, Amsterdam, etc.)
bbox_names = {bbox_taipei_bl:"Taipei",bbox_taichung_bl:"Taichung",
              bbox_seoul_bl:"Seoul",bbox_daegu_bl:"Daegu"}

training_set_name = bbox_names.get(bbox_train_bl,"")
test_set_name = bbox_names.get(bbox_test_bl,"")



# Variable parameters


# Type parameters; these were determined beforehand using SMAC

type_frequency_ratio = 0.5887024334058893
type_top_n = 10
type_top_n_percent = 20
type_top_n_variable = 15

type_params = [type_frequency_ratio,type_top_n,type_top_n_percent,type_top_n_variable]

region_min_objects = 0



# Methods; these were determined beforehand using SMAC

# Possible values: "replace","drop"
missing_value_method = "replace"

# Possible values: "frequency,top,top_percent,top_variable,taxonomy,none"
type_filter_method = "frequency"

# Possible values: "unit","z_score","mean_norm","none"
feature_normalisation_method = "none"
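
For reference, these normalisation options correspond to standard feature scalings. A minimal, illustrative sketch of what they typically compute (the actual implementation lives in the preprocessing module; the function below is hypothetical):

In [ ]:
# Illustrative only: standard definitions of the normalisation options above
def normalise_sketch(x,method):
    x = np.asarray(x,dtype=float)
    if(method == "unit"):        # scale to [0,1]
        return (x - x.min()) / (x.max() - x.min())
    elif(method == "z_score"):   # zero mean, unit variance
        return (x - x.mean()) / x.std()
    elif(method == "mean_norm"): # mean-centred, scaled by range
        return (x - x.mean()) / (x.max() - x.min())
    return x                     # "none"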

Auto-Keras. The cells below first preprocess the data as in the run files; Auto-Keras then performs Neural Architecture Search, and the final model is saved at the specified path. WARNING: this code uses a lot of storage when run.

In [ ]:
# Preprocessing

hidden_proportion_train = 0.8
hidden_proportion_test = 0.5

# Preprocess train+test set

S = load_spatial_data(shapefile_path_train,missing_value_method)
S = clip_area(S,bbox_train_bl,bbox_train_tr)

S,types = find_types(S,optimisation_path,working_path,type_filter_method,type_params,
                     taxonomy_filename=taxonomy_filename,verbose=False)

S = compute_centroids(S)
region_bounds = compute_region_bounds(S,location_size_lat,location_size_lon)
regions,region_bounds = assign_objects_to_regions(S,region_bounds,region_min_objects=region_min_objects)
super_G = create_super_graph_raw(regions,region_bounds,types,location_size_lat,location_size_lon)
super_H_train,width_train,height_train = convert_super_G(super_G,S,label_paths[label_ind],region_params,
                                                         hidden_proportion_train,from_grid=label_from_grid)



S = load_spatial_data(shapefile_path_test,missing_value_method)
S = clip_area(S,bbox_test_bl,bbox_test_tr)

S,types = find_types(S,optimisation_path,working_path,type_filter_method,type_params,
                     taxonomy_filename=taxonomy_filename,verbose=False)

S = compute_centroids(S)
region_bounds = compute_region_bounds(S,location_size_lat,location_size_lon)
regions,region_bounds = assign_objects_to_regions(S,region_bounds,region_min_objects=region_min_objects)
super_G = create_super_graph_raw(regions,region_bounds,types,location_size_lat,location_size_lon)
super_H_test,width_test,height_test = convert_super_G(super_G,S,label_paths[label_ind],region_params,
                                                      hidden_proportion_test,from_grid=label_from_grid)
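
Before the architecture search, a quick sanity check on the preprocessing output can save a failed run. This assumes the converted super-graphs are NetworkX graphs (networkx is imported above as nx):

In [ ]:
# Sanity check (assumes NetworkX graphs): grid dimensions and node counts
print("Train grid: %dx%d, %d nodes" % (height_train,width_train,super_H_train.number_of_nodes()))
print("Test grid: %dx%d, %d nodes" % (height_test,width_test,super_H_test.number_of_nodes()))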
In [ ]:
project_name = "project_name"      # placeholder, as with experiment_name above
model_name = project_name + "_50"  # suffix matches max_trials
max_trials = 50

X_train, y_train = graph_to_tensor_train(super_H_train,nn_window_height,nn_window_width)
X_test, y_test = graph_to_tensor_test(super_H_test,nn_window_height,nn_window_width)

# Auto-Keras

reg = ak.ImageRegressor(
    overwrite=False,
    max_trials=max_trials,
    directory="/path/to/autokeras/storage", # Make sure to have enough space free; it takes A LOT
    project_name=project_name,
    loss="mean_absolute_error")

reg.fit(X_train, y_train, validation_split=nn_validation_split)

model = reg.export_model()

try:
    model.save(os.path.join("/path/to/model/saving/directory",model_name), save_format="tf")
except Exception:
    # Fall back to the HDF5 format if saving as a TF SavedModel fails
    model.save(os.path.join("/path/to/model/saving/directory",model_name + ".h5"))
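
The exported model can be reloaded later with Keras by passing Auto-Keras's custom objects (ak.CUSTOM_OBJECTS), then evaluated on the held-out city; load_model and mae are already imported above, and the path below is the same placeholder used when saving:

In [ ]:
# Reload the exported model; Auto-Keras layers require custom_objects
model = load_model(os.path.join("/path/to/model/saving/directory",model_name),
                   custom_objects=ak.CUSTOM_OBJECTS)

preds = model.predict(X_test)
print("Test MAE:",mae(y_test,preds.flatten()))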