from preprocessing.load_spatial_data import *
from preprocessing.region_graphs import *
from preprocessing.regions import *
from preprocessing.types import *
from preprocessing.label import *
from mrp.mrp_misc import *
from mrp.mrp import *
from baselines.general import *
from baselines.CNN import *
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.metrics import mean_absolute_error as mae
import autokeras as ak
from tensorflow.keras.models import load_model
import tensorflow as tf
import resource
import os
Parameters. This part of the code is the same as in the run files.
# Global parameters
experiment_name = "experiment_name"
# File paths
label_paths = ["/path/to/gdp/raster",
"/path/to/legacy/dataset/not/used",
"/path/to/legacy/dataset/not/used",
"/path/to/legacy/dataset/not/used",
"/path/to/legacy/dataset/not/used",
"/path/to/covid/dataset/patientroute/file"]
temp_file_path = "/path/to/temp/files"
optimisation_path = "/path/to/legacy/files/not/used"
working_path = "/path/to/working/directory"
taxonomy_filename = "type_taxonomy_v1.tsv"
# Label parameters
label_bands = [1,1,1,1,1,None]
# GDP
label_ind = 0
label_from_grid = True
# COVID
#label_ind = 5
#label_from_grid = False
label_set = os.path.splitext(os.path.basename(label_paths[label_ind]))[0] # e.g. ".../raster.tif" -> "raster"
# Spatial parameters
region_size_lat = 0.1 # Only location size used for interpolation; region size is legacy
region_size_lon = 0.1 # Only location size used for interpolation; region size is legacy
location_size_lat = 0.02 # Degrees on map
location_size_lon = 0.02 # Degrees on map
region_params = [region_size_lat,region_size_lon,location_size_lat,location_size_lon]
# MRP parameters
hidden_proportion = 0.8
# Optimisation parameters
MRP_iter = 100
optimisation_epochs = 100
average_loss = True
mutation_rate = 0.2
mutation_intensity = 0.5
train_proportion = 0.8
# CNN parameters
nn_window_height = 17
nn_window_width = 18
nn_validation_split = 0.33467051772273004
##################################
# Bounding boxes #################
##################################
# Taipei bboxes
bbox_taipei_bl = (121.3485,24.8192)
bbox_taipei_tr = (121.7760,25.2465)
shp = "/path/to/taiwan/shapefiles"
taipei_dict = {"bbox_bl":bbox_taipei_bl,"bbox_tr":bbox_taipei_tr,"shp":shp}
# Taichung bboxes
bbox_taichung_bl = (120.3854,23.9724)
bbox_taichung_tr = (120.9129,24.3997)
shp = "/path/to/taiwan/shapefiles"
taichung_dict = {"bbox_bl":bbox_taichung_bl,"bbox_tr":bbox_taichung_tr,"shp":shp}
# Seoul bboxes
bbox_seoul_bl = (126.7938,37.4378)
bbox_seoul_tr = (127.3454,37.7072)
shp = "/path/to/korea/shapefiles"
seoul_dict = {"bbox_bl":bbox_seoul_bl,"bbox_tr":bbox_seoul_tr,"shp":shp}
# Daegu bboxes
bbox_daegu_bl = (128.4298,35.7642)
bbox_daegu_tr = (128.7956,35.9772)
shp = "/path/to/korea/shapefiles"
daegu_dict = {"bbox_bl":bbox_daegu_bl,"bbox_tr":bbox_daegu_tr,"shp":shp}
# Available regions; Busan and Amsterdam appeared in earlier versions, but
# their bounding boxes are not defined in this file, so they are omitted here.
region_dicts = {"Taipei":taipei_dict,
                "Taichung":taichung_dict,
                "Seoul":seoul_dict,
                "Daegu":daegu_dict}
# Set train/test here
training_set_name = "Taichung"
test_set_name = "Daegu"
shapefile_path_train = region_dicts[training_set_name]['shp']
bbox_train_bl = region_dicts[training_set_name]['bbox_bl']
bbox_train_tr = region_dicts[training_set_name]['bbox_tr']
shapefile_path_test = region_dicts[test_set_name]['shp']
bbox_test_bl = region_dicts[test_set_name]['bbox_bl']
bbox_test_tr = region_dicts[test_set_name]['bbox_tr']
# Variable parameters
# Type parameters; these were determined beforehand using SMAC
type_frequency_ratio = 0.5887024334058893
type_top_n = 10
type_top_n_percent = 20
type_top_n_variable = 15
type_params = [type_frequency_ratio,type_top_n,type_top_n_percent,type_top_n_variable]
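For intuition, the sketch below shows one plausible reading of a frequency-based type filter; the real logic lives in preprocessing.types, and filter_types_by_frequency is a hypothetical illustration rather than the project's implementation.
from collections import Counter

def filter_types_by_frequency(object_types, ratio):
    # Hypothetical sketch: keep the most common object types until they
    # cover `ratio` of all objects, and drop the rare remainder.
    counts = Counter(object_types)
    total = sum(counts.values())
    kept, covered = [], 0
    for t, c in counts.most_common():
        if covered / total >= ratio:
            break
        kept.append(t)
        covered += c
    return kept

# With ratio ~0.59, frequent types are kept until they cover ~59% of objects
print(filter_types_by_frequency(["shop"]*6 + ["park"]*3 + ["pier"], type_frequency_ratio))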
region_min_objects = 0
# Methods; these were determined beforehand using SMAC
# Possible values: "replace","drop"
missing_value_method = "replace"
# Possible values: "frequency,top,top_percent,top_variable,taxonomy,none"
type_filter_method = "frequency"
# Possible values: "unit","z_score","mean_norm","none"
feature_normalisation_method = "none"
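For reference, these option names usually correspond to the standard definitions sketched below; the actual implementation is in the preprocessing modules, and normalise_features is a hypothetical name used only for illustration.
def normalise_features(X, method):
    # Hypothetical sketch of the standard definitions behind the option names;
    # X is an (n_samples, n_features) numpy array.
    if method == "unit":      # min-max scaling to [0,1]
        return (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0) + 1e-12)
    if method == "z_score":   # zero mean, unit variance per feature
        return (X - X.mean(axis=0)) / (X.std(axis=0) + 1e-12)
    if method == "mean_norm": # mean-centred, scaled by feature range
        return (X - X.mean(axis=0)) / (X.max(axis=0) - X.min(axis=0) + 1e-12)
    return X                  # "none": leave features untouched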
Auto-Keras. The cells below first preprocess the data as in the run files, then use Auto-Keras to perform neural architecture search. The final model is saved at the specified path. WARNING: this code uses a lot of disk space when run.
# Preprocessing
hidden_proportion_train = 0.8
hidden_proportion_test = 0.5
# Preprocess training region
S = load_spatial_data(shapefile_path_train,missing_value_method)
S = clip_area(S,bbox_train_bl,bbox_train_tr)
S,types = find_types(S,optimisation_path,working_path,type_filter_method,type_params,
                     taxonomy_filename=taxonomy_filename,verbose=False)
S = compute_centroids(S)
region_bounds = compute_region_bounds(S,location_size_lat,location_size_lon)
regions,region_bounds = assign_objects_to_regions(S,region_bounds,region_min_objects=region_min_objects)
super_G = create_super_graph_raw(regions,region_bounds,types,location_size_lat,location_size_lon)
super_H_train,width_train,height_train = convert_super_G(super_G,S,label_paths[label_ind],region_params,
                                                         hidden_proportion_train,from_grid=label_from_grid)
# Preprocess test region
S = load_spatial_data(shapefile_path_test,missing_value_method)
S = clip_area(S,bbox_test_bl,bbox_test_tr)
S,types = find_types(S,optimisation_path,working_path,type_filter_method,type_params,
                     taxonomy_filename=taxonomy_filename,verbose=False)
S = compute_centroids(S)
region_bounds = compute_region_bounds(S,location_size_lat,location_size_lon)
regions,region_bounds = assign_objects_to_regions(S,region_bounds,region_min_objects=region_min_objects)
super_G = create_super_graph_raw(regions,region_bounds,types,location_size_lat,location_size_lon)
super_H_test,width_test,height_test = convert_super_G(super_G,S,label_paths[label_ind],region_params,
                                                      hidden_proportion_test,from_grid=label_from_grid)
project_name = experiment_name # Placeholder; set to your Auto-Keras project name
model_name = project_name + "_50" # "_50" matches max_trials below
max_trials = 50
X_train, y_train = graph_to_tensor_train(super_H_train,nn_window_height,nn_window_width)
X_test, y_test = graph_to_tensor_test(super_H_test,nn_window_height,nn_window_width)
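A quick shape check is cheap insurance before the expensive search below; the layout stated in the comment is an assumption about graph_to_tensor_*, not a verified contract.
# Expected (assumed) layout: X_* as (samples, height, width, channels)
# image-like windows, y_* as matching 1D label arrays.
print("train:", X_train.shape, y_train.shape)
print("test: ", X_test.shape, y_test.shape)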
# Auto-Keras
reg = ak.ImageRegressor(
    overwrite=False,
    max_trials=max_trials,
    directory="/path/to/autokeras/storage", # Make sure to have enough space free; it takes A LOT
    project_name=project_name,
    loss="mean_absolute_error")
reg.fit(X_train, y_train, validation_split=nn_validation_split)
model = reg.export_model()
try:
    # Prefer the TensorFlow SavedModel format
    model.save("/path/to/model/saving/directory" + model_name, save_format="tf")
except Exception:
    # Fall back to HDF5 if the SavedModel export fails
    model.save("/path/to/model/saving/directory" + model_name + ".h5")
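The exported model can be reloaded later with the Auto-Keras custom objects, which is presumably why load_model and mae are imported at the top; the path mirrors the placeholder above.
# Reload the exported model; ak.CUSTOM_OBJECTS registers the custom
# Auto-Keras layers that plain Keras does not know about.
loaded_model = load_model("/path/to/model/saving/directory" + model_name,
                          custom_objects=ak.CUSTOM_OBJECTS)
pred = loaded_model.predict(X_test)
print("Test MAE:", mae(y_test, pred.flatten()))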