diff --git a/eval_atsp/ASHPPEnv.py b/eval_atsp/ASHPPEnv.py new file mode 100644 index 0000000..7bc9863 --- /dev/null +++ b/eval_atsp/ASHPPEnv.py @@ -0,0 +1,181 @@ + +""" +The MIT License + +Copyright (c) 2021 MatNet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + + + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +from dataclasses import dataclass +import torch +import warnings + +from ATSProblemDef import get_random_problems + + +@dataclass +class Reset_State: + problems: torch.Tensor + # shape: (batch, node, node) + + +@dataclass +class Step_State: + BATCH_IDX: torch.Tensor + POMO_IDX: torch.Tensor + # shape: (batch, pomo) + current_node: torch.Tensor = None + # shape: (batch, pomo) + ninf_mask: torch.Tensor = None + # shape: (batch, pomo, node) + + +class ASHPPEnv: + def __init__(self, **env_params): + + # Const @INIT + #################################### + self.env_params = env_params + self.node_cnt = env_params['node_cnt'] + self.pomo_size = env_params['pomo_size'] # pomo size if sample size here + + # Const @Load_Problem + #################################### + self.batch_size = None + self.BATCH_IDX = None + self.POMO_IDX = None + # IDX.shape: (batch, pomo) + self.problems = None + # shape: (batch, node, node) + + # Dynamic + #################################### + self.selected_count = None + self.current_node = None + # shape: (batch, pomo) + self.selected_node_list = None + # shape: (batch, pomo, 0~) + + # STEP-State + #################################### + self.step_state = None + + def load_problems(self, batch_size): + self.batch_size = batch_size + self.BATCH_IDX = torch.arange(self.batch_size)[:, None].expand(self.batch_size, self.pomo_size) + self.POMO_IDX = torch.arange(self.pomo_size)[None, :].expand(self.batch_size, self.pomo_size) + + problem_gen_params = self.env_params['problem_gen_params'] + self.problems = get_random_problems(batch_size, self.node_cnt, problem_gen_params) + # shape: (batch, node, node) + + def load_problems_manual(self, problems): + # problems.shape: (batch, node, node) + + self.batch_size = problems.size(0) + self.BATCH_IDX = torch.arange(self.batch_size)[:, None].expand(self.batch_size, self.pomo_size) + self.POMO_IDX = torch.arange(self.pomo_size)[None, :].expand(self.batch_size, self.pomo_size) + self.problems = problems 
+ # shape: (batch, node, node) + + def reset(self): + self.selected_count = 2 # Add starting and terminating ndoes + # Set current nodes as 0 + self.current_node = torch.zeros((self.batch_size, self.pomo_size), dtype=torch.long) + # Set the last node as node - 1 + self.last_node = torch.ones((self.batch_size, self.pomo_size), dtype=torch.long) * (self.node_cnt - 1) + + # shape: (batch, pomo) + self.selected_node_list = self.current_node[:, :, None] + # shape: (batch, pomo, 0~) + + self._create_step_state() + + reward = None + done = False + return Reset_State(problems=self.problems), reward, done + + def _create_step_state(self): + self.step_state = Step_State(BATCH_IDX=self.BATCH_IDX, POMO_IDX=self.POMO_IDX) + self.step_state.ninf_mask = torch.zeros((self.batch_size, self.pomo_size, self.node_cnt)) + # shape: (batch, pomo, node) + + def pre_step(self): + reward = None + done = False + + # Set the starting and terminating nodes to -inf + self.step_state.ninf_mask[self.BATCH_IDX, self.POMO_IDX, 0] = float('-inf') + self.step_state.ninf_mask[self.BATCH_IDX, self.POMO_IDX, -1] = float('-inf') + + # Set current node to 0 + self.step_state.current_node = self.current_node + # Set last node to node - 1 + self.step_state.last_node = self.last_node + + + return self.step_state, reward, done + + def step(self, node_idx): + # node_idx.shape: (batch, pomo) + + self.selected_count += 1 + self.current_node = node_idx + # shape: (batch, pomo) + self.selected_node_list = torch.cat((self.selected_node_list, self.current_node[:, :, None]), dim=2) + # shape: (batch, pomo, 0~node) + + self._update_step_state() + + # returning values + done = (self.selected_count == self.node_cnt) + if done: + # Concat the terminating node (the last node) to the selected node list + self.current_node = torch.ones((self.batch_size, self.pomo_size), dtype=torch.long) * (self.node_cnt - 1) + self.selected_node_list = torch.cat((self.selected_node_list, self.current_node[:, :, None]), dim=2) + reward = 
-self._get_total_distance() # Note the MINUS Sign ==> We MAXIMIZE reward + # shape: (batch, pomo) + else: + reward = None + return self.step_state, reward, done + + def _update_step_state(self): + self.step_state.current_node = self.current_node + # shape: (batch, pomo) + self.step_state.ninf_mask[self.BATCH_IDX, self.POMO_IDX, self.current_node] = float('-inf') + # shape: (batch, pomo, node) + + def _get_total_distance(self): + + node_from = self.selected_node_list[:, :, :-1] + # shape: (batch, pomo, node - 1) + node_to = self.selected_node_list.roll(dims=2, shifts=-1)[:, :, :-1] + # shape: (batch, pomo, node - 1) + batch_index = self.BATCH_IDX[:, :, None].expand(self.batch_size, self.pomo_size, self.node_cnt - 1) + # shape: (batch, pomo, node - 1) + + selected_cost = self.problems[batch_index, node_from, node_to] + # shape: (batch, pomo, node - 1) + total_distance = selected_cost.sum(2) + # shape: (batch, pomo) + + return total_distance diff --git a/eval_atsp/ASHPPModel.py b/eval_atsp/ASHPPModel.py new file mode 100644 index 0000000..a07903c --- /dev/null +++ b/eval_atsp/ASHPPModel.py @@ -0,0 +1,348 @@ + +""" +The MIT License + +Copyright (c) 2021 MatNet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + + + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +from ATSPModel_LIB import AddAndInstanceNormalization, FeedForward, MixedScore_MultiHeadAttention + + +class ASHPPModel(nn.Module): + + def __init__(self, **model_params): + super().__init__() + self.model_params = model_params + + self.encoder = ATSP_Encoder(**model_params) + self.decoder = ATSP_Decoder(**model_params) + + self.encoded_row = None + self.encoded_col = None + # shape: (batch, node, embedding) + + def pre_forward(self, reset_state): + + problems = reset_state.problems + # problems.shape: (batch, node, node) + + batch_size = problems.size(0) + node_cnt = problems.size(1) + embedding_dim = self.model_params['embedding_dim'] + + row_emb = torch.zeros(size=(batch_size, node_cnt, embedding_dim)) + # emb.shape: (batch, node, embedding) + col_emb = torch.zeros(size=(batch_size, node_cnt, embedding_dim)) + # shape: (batch, node, embedding) + + seed_cnt = self.model_params['one_hot_seed_cnt'] + rand = torch.rand(batch_size, seed_cnt) + batch_rand_perm = rand.argsort(dim=1) + rand_idx = batch_rand_perm[:, :node_cnt] + + b_idx = torch.arange(batch_size)[:, None].expand(batch_size, node_cnt) + n_idx = torch.arange(node_cnt)[None, :].expand(batch_size, node_cnt) + col_emb[b_idx, n_idx, rand_idx] = 1 + # shape: (batch, node, embedding) + + self.encoded_row, self.encoded_col = self.encoder(row_emb, col_emb, problems) + # encoded_nodes.shape: (batch, node, embedding) + + self.decoder.set_kv(self.encoded_col) + + def forward(self, state): + + batch_size = state.BATCH_IDX.size(0) + pomo_size = state.BATCH_IDX.size(1) + + if (state.current_node == 0).all(): + encoded_last_row = _get_encoding(self.encoded_row, state.last_node) + 
self.decoder.set_q1(encoded_last_row) + + encoded_current_row = _get_encoding(self.encoded_row, state.current_node) + + # shape: (batch, pomo, embedding) + all_job_probs = self.decoder(encoded_current_row, ninf_mask=state.ninf_mask) + # shape: (batch, pomo, job) + + if self.training or self.model_params['eval_type'] == 'softmax': + while True: # to fix pytorch.multinomial bug on selecting 0 probability elements + with torch.no_grad(): + selected = all_job_probs.reshape(batch_size * pomo_size, -1).multinomial(1) \ + .squeeze(dim=1).reshape(batch_size, pomo_size) + # shape: (batch, pomo) + + prob = all_job_probs[state.BATCH_IDX, state.POMO_IDX, selected] \ + .reshape(batch_size, pomo_size) + # shape: (batch, pomo) + + if (prob != 0).all(): + break + else: + assert self.model_params['eval_type'] == 'greedy' + selected = all_job_probs.argmax(dim=2) + # shape: (batch, pomo) + prob = None + + return selected, prob + + +def _get_encoding(encoded_nodes, node_index_to_pick): + # encoded_nodes.shape: (batch, problem, embedding) + # node_index_to_pick.shape: (batch, pomo) + + batch_size = node_index_to_pick.size(0) + pomo_size = node_index_to_pick.size(1) + embedding_dim = encoded_nodes.size(2) + + gathering_index = node_index_to_pick[:, :, None].expand(batch_size, pomo_size, embedding_dim) + # shape: (batch, pomo, embedding) + + picked_nodes = encoded_nodes.gather(dim=1, index=gathering_index) + # shape: (batch, pomo, embedding) + + return picked_nodes + + +######################################## +# ENCODER +######################################## +class ATSP_Encoder(nn.Module): + def __init__(self, **model_params): + super().__init__() + encoder_layer_num = model_params['encoder_layer_num'] + self.layers = nn.ModuleList([EncoderLayer(**model_params) for _ in range(encoder_layer_num)]) + + def forward(self, row_emb, col_emb, cost_mat): + # col_emb.shape: (batch, col_cnt, embedding) + # row_emb.shape: (batch, row_cnt, embedding) + # cost_mat.shape: (batch, row_cnt, col_cnt) 
+ + for layer in self.layers: + row_emb, col_emb = layer(row_emb, col_emb, cost_mat) + + return row_emb, col_emb + + +class EncoderLayer(nn.Module): + def __init__(self, **model_params): + super().__init__() + self.row_encoding_block = EncodingBlock(**model_params) + self.col_encoding_block = EncodingBlock(**model_params) + + def forward(self, row_emb, col_emb, cost_mat): + # row_emb.shape: (batch, row_cnt, embedding) + # col_emb.shape: (batch, col_cnt, embedding) + # cost_mat.shape: (batch, row_cnt, col_cnt) + row_emb_out = self.row_encoding_block(row_emb, col_emb, cost_mat) + col_emb_out = self.col_encoding_block(col_emb, row_emb, cost_mat.transpose(1, 2)) + + return row_emb_out, col_emb_out + + +class EncodingBlock(nn.Module): + def __init__(self, **model_params): + super().__init__() + self.model_params = model_params + embedding_dim = self.model_params['embedding_dim'] + head_num = self.model_params['head_num'] + qkv_dim = self.model_params['qkv_dim'] + + self.Wq = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False) + self.Wk = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False) + self.Wv = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False) + self.mixed_score_MHA = MixedScore_MultiHeadAttention(**model_params) + self.multi_head_combine = nn.Linear(head_num * qkv_dim, embedding_dim) + + self.add_n_normalization_1 = AddAndInstanceNormalization(**model_params) + self.feed_forward = FeedForward(**model_params) + self.add_n_normalization_2 = AddAndInstanceNormalization(**model_params) + + def forward(self, row_emb, col_emb, cost_mat): + # NOTE: row and col can be exchanged, if cost_mat.transpose(1,2) is used + # input1.shape: (batch, row_cnt, embedding) + # input2.shape: (batch, col_cnt, embedding) + # cost_mat.shape: (batch, row_cnt, col_cnt) + head_num = self.model_params['head_num'] + + q = reshape_by_heads(self.Wq(row_emb), head_num=head_num) + # q shape: (batch, head_num, row_cnt, qkv_dim) + k = reshape_by_heads(self.Wk(col_emb), 
head_num=head_num) + v = reshape_by_heads(self.Wv(col_emb), head_num=head_num) + # kv shape: (batch, head_num, col_cnt, qkv_dim) + + out_concat = self.mixed_score_MHA(q, k, v, cost_mat) + # shape: (batch, row_cnt, head_num*qkv_dim) + + multi_head_out = self.multi_head_combine(out_concat) + # shape: (batch, row_cnt, embedding) + + out1 = self.add_n_normalization_1(row_emb, multi_head_out) + out2 = self.feed_forward(out1) + out3 = self.add_n_normalization_2(out1, out2) + + return out3 + # shape: (batch, row_cnt, embedding) + + +######################################## +# Decoder +######################################## + +class ATSP_Decoder(nn.Module): + def __init__(self, **model_params): + super().__init__() + self.model_params = model_params + embedding_dim = self.model_params['embedding_dim'] + head_num = self.model_params['head_num'] + qkv_dim = self.model_params['qkv_dim'] + + self.Wq_0 = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False) + self.Wq_1 = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False) + self.Wk = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False) + self.Wv = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False) + + self.multi_head_combine = nn.Linear(head_num * qkv_dim, embedding_dim) + + self.k = None # saved key, for multi-head attention + self.v = None # saved value, for multi-head_attention + self.single_head_key = None # saved key, for single-head attention + self.q1 = None # saved q1, for multi-head attention + + def set_kv(self, encoded_jobs): + # encoded_jobs.shape: (batch, job, embedding) + head_num = self.model_params['head_num'] + + self.k = reshape_by_heads(self.Wk(encoded_jobs), head_num=head_num) + self.v = reshape_by_heads(self.Wv(encoded_jobs), head_num=head_num) + # shape: (batch, head_num, job, qkv_dim) + self.single_head_key = encoded_jobs.transpose(1, 2) + # shape: (batch, embedding, job) + + def set_q1(self, encoded_q1): + # encoded_q.shape: (batch, n, embedding) # n can be 1 or pomo + head_num = 
self.model_params['head_num'] + + self.q1 = reshape_by_heads(self.Wq_1(encoded_q1), head_num=head_num) + # shape: (batch, head_num, n, qkv_dim) + + def forward(self, encoded_q0, ninf_mask): + # encoded_q4.shape: (batch, pomo, embedding) + # ninf_mask.shape: (batch, pomo, job) + + head_num = self.model_params['head_num'] + + # Multi-Head Attention + ####################################################### + q0 = reshape_by_heads(self.Wq_0(encoded_q0), head_num=head_num) + # shape: (batch, head_num, pomo, qkv_dim) + + q = self.q1 + q0 + # shape: (batch, head_num, pomo, qkv_dim) + + out_concat = self._multi_head_attention(q, self.k, self.v, rank3_ninf_mask=ninf_mask) + # shape: (batch, pomo, head_num*qkv_dim) + + mh_atten_out = self.multi_head_combine(out_concat) + # shape: (batch, pomo, embedding) + + # Single-Head Attention, for probability calculation + ####################################################### + score = torch.matmul(mh_atten_out, self.single_head_key) + # shape: (batch, pomo, job) + + sqrt_embedding_dim = self.model_params['sqrt_embedding_dim'] + logit_clipping = self.model_params['logit_clipping'] + + score_scaled = score / sqrt_embedding_dim + # shape: (batch, pomo, job) + + score_clipped = logit_clipping * torch.tanh(score_scaled) + + score_masked = score_clipped + ninf_mask + + probs = F.softmax(score_masked, dim=2) + # shape: (batch, pomo, job) + + return probs + + def _multi_head_attention(self, q, k, v, rank2_ninf_mask=None, rank3_ninf_mask=None): + # q shape: (batch, head_num, n, key_dim) : n can be either 1 or pomo + # k,v shape: (batch, head_num, node, key_dim) + # rank2_ninf_mask.shape: (batch, node) + # rank3_ninf_mask.shape: (batch, group, node) + + batch_s = q.size(0) + n = q.size(2) + node_cnt = k.size(2) + + head_num = self.model_params['head_num'] + qkv_dim = self.model_params['qkv_dim'] + sqrt_qkv_dim = self.model_params['sqrt_qkv_dim'] + + score = torch.matmul(q, k.transpose(2, 3)) + # shape: (batch, head_num, n, node) + + 
score_scaled = score / sqrt_qkv_dim + if rank2_ninf_mask is not None: + score_scaled = score_scaled + rank2_ninf_mask[:, None, None, :].expand(batch_s, head_num, n, node_cnt) + if rank3_ninf_mask is not None: + score_scaled = score_scaled + rank3_ninf_mask[:, None, :, :].expand(batch_s, head_num, n, node_cnt) + + weights = nn.Softmax(dim=3)(score_scaled) + # shape: (batch, head_num, n, node) + + out = torch.matmul(weights, v) + # shape: (batch, head_num, n, key_dim) + + out_transposed = out.transpose(1, 2) + # shape: (batch, n, head_num, key_dim) + + out_concat = out_transposed.reshape(batch_s, n, head_num * qkv_dim) + # shape: (batch, n, head_num*key_dim) + + return out_concat + + +######################################## +# NN SUB FUNCTIONS +######################################## + +def reshape_by_heads(qkv, head_num): + # q.shape: (batch, n, head_num*key_dim) : n can be either 1 or PROBLEM_SIZE + + batch_s = qkv.size(0) + n = qkv.size(1) + + q_reshaped = qkv.reshape(batch_s, n, head_num, -1) + # shape: (batch, n, head_num, key_dim) + + q_transposed = q_reshaped.transpose(1, 2) + # shape: (batch, head_num, n, key_dim) + + return q_transposed diff --git a/eval_atsp/ASHPPTrainer.py b/eval_atsp/ASHPPTrainer.py new file mode 100644 index 0000000..95b61dd --- /dev/null +++ b/eval_atsp/ASHPPTrainer.py @@ -0,0 +1,222 @@ + +""" +The MIT License + +Copyright (c) 2021 MatNet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ + + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import torch +from logging import getLogger + +from ASHPPEnv import ASHPPEnv as Env +from ASHPPModel import ASHPPModel as Model + +from torch.optim import Adam as Optimizer +from torch.optim.lr_scheduler import MultiStepLR as Scheduler + +from utils_atsp.utils import * + + +class ASHPPTrainer: + def __init__(self, + env_params, + model_params, + optimizer_params, + trainer_params): + + # save arguments + self.env_params = env_params + self.model_params = model_params + self.optimizer_params = optimizer_params + self.trainer_params = trainer_params + + # result folder, logger + self.logger = getLogger(name='trainer') + self.result_folder = get_result_folder() + self.result_log = LogData() + + # cuda + USE_CUDA = self.trainer_params['use_cuda'] + if USE_CUDA: + cuda_device_num = self.trainer_params['cuda_device_num'] + torch.cuda.set_device(cuda_device_num) + device = torch.device('cuda', cuda_device_num) + torch.set_default_tensor_type('torch.cuda.FloatTensor') + else: + device = torch.device('cpu') + torch.set_default_tensor_type('torch.FloatTensor') + + # Main Components + self.model = Model(**self.model_params) + self.env = Env(**self.env_params) + self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer']) + self.scheduler = Scheduler(self.optimizer, **self.optimizer_params['scheduler']) + + # Restore + self.start_epoch = 1 + model_load = trainer_params['model_load'] + if model_load['enable']: + checkpoint_fullname = 
'{path}/checkpoint-{epoch}.pt'.format(**model_load) + checkpoint = torch.load(checkpoint_fullname, map_location=device) + self.model.load_state_dict(checkpoint['model_state_dict']) + self.start_epoch = 1 + model_load['epoch'] + self.result_log.set_raw_data(checkpoint['result_log']) + self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + self.scheduler.last_epoch = model_load['epoch']-1 + self.logger.info('Saved Model Loaded !!') + + # utility + self.time_estimator = TimeEstimator() + + def run(self): + self.time_estimator.reset(self.start_epoch) + for epoch in range(self.start_epoch, self.trainer_params['epochs']+1): + self.logger.info('=================================================================') + + # LR Decay + self.scheduler.step() + + # Train + train_score, train_loss = self._train_one_epoch(epoch) + self.result_log.append('train_score', epoch, train_score) + self.result_log.append('train_loss', epoch, train_loss) + + ############################ + # Logs & Checkpoint + ############################ + elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(epoch, self.trainer_params['epochs']) + self.logger.info("Epoch {:3d}/{:3d}: Time Est.: Elapsed[{}], Remain[{}]".format( + epoch, self.trainer_params['epochs'], elapsed_time_str, remain_time_str)) + + all_done = (epoch == self.trainer_params['epochs']) + model_save_interval = self.trainer_params['logging']['model_save_interval'] + img_save_interval = self.trainer_params['logging']['img_save_interval'] + + if epoch > 1: # save latest images, every epoch + self.logger.info("Saving log_image") + image_prefix = '{}/latest'.format(self.result_folder) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], + self.result_log, labels=['train_score']) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], + self.result_log, labels=['train_loss']) + + if all_done or (epoch % 
model_save_interval) == 0: + self.logger.info("Saving trained_model") + checkpoint_dict = { + 'epoch': epoch, + 'model_state_dict': self.model.state_dict(), + 'optimizer_state_dict': self.optimizer.state_dict(), + 'scheduler_state_dict': self.scheduler.state_dict(), + 'result_log': self.result_log.get_raw_data() + } + torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch)) + + if all_done or (epoch % img_save_interval) == 0: + image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], + self.result_log, labels=['train_score']) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], + self.result_log, labels=['train_loss']) + + if all_done: + self.logger.info(" *** Training Done *** ") + self.logger.info("Now, printing log array...") + util_print_log_array(self.logger, self.result_log) + + def _train_one_epoch(self, epoch): + + score_AM = AverageMeter() + loss_AM = AverageMeter() + + train_num_episode = self.trainer_params['train_episodes'] + episode = 0 + loop_cnt = 0 + while episode < train_num_episode: + + remaining = train_num_episode - episode + batch_size = min(self.trainer_params['train_batch_size'], remaining) + + avg_score, avg_loss = self._train_one_batch(batch_size) + score_AM.update(avg_score, batch_size) + loss_AM.update(avg_loss, batch_size) + + episode += batch_size + + # Log First 10 Batch, only at the first epoch + if epoch == self.start_epoch: + loop_cnt += 1 + if loop_cnt <= 10: + self.logger.info('Epoch {:3d}: Train {:3d}/{:3d}({:1.1f}%) Score: {:.4f}, Loss: {:.4f}' + .format(epoch, episode, train_num_episode, 100. * episode / train_num_episode, + score_AM.avg, loss_AM.avg)) + + # Log Once, for each epoch + self.logger.info('Epoch {:3d}: Train ({:3.0f}%) Score: {:.4f}, Loss: {:.4f}' + .format(epoch, 100. 
* episode / train_num_episode, + score_AM.avg, loss_AM.avg)) + + return score_AM.avg, loss_AM.avg + + def _train_one_batch(self, batch_size): + + # Prep + ############################################### + self.model.train() + self.env.load_problems(batch_size) + reset_state, _, _ = self.env.reset() + self.model.pre_forward(reset_state) + + prob_list = torch.zeros(size=(batch_size, self.env.pomo_size, 0)) + # shape: (batch, pomo, 0~) + + # POMO Rollout + ############################################### + state, reward, done = self.env.pre_step() + while not done: + selected, prob = self.model(state) + # shape: (batch, pomo) + state, reward, done = self.env.step(selected) + + prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) + + # Loss + ############################################### + advantage = reward - reward.float().mean(dim=1, keepdims=True) + # shape: (batch, pomo) + log_prob = prob_list.log().sum(dim=2) + # size = (batch, pomo) + loss = -advantage * log_prob # Minus Sign: To Increase REWARD + # shape: (batch, pomo) + loss_mean = loss.mean() + + # Score + ############################################### + max_pomo_reward, _ = reward.max(dim=1) # get best results from pomo + score_mean = -max_pomo_reward.float().mean() # negative sign to make positive value + + # Step & Return + ############################################### + self.model.zero_grad() + loss_mean.backward() + self.optimizer.step() + return score_mean.item(), loss_mean.item() \ No newline at end of file diff --git a/eval_atsp/ATSPTester_glop.py b/eval_atsp/ATSPTester_glop.py index c3e318e..550e3f1 100644 --- a/eval_atsp/ATSPTester_glop.py +++ b/eval_atsp/ATSPTester_glop.py @@ -30,8 +30,8 @@ import os from logging import getLogger -from ATSPEnv import ATSPEnv as Env -from ATSPModel import ATSPModel as Model +from ASHPPEnv import ASHPPEnv as Env +from ASHPPModel import ASHPPModel as Model from utils_atsp.utils import get_result_folder, AverageMeter, TimeEstimator @@ -91,16 +91,20 @@ def 
run(self, insts): test_num_episode = insts.size(0) episode = 0 - ret = [] + scores = [] + + solutions = [] while episode < test_num_episode: remaining = test_num_episode - episode batch_size = min(self.tester_params['test_batch_size'], remaining) - aug_score = self._test_one_batch(episode, episode+batch_size, insts) + aug_score, batch_solutions = self._test_one_batch(episode, episode+batch_size, insts) + + scores.append(aug_score) - ret.append(aug_score) + solutions.append(batch_solutions) episode += batch_size @@ -117,7 +121,11 @@ def run(self, insts): # self.logger.info(" *** Test Done *** ") # self.logger.info(" NO-AUG SCORE: {:.4f} ".format(score_AM.avg)) # self.logger.info(" AUGMENTATION SCORE: {:.4f} ".format(aug_score_AM.avg)) - return ret + + scores = torch.cat(scores, dim=0) + solutions = torch.cat(solutions, dim=0) + + return scores, solutions def _test_one_batch(self, idx_start, idx_end, insts): @@ -127,6 +135,7 @@ def _test_one_batch(self, idx_start, idx_end, insts): # Augmentation ############################################### if self.tester_params['augmentation_enable']: + assert False, "Augmentation is not supported" aug_factor = self.tester_params['aug_factor'] batch_size = aug_factor*batch_size @@ -152,16 +161,19 @@ def _test_one_batch(self, idx_start, idx_end, insts): # Return ############################################### - batch_size = batch_size//aug_factor - aug_reward = reward.reshape(aug_factor, batch_size, self.env.pomo_size) - # shape: (augmentation, batch, pomo) - - max_pomo_reward, _ = aug_reward.max(dim=2) # get best results from pomo - # shape: (augmentation, batch) - no_aug_score = -max_pomo_reward[0, :].float().mean() # negative sign to make positive value + aug_reward = reward.reshape(batch_size, self.env.pomo_size) + # shape: (batch, pomo) + + # Get solutions + solutions = self.env.selected_node_list + # shape: (batch, pomo, node_cnt) + + max_pomo_reward, max_pomo_reward_idx = aug_reward.max(dim=1) # get best results from pomo + 
# shape: (batch) - max_aug_pomo_reward, _ = max_pomo_reward.max(dim=0) # get best results from augmentation - # shape: (batch,) - return -max_aug_pomo_reward.float() # negative sign to make positive value + optimal_solution = solutions[torch.arange(batch_size), max_pomo_reward_idx] + # shape: (batch, node_cnt) + + return max_pomo_reward.float(), optimal_solution # negative sign to make positive value diff --git a/eval_atsp/ATSProblemDef.py b/eval_atsp/ATSProblemDef.py index a9ced86..e4f7075 100644 --- a/eval_atsp/ATSProblemDef.py +++ b/eval_atsp/ATSProblemDef.py @@ -26,6 +26,7 @@ """ import torch +from tqdm import tqdm def get_random_problems(batch_size, node_cnt, problem_gen_params): @@ -107,9 +108,14 @@ def load_single_problem_from_file(filename, node_cnt, scaler): if not os.path.exists("../data/atsp"): os.mkdir("../data/atsp") - torch.manual_seed(1234) - + torch.manual_seed(1234) + dataset_size = 30 - for scale in [150, 200, 1000]: - problems = get_random_problems(dataset_size, scale, problem_gen_params) - torch.save(problems, "../data/atsp/ATSP{}.pt".format(scale)) \ No newline at end of file + for scale in [150, 250, 1000]: + problems = [] + for inst_id in tqdm(range(dataset_size)): + problem = get_random_problems(1, scale, problem_gen_params) + problems.append(problem) + problems = torch.cat(problems, dim=0) + torch.save(problems, "../data/atsp/ATSP{}.pt".format(scale)) + print(f"created ../data/atsp/ATSP{scale}.pt") \ No newline at end of file diff --git a/eval_atsp/test_glop.py b/eval_atsp/test_glop.py index 38f8d68..9fcc8ac 100644 --- a/eval_atsp/test_glop.py +++ b/eval_atsp/test_glop.py @@ -55,14 +55,25 @@ ########################################################################################## # parameters +##### GLOP parameters ##### +N_REVISER = 50 # We only test on Reviser-50; using more revisers requires code modifications +N_REVISIONS = 3 # number of revision iterations +N_SAMPLES = { + 150: 2000, + 250: 1000, + 1000: 500 + } # for sampling 
decoding during revision + + + env_params = { - 'node_cnt': 50, + 'node_cnt': N_REVISER, 'problem_gen_params': { 'int_min': 0, 'int_max': 1000*1000, 'scaler': 1000*1000 }, - 'pomo_size': 50 # same as node_cnt + 'pomo_size': 500, } model_params = { @@ -77,8 +88,8 @@ 'ms_hidden_dim': 16, 'ms_layer1_init': (1/2)**(1/2), 'ms_layer2_init': (1/16)**(1/2), - 'eval_type': 'softmax', - 'one_hot_seed_cnt': 20, # must be >= node_cnt + 'eval_type': 'softmax', # note here, can be greedy + 'one_hot_seed_cnt': N_REVISER, # must be >= node_cnt } tester_params = { @@ -90,8 +101,8 @@ }, 'saved_problem_folder': "../data/n20", 'saved_problem_filename': 'problem_20_0_1000000_{}.atsp', - 'test_batch_size': 1, - 'augmentation_enable': False, + 'test_batch_size': 999999, # Note this batch size is for revision + 'augmentation_enable': False, # No augementation for GLOP; requiring code modifications to enable 'aug_factor': 1, 'aug_batch_size': 1, } @@ -107,32 +118,58 @@ } -########################################################################################## -# main -L = 1.5 +########################################################################################## +# main def revision(tour, inst, tester): - revision_len = env_params['node_cnt'] - assert revision_len == 50 - sub_tours = tour.reshape(-1, revision_len) + sub_tours = tour.reshape(-1, N_REVISER) # shape: (batch, revision_len) sub_insts = [inst[sub_tour][:, sub_tour] for sub_tour in sub_tours] - original_scores = torch.stack([inst[sub_tour[:-1], torch.roll(sub_tour, shifts=-1)[:-1]].sum() for sub_tour in sub_tours]) - for sub_inst in sub_insts: # equivalent ATSP of each ASHPP - sub_inst[:, 0] += L - sub_inst[:, -1] += L - sub_inst[0, :] += L - sub_inst[-1, :] += L - sub_inst[0, 0] = sub_inst[0, -1] = sub_inst[-1, 0] = sub_inst[-1, -1] = 0 + original_scores = torch.tensor([cal_len_shpp(sub_tour, inst) for sub_tour in sub_tours]) # note that original_scores are positive values + # Scale the sub_insts to make the largest value 
1 + scale_coef = [sub_inst.max() for sub_inst in sub_insts] sub_insts = torch.stack(sub_insts) + sub_insts_scaled = sub_insts / torch.tensor(scale_coef)[:, None, None] - revised_scores = torch.stack(tester.run(sub_insts)) - 2 * L - - improved = original_scores - revised_scores - improved[improved < 0] = 0 + # Main part of the revision + revised_scores, solutions = tester.run(sub_insts_scaled) # solutions shape: (batch, revision_len) - return improved.sum().item() + # Scale back the revised scores + revised_scores = - revised_scores * torch.tensor(scale_coef) # shape: (batch,); add negative sign to make positive value + # Validate the subtours (TODO: comment out this loop to skip validation) + for i in range(len(sub_insts)): + validate_subtour(solutions[i], sub_insts[i], revised_scores[i]) + + # Compare the original scores and the revised scores + improved_scores = original_scores - revised_scores + # subtours should be arranged in the same order as the original tours if improved_scores <= 0 + solutions[improved_scores <= 0] = torch.arange(sub_tours.shape[1]) + # Gather the subtours according to the solutions + revised_tours = sub_tours.gather(1, solutions) + # Flatten the revised_tours + revised_tours = revised_tours.reshape(-1) # shape: (batch * revision_len) i.e. (node_cnt,) + return revised_tours + +def validate_subtour(subtour, dist, cost): + truth_cost = cal_len_shpp(subtour, dist) + assert truth_cost - cost < 1e-5 + # Assert subtour is a valid tour: (1) the starting node is 0 and the terminal node is len(subtour) - 1; (2) all nodes are visited exactly once. 
+ assert subtour[0] == 0 and subtour[-1] == len(subtour) - 1 + for i in range(1, len(subtour) - 1): + assert i in subtour + +def validate_tour(tour): + for i in range(1, len(tour) - 1): + assert i in tour + +def cal_len(tour, dist): + cost = dist[tour, torch.roll(tour, -1, -1)].sum() + return cost.item() + +def cal_len_shpp(tour, dist): + cost = dist[tour[:-1], tour[1:]].sum() + return cost.item() def main(n): dataset = torch.load('../data/atsp/ATSP{}.pt'.format(n), map_location='cuda:0') @@ -143,23 +180,42 @@ def main(n): model_params=model_params, tester_params=tester_params) - order = torch.randperm(n) + + torch.random.manual_seed(1) + order = torch.randperm(n, device='cpu').numpy() original_costs = [] revised_costs = [] + # true_cost = [] + + N_SHIFTS = N_REVISER // N_REVISIONS start = time.time() for inst in dataset: tour, cost = random_insertion_non_euclidean(inst, order) original_costs.append(cost) - improved_cost = revision(torch.tensor(tour.astype(np.int64)), inst, tester) - revised_costs.append(cost - improved_cost) + tour = torch.tensor(tour.astype(np.int64)) + + for revision_iter in range(N_REVISIONS): + tour = revision(tour, inst, tester) + # Shift the tour to the right by N_SHIFTS + tour = torch.roll(tour, shifts=N_SHIFTS, dims=-1) + + # TODO: uncomment to validate the solution + # validate_tour(tour) + cost = cal_len(tour, inst) + revised_costs.append(cost) + total_duration = time.time() - start - print("initial costs: ", sum(original_costs) / len(original_costs)) - print("revised costs: ", sum(revised_costs) / len(revised_costs)) + print("insertion costs: ", sum(original_costs) / len(original_costs)) + print("revised costs:", sum(revised_costs) / len(revised_costs)) print("total duration: ", total_duration) if __name__ == "__main__": - main(int(sys.argv[1])) + N = int(sys.argv[1]) + env_params['pomo_size'] = N_SAMPLES.get(N, 500) + + main(N) + \ No newline at end of file diff --git a/eval_atsp/train_glop.py b/eval_atsp/train_glop.py new file mode 
100644 index 0000000..df423f4 --- /dev/null +++ b/eval_atsp/train_glop.py @@ -0,0 +1,169 @@ + +""" +The MIT License + +Copyright (c) 2021 MatNet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + + + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +########################################################################################## +# Machine Environment Config + +DEBUG_MODE = False +USE_CUDA = not DEBUG_MODE +CUDA_DEVICE_NUM = 0 + +########################################################################################## +# Path Config + +import os +import sys +os.environ["CUDA_VISIBLE_DEVICES"] = "3" + +os.chdir(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, "..") # for problem_def +sys.path.insert(0, "../..") # for utils + + +########################################################################################## +# import + +import logging + +from utils_atsp.utils import create_logger, copy_all_src +from ASHPPTrainer import ASHPPTrainer as Trainer + + +########################################################################################## +# parameters + +env_params = { + 'node_cnt': 50, + 'problem_gen_params': { + 'int_min': 0, + 'int_max': 1000*1000, + 'scaler': 1000*1000 + }, + 'pomo_size': 50 # same as node_cnt +} + +model_params = { + 'embedding_dim': 256, + 'sqrt_embedding_dim': 256**(1/2), + 'encoder_layer_num': 5, + 'qkv_dim': 16, + 'sqrt_qkv_dim': 16**(1/2), + 'head_num': 16, + 'logit_clipping': 10, + 'ff_hidden_dim': 512, + 'ms_hidden_dim': 16, + 'ms_layer1_init': (1/2)**(1/2), + 'ms_layer2_init': (1/16)**(1/2), + 'eval_type': 'argmax', + 'one_hot_seed_cnt': 50, # must be >= node_cnt +} + +optimizer_params = { + 'optimizer': { + 'lr': 3*1e-4, + 'weight_decay': 1e-6 + }, + 'scheduler': { + 'milestones': [2001, 2101], # if further training is needed + 'gamma': 0.1 + } +} + +trainer_params = { + 'use_cuda': USE_CUDA, + 'cuda_device_num': CUDA_DEVICE_NUM, + 'epochs': 1000, + 'train_episodes': 10*1000, + 'train_batch_size': 200, + 'logging': { + 'model_save_interval': 100, + 'img_save_interval': 200, + 'log_image_params_1': { + 'json_foldername': 'log_image_style', + 'filename': 'style.json' + }, + 'log_image_params_2': { + 'json_foldername': 'log_image_style', + 
'filename': 'style_loss.json' + }, + }, + 'model_load': { + 'enable': True, # enable loading pre-trained model + 'path': './result/20240720_153647_matnet_train', # directory path of pre-trained model and log files saved. + 'epoch': 600, # epoch version of pre-trained model to load. + } +} + +logger_params = { + 'log_file': { + 'desc': 'matnet_train', + 'filename': 'log.txt' + } +} + + +########################################################################################## +# main + +def main(): + if DEBUG_MODE: + _set_debug_mode() + + create_logger(**logger_params) + _print_config() + + trainer = Trainer(env_params=env_params, + model_params=model_params, + optimizer_params=optimizer_params, + trainer_params=trainer_params) + + copy_all_src(trainer.result_folder) + + trainer.run() + + +def _set_debug_mode(): + + global trainer_params + trainer_params['epochs'] = 2 + trainer_params['train_episodes'] = 4 + trainer_params['train_batch_size'] = 2 + trainer_params['validate_episodes'] = 4 + trainer_params['validate_batch_size'] = 2 + + +def _print_config(): + logger = logging.getLogger('root') + logger.info('DEBUG_MODE: {}'.format(DEBUG_MODE)) + logger.info('USE_CUDA: {}, CUDA_DEVICE_NUM: {}'.format(USE_CUDA, CUDA_DEVICE_NUM)) + [logger.info(g_key + "{}".format(globals()[g_key])) for g_key in globals().keys() if g_key.endswith('params')] + + +########################################################################################## + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/utils/insertion/insertion.so b/utils/insertion/insertion.so index eaa1b70..5ad219c 100755 Binary files a/utils/insertion/insertion.so and b/utils/insertion/insertion.so differ diff --git a/utils/insertion/makefile b/utils/insertion/makefile index a8011de..54f4747 100644 --- a/utils/insertion/makefile +++ b/utils/insertion/makefile @@ -1,13 +1,14 @@ cxx = g++ -pythonroot = $(shell python3.8 -c "import sys; print(sys.prefix)") -numpyroot = $(shell python3.8 -c 
"import numpy,os; print(os.path.dirname(numpy.__file__))") -cxxflags = -std=c++17 -O3 -fPIC -I$(pythonroot)/include/python3.8 -I$(numpyroot)/core/include/ +pythonversion = $(shell python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") +pythonroot = $(shell "python$(pythonversion)" -c "import sys; print(sys.prefix)") +numpyroot = $(shell "python$(pythonversion)" -c "import numpy,os; print(os.path.dirname(numpy.__file__))") +cxxflags = -std=c++17 -O3 -fPIC -I$(pythonroot)/include/python$(pythonversion) -I$(numpyroot)/core/include/ sources = $(wildcard src/*.cpp) targets := $(patsubst %.cpp,%.o,$(sources)) depends := $(patsubst %.cpp,%.d,$(sources)) -.PHONY: default clean test; +.PHONY: default clean testtsp testtsp_ne testcvrp; default: insertion.so; diff --git a/utils/insertion/src/randomInsertion.cpp b/utils/insertion/src/randomInsertion.cpp index c529a97..aa50388 100644 --- a/utils/insertion/src/randomInsertion.cpp +++ b/utils/insertion/src/randomInsertion.cpp @@ -55,7 +55,8 @@ unsigned *Insertion::randomInsertion(unsigned *order) node2->next = node1; node1->next = node2; route = node1; - node1->length = node2->length = tspi->getdist(node1->value, node2->value); + node2->length = tspi->getdist(node1->value, node2->value); + node1->length = tspi->getdist(node2->value, node1->value); } for (unsigned i = 2; i < cc; i++) @@ -68,7 +69,7 @@ unsigned *Insertion::randomInsertion(unsigned *order) // get target list and distances // and get insert position with minimum cost Node *thisnode = route, *nextnode = thisnode->next; - float thisdist = tspi->getdist(thisnode->value, city), nextdist = 0; + float thisdist = 0, nextdist = 0; Node *minnode = thisnode; float mindelta = INFINITY; float td = 0.0, nd = 0.0; @@ -76,14 +77,15 @@ unsigned *Insertion::randomInsertion(unsigned *order) for (unsigned j = 0; j < i; j++) { nextnode = thisnode->next; - nextdist = tspi->getdist(nextnode->value, city); + thisdist = tspi->getdist(thisnode->value, city); + 
nextdist = tspi->getdist(city, nextnode->value); float delta = thisdist + nextdist - nextnode->length; if (delta < mindelta) { mindelta = delta, minnode = thisnode; td = thisdist, nd = nextdist; } - thisnode = nextnode, thisdist = nextdist; + thisnode = nextnode; } // insert the selected node @@ -135,4 +137,4 @@ Node::~Node() { if (next != nullptr) delete next; -} \ No newline at end of file +}