test.py

from __future__ import absolute_import, division, print_function

import os
import sys
import glob
import argparse
import numpy as np
import PIL.Image as pil
from PIL import Image

import matplotlib.pyplot as plt

import torch
import model
from torchvision import transforms, datasets

import os
import cv2
#from layers import disp_to_depth
#from utils import download_model_if_doesnt_exist


def get_args():
	parser = argparse.ArgumentParser(description="Testing arguments for MonoLayout")
	parser.add_argument("--image_path", type=str,
						help="path to folder of images", required=True)
	parser.add_argument("--model_path", type=str,
						help="path to MonoLayout model", required=True)
	parser.add_argument("--ext", type=str,
						default="png", help="extension of images in the folder")
	parser.add_argument("--out_dir", type=str,
						default="output directory to save topviews")
	parser.add_argument("--type", type=str,
						default="static/dynamic/both")

	return parser.parse_args()


def save_topview(idx, tv, name_dest_im):
	tv_np = tv.squeeze().cpu().numpy()
	true_top_view = np.zeros((tv_np.shape[1], tv_np.shape[2]))
	true_top_view[tv_np[1] > tv_np[0]] = 255
	dir_name = os.path.dirname(name_dest_im)
	if not os.path.exists(dir_name):
		os.makedirs(dir_name)
	# print(name_dest_im)
	# im = Image.fromarray(true_top_view)
	# im.save(name_dest_im)
	cv2.imwrite(name_dest_im, true_top_view)

	print("Saved prediction to {}".format(name_dest_im))


def test(args):
	models = {}
	device = torch.device("cuda")
	encoder_path = os.path.join(args.model_path, "encoder.pth")
	encoder_dict = torch.load(encoder_path, map_location=device)
	feed_height = encoder_dict["height"]
	feed_width = encoder_dict["width"]
	models["encoder"] = model.Encoder(18, feed_width, feed_height, False)
	filtered_dict_enc = {k: v for k, v in encoder_dict.items() if k in models["encoder"].state_dict()}
	models["encoder"].load_state_dict(filtered_dict_enc)

	if args.type == "both":
		static_decoder_path = os.path.join(args.model_path, "static_decoder.pth")
		dynamic_decoder_path = os.path.join(args.model_path, "dynamic_decoder.pth")
		models["static_decoder"] = model.Decoder(models["encoder"].resnet_encoder.num_ch_enc)
		models["static_decoder"].load_state_dict(torch.load(static_decoder_path, map_location=device))
		models["dynamic_decoder"] = model.Decoder(models["encoder"].resnet_encoder.num_ch_enc)
		models["dynamic_decoder"].load_state_dict(torch.load(dynamic_decoder_path, map_location=device))
	else:
		decoder_path = os.path.join(args.model_path, "decoder.pth")
		models["decoder"] = model.Decoder(models["encoder"].resnet_encoder.num_ch_enc)
		models["decoder"].load_state_dict(torch.load(decoder_path, map_location=device))

	for key in models.keys():
		models[key].to(device)
		models[key].eval()


	if os.path.isfile(args.image_path):
	    # Only testing on a single image
	    paths = [args.image_path]
	    output_directory = os.path.dirname(args.image_path)
	elif os.path.isdir(args.image_path):
	    # Searching folder for images
	    paths = glob.glob(os.path.join(args.image_path, '*.{}'.format(args.ext)))
	    output_directory = args.out_dir
	    try:
	        os.mkdir(output_directory)
	    except:
	        pass
	else:
	    raise Exception("Can not find args.image_path: {}".format(args.image_path))


	print("-> Predicting on {:d} test images".format(len(paths)))

	# PREDICTING ON EACH IMAGE IN TURN
	with torch.no_grad():
	    for idx, image_path in enumerate(paths):

	        # Load image and preprocess
	        input_image = pil.open(image_path).convert('RGB')
	        original_width, original_height = input_image.size
	        input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
	        input_image = transforms.ToTensor()(input_image).unsqueeze(0)

	        # PREDICTION
	        input_image = input_image.to(device)
	        features = models["encoder"](input_image)
	        output_name = os.path.splitext(os.path.basename(image_path))[0]
	        print(" Processing {:d} of {:d} images - ".format(idx + 1, len(paths)))
	        if args.type=="both":
	            static_tv = models["static_decoder"](features, is_training=False)
	            dynamic_tv = models["dynamic_decoder"](features, is_training=False)
	            save_topview(idx, static_tv, os.path.join(args.out_dir, "static",  "{}.png".format(output_name)))
	            save_topview(idx, dynamic_tv, os.path.join(args.out_dir, "dynamic",  "{}.png".format(output_name)))
	        else:
	        	tv = models["decoder"](features, is_training= False)
	        	save_topview(idx, tv, os.path.join(args.out_dir, args.type,  "{}.png".format(output_name)))


	print('-> Done!')


if __name__ == "__main__":
	args = get_args()
	test(args)