Skip to content

Commit

Permalink
Merge pull request #5 from annaformaniuk/feature/similarity_tools
Browse files Browse the repository at this point in the history
Similarity measures implemented with movingpandas trajectories (and tools for analysis)
  • Loading branch information
SbastianGarzon authored Jun 23, 2020
2 parents 62de2d8 + 35c3ecc commit 7bc0060
Show file tree
Hide file tree
Showing 3 changed files with 338 additions and 35 deletions.
2 changes: 1 addition & 1 deletion envirocar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
from .trajectories.preprocessing import Preprocessing
from .trajectories.preprocessing import GeneralizationType
from .trajectories.track_converter import TrackConverter
from .trajectories.track_similarity import TrackSimilarity
from .trajectories.track_similarity import *
170 changes: 136 additions & 34 deletions envirocar/trajectories/track_similarity.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,153 @@
import numpy as np
import pandas as pd
import similaritymeasures
from math import factorial
import matplotlib.pyplot as plt
from itertools import combinations
from timeit import default_timer as timer

# Names of the similaritymeasures functions that track_similarity accepts.
_SIMILARITY_METHODS = (
    'pcm',
    'frechet_dist',
    'area_between_two_curves',
    'curve_length_measure',
    'dtw',
)


def _trajectory_coordinates(traj):
    """Return the x/y values of ``traj.df['geometry']`` as an (n, 2) array."""
    geometry = traj.df['geometry']
    # Stacking the two coordinate series sizes the array from the geometry
    # column itself instead of from the non-null count of an unrelated column.
    return np.column_stack((geometry.x, geometry.y))


def track_similarity(trajA, trajB, method):
    """ Compute similarity measures using the similaritymeasures package
    https://pypi.org/project/similaritymeasures/

    Keyword Arguments:
        trajA {movingpandas trajectory} -- movingpandas trajectory
        trajB {movingpandas trajectory} -- movingpandas trajectory
        method {string} -- Name of the method to compute similarity
            pcm: Partial Curve Mapping
            frechet_dist: Discrete Frechet distance
            area_between_two_curves: Area method
            curve_length_measure: Curve Length
            dtw: Dynamic Time Warping

    Returns:
        similarity -- Float value computed as 1 / (1 + distance), where
            distance is the value returned by the selected method. Values
            close to 1 correspond to high similarity.

    Raises:
        RuntimeError -- If method is not one of the names listed above.
    """
    # Validate first so that an unknown method fails before any trajectory
    # data is touched.
    if method not in _SIMILARITY_METHODS:
        raise RuntimeError('Method not available')

    similarity_method = getattr(similaritymeasures, method)

    trajA_np = _trajectory_coordinates(trajA)
    trajB_np = _trajectory_coordinates(trajB)

    distance = similarity_method(trajA_np, trajB_np)
    if method == 'dtw':
        # dtw returns a (distance, matrix) pair; only the distance is used.
        distance, _ = distance

    return 1 / (1 + distance)

def crossed_similarity(list_traj, method):
    """ Compute similarity measures of a list of trajectories

    Keyword Arguments:
        list_traj {list} -- List containing movingpandas trajectories
        method {string} -- Name of the method to compute similarity
            pcm: Partial Curve Mapping
            frechet_dist: Discrete Frechet distance
            area_between_two_curves: Area method
            curve_length_measure: Curve Length
            dtw: Dynamic Time Warping

    Returns:
        df {dataframe} -- Dataframe with one row per pair of trajectories
            from list_traj (columns 'Trajectory_1', 'Trajectory_2',
            'Correlation'), sorted by 'Correlation' in descending order.

    Raises:
        RuntimeError -- If list_traj holds fewer than two trajectories.
    """
    n = len(list_traj)

    if n <= 1:
        raise RuntimeError('More than 1 trajectory is required')

    # Number of unordered pairs, kept as an exact integer so it can be
    # compared with the loop counter without float rounding.
    number_comb = n * (n - 1) // 2

    start = timer()

    # Read every track id once instead of once per pair.
    labelled = [(traj.df['track.id'].unique()[0], traj) for traj in list_traj]

    rows = []
    pairs = combinations(labelled, 2)
    for i, ((id_1, traj_1), (id_2, traj_2)) in enumerate(pairs, start=1):
        rows.append((id_1, id_2, track_similarity(traj_1, traj_2, method)))

        # Progress is reported every 10 pairs and once more on the last pair.
        if i % 10 == 0 or i == number_comb:
            print(round(i / number_comb * 100, 1), "% of ", "calculations", sep='', end='\r')

    df = pd.DataFrame(rows, columns=['Trajectory_1', 'Trajectory_2', 'Correlation'])
    df = df.sort_values(by=['Correlation'], ascending=False).reset_index(drop=True)

    elapsed = timer() - start

    print("\n%s similarity measures in %0.2f seconds" % (len(rows), elapsed))
    return df

def get_similarity_matrix(df):
    """ Returns a similarity matrix using the crossed similarity dataframe

    Keyword Arguments:
        df {dataframe} -- Crossed similarity dataframe with the columns
            'Trajectory_1', 'Trajectory_2' and 'Correlation'

    Returns:
        df {dataframe} -- Similarity matrix of trajectories (Symmetric
            matrix). Rows and columns are the sorted unique trajectory
            labels, the diagonal is 1 and pairs missing from the input are 0.
    """
    labels = np.unique(list(df['Trajectory_1'].unique()) + list(df['Trajectory_2'].unique()))

    # Each pair appears once in the input, so the pivot fills only one
    # triangle; reindexing makes it square over every label.
    triangle = df.pivot(index='Trajectory_1', columns='Trajectory_2', values='Correlation')
    triangle = triangle.reindex(index=labels, columns=labels).fillna(0)

    values = triangle.to_numpy(dtype=float)
    # Mirror the filled triangle onto the empty one.
    values = values + values.T
    # Set self-similarity explicitly instead of double counting self-pairs
    # and rewriting every cell that happens to equal 2.
    np.fill_diagonal(values, 1.0)

    return pd.DataFrame(values, index=labels, columns=labels)

def plot_similarity_matrix(df_similarity_matrix, title):
    """ Generates similarity matrix plot

    Rows and columns are both arranged by ascending column sum, so the
    trajectories with the lowest total similarity come first.

    Keyword Arguments:
        df_similarity_matrix {dataframe} -- Similarity matrix of trajectories.
            Every column label must also be a row label (square matrix).
        title {string} -- Title drawn above the plot
    """
    # Apply one ordering to both axes. Sorting rows by value and then calling
    # sort_index would simply put the rows back in label order.
    order = df_similarity_matrix.sum().sort_values(ascending=True).index
    df = df_similarity_matrix.loc[order, order]

    f = plt.figure(figsize=(19, 15))
    plt.matshow(df, fignum=f.number)
    plt.title(title, y=1.2, fontsize=25)
    plt.xticks(range(df.shape[1]), df.columns, fontsize=10, rotation=90)
    # Row ticks are labelled from the row index, not from the columns.
    plt.yticks(range(df.shape[0]), df.index, fontsize=10)
    cb = plt.colorbar()
    cb.ax.tick_params(labelsize=14)
201 changes: 201 additions & 0 deletions examples/trajectories_similarity.ipynb

Large diffs are not rendered by default.

0 comments on commit 7bc0060

Please sign in to comment.