---
# config.yaml

# 1. Input data: full path to each dataset (.xlsx or .csv), with the variables
#    on the rows (see the demo examples). Paths are single-quoted so that
#    backslashes (e.g. in Windows paths) are taken literally.
# First dataset (processed features or not, depending on how you compute the
# similarity distances).
Data_X: 'path/to/Data_X.xlsx_or_.csv'
# Second dataset (processed features or not, depending on how you compute the
# similarity distances).
Data_Y: 'path/to/Data_Y.xlsx_or_.csv'
true_labels_file: 'path/to/true_labels_file.xlsx_or_.csv'  # optional
# 2. Distance to compute as similarity within each dataset: a `metric` value
#    from scipy.spatial.distance.pdist, or one of the "precomputed" options.
# First dataset: use "precomputed" if you provide the distance and it is not
# Euclidean, "precomputed_Euclidean" if you provide it and it is Euclidean.
sim_X: 'Euclidean'
# If sim_X is not Euclidean, embed it.
# NOTE(review): this value is a quoted string, not a YAML boolean; confirm the
# consumer expects the string before changing it.
Eucl_X: 'TRUE'
# Second dataset: use "precomputed" if you provide the distance and it is not
# Euclidean, "precomputed_Euclidean" if you provide it and it is Euclidean.
sim_Y: 'Euclidean'
# If sim_Y is not Euclidean, embed it (quoted string, as for Eucl_X).
Eucl_Y: 'TRUE'
# 3. Distance to use as association; this file must always be given.
d_XY: 'path/to/association_XY.xlsx'
# 4. Give d_X when sim_X is "precomputed". Otherwise d_X is not used, but a
#    placeholder file is still required (any file will do).
d_X: 'path/to/similarity_X.xlsx'
# 5. Give d_Y when sim_Y is "precomputed". Otherwise d_Y is not used, but a
#    placeholder file is still required (any file will do).
d_Y: 'path/to/similarity_Y.xlsx'
# 6. OPTIONAL: also embed the distance of the points to the origin
#    (their magnitudes). Values are quoted strings, not YAML booleans.
norm_X: 'TRUE'
norm_Y: 'TRUE'
# 7a. Clustering parameters (for the available options, see README.md).
clustering_method: 'Agglomerative_ward'
num_clust: 4
# 7b. OPTIONAL: get Silhouette, Elbow, and Distortion to help choose the
#     number of clusters.
eval_clust: true  # must be false when there are too few points in X and Y
# Range of cluster numbers to evaluate.
min_num: 2
max_num: 6  # cannot be larger than the total number of points in X and Y
# 8. OPTIONAL: get a 2-dimensional projection.
fig_miasa: true
# 9. If fig_miasa is true, choose the projection method: "UMAP", "t-SNE",
#    "MDS", or "Isomap" (sklearn.manifold).
fig_method: 'UMAP'
# UMAP and t-SNE parameter; cannot be larger than the total number of points
# in X and Y.
n_neighbors: 15
palette: 'tab20'  # seaborn palette parameter
min_dist: 0.99  # only used in UMAP
# Show the datapoints and labels or not (use short variable names, otherwise
# the figure may become crowded).
show_labels: false
convex_hull: true  # fill the convex hull of each predicted cluster
size_X: 250  # size of markers for points in X
size_Y: 250  # size of markers for points in Y
marker_X: 'o'  # matplotlib marker for points in X
marker_Y: '^'  # matplotlib marker for points in Y
num_col: 2  # number of panels on figure columns
# OPTIONAL: strong associations, i.e. association distance below some
# threshold.
fig_miasa_connect: false
# Give a number: an association distance strictly below it defines a
# connection. Leave as the string "none" to use connection_file instead.
connect_threshold: 'none'
# If connect_threshold is "none", give a table of 0 (absence of connection) or
# 1 (presence of connection) between each X and Y variable (same structure as
# association_XY, same file type as d_XY).
connection_file: 'path/to/connection_file_of_same_type_as_d_XY'
# Centre of the connection lines: either variable "X" or variable "Y".
connect_center: 'Y'