Skip to content

Commit 612a58a

Browse files
committed
fixing tests to exclude unchanged rows
1 parent 78498f0 commit 612a58a

File tree

4 files changed

+20
-176
lines changed

4 files changed

+20
-176
lines changed

src/cli/src/parse_and_run.rs

-28
Original file line numberDiff line numberDiff line change
@@ -1003,34 +1003,6 @@ async fn p_diff(sub_matches: &ArgMatches, is_remote: bool) {
10031003
}
10041004
}
10051005

1006-
// pub async fn diff(sub_matches: &ArgMatches) {
1007-
// let is_remote = false;
1008-
// p_diff(sub_matches, is_remote).await
1009-
// }
1010-
1011-
// async fn p_diff(sub_matches: &ArgMatches, is_remote: bool) {
1012-
// // First arg is optional
1013-
// let file_or_commit_id = sub_matches
1014-
// .get_one::<String>("FILE_OR_REVISION")
1015-
// .expect("required");
1016-
// let path = sub_matches.get_one::<String>("PATH");
1017-
// if let Some(path) = path {
1018-
// match dispatch::diff(Some(file_or_commit_id), path, is_remote).await {
1019-
// Ok(_) => {}
1020-
// Err(err) => {
1021-
// eprintln!("{err}")
1022-
// }
1023-
// }
1024-
// } else {
1025-
// match dispatch::diff(None, file_or_commit_id, is_remote).await {
1026-
// Ok(_) => {}
1027-
// Err(err) => {
1028-
// eprintln!("{err}")
1029-
// }
1030-
// }
1031-
// }
1032-
// }
1033-
10341006
pub async fn clone(sub_matches: &ArgMatches) {
10351007
let url = sub_matches.get_one::<String>("URL").expect("required");
10361008
let shallow = sub_matches.get_flag("shallow");

src/lib/src/api/local/compare.rs

+8-85
Original file line numberDiff line numberDiff line change
@@ -190,16 +190,9 @@ pub fn get_cached_compare(
190190
path: compare_entry_2.path.to_str().map(|s| s.to_owned()).unwrap(),
191191
};
192192

193-
// let match_df = tabular::read_df(get_compare_match_path(repo, compare_id), DFOpts::empty())?;
194193
let diff_df = tabular::read_df(get_compare_diff_path(repo, compare_id), DFOpts::empty())?;
195-
// let left_only_df = tabular::read_df(get_compare_left_path(repo, compare_id), DFOpts::empty())?;
196-
// let right_only_df =
197-
// tabular::read_df(get_compare_right_path(repo, compare_id), DFOpts::empty())?;
198194

199-
// let match_schema = Schema::from_polars(&match_df.schema());
200195
let diff_schema = Schema::from_polars(&diff_df.schema());
201-
// let left_only_schema = Schema::from_polars(&left_only_df.schema());
202-
// let right_only_schema = Schema::from_polars(&right_only_df.schema());
203196

204197
let source_df_left = CompareSourceDF::from_name_df_entry_schema(
205198
LEFT,
@@ -214,14 +207,6 @@ pub fn get_cached_compare(
214207
right_schema.schema.clone(),
215208
);
216209

217-
// let derived_df_match = CompareDerivedDF::from_compare_info(
218-
// MATCH,
219-
// Some(compare_id),
220-
// Some(&left_commit),
221-
// Some(&right_commit),
222-
// match_df,
223-
// match_schema,
224-
// );
225210
let derived_df_diff = CompareDerivedDF::from_compare_info(
226211
DIFF,
227212
Some(compare_id),
@@ -230,34 +215,14 @@ pub fn get_cached_compare(
230215
diff_df,
231216
diff_schema,
232217
);
233-
// let derived_df_left_only = CompareDerivedDF::from_compare_info(
234-
// LEFT_ONLY,
235-
// Some(compare_id),
236-
// Some(&left_commit),
237-
// Some(&right_commit),
238-
// left_only_df,
239-
// left_only_schema,
240-
// );
241-
// let derived_df_right_only = CompareDerivedDF::from_compare_info(
242-
// RIGHT_ONLY,
243-
// Some(compare_id),
244-
// Some(&left_commit),
245-
// Some(&right_commit),
246-
// right_only_df,
247-
// right_only_schema,
248-
// );
249218

250219
let source_dfs: HashMap<String, CompareSourceDF> = HashMap::from([
251220
(LEFT.to_string(), source_df_left),
252221
(RIGHT.to_string(), source_df_right),
253222
]);
254223

255-
let derived_dfs: HashMap<String, CompareDerivedDF> = HashMap::from([
256-
// (MATCH.to_string(), derived_df_match),
257-
(DIFF.to_string(), derived_df_diff),
258-
// (LEFT_ONLY.to_string(), derived_df_left_only),
259-
// (RIGHT_ONLY.to_string(), derived_df_right_only),
260-
]);
224+
let derived_dfs: HashMap<String, CompareDerivedDF> =
225+
HashMap::from([(DIFF.to_string(), derived_df_diff)]);
261226

262227
let compare_results = CompareTabular {
263228
source: source_dfs,
@@ -561,29 +526,14 @@ fn build_compare_tabular(
561526
compare_tabular_raw: &CompareTabularRaw,
562527
compare_id: Option<&str>,
563528
) -> CompareTabular {
564-
// let left_only_df = &compare_tabular_raw.left_only_df;
565-
// let right_only_df = &compare_tabular_raw.right_only_df;
566529
let diff_df = &compare_tabular_raw.diff_df.clone();
567-
// let match_df = &compare_tabular_raw.match_df.clone();
568530

569531
let diff_schema = Schema::from_polars(&diff_df.schema());
570-
// let match_schema = Schema::from_polars(&match_df.schema());
571-
// let left_only_schema = Schema::from_polars(&left_only_df.schema());
572-
// let right_only_schema = Schema::from_polars(&right_only_df.schema());
573532

574533
let df_1_size = DataFrameSize::from_df(df_1);
575534
let df_2_size = DataFrameSize::from_df(df_2);
576535
let og_schema_1 = Schema::from_polars(&df_1.schema());
577536
let og_schema_2 = Schema::from_polars(&df_2.schema());
578-
579-
// let derived_df_match = CompareDerivedDF::from_compare_info(
580-
// MATCH,
581-
// compare_id,
582-
// compare_entry_1.commit_entry.as_ref(),
583-
// compare_entry_2.commit_entry.as_ref(),
584-
// match_df.clone(),
585-
// match_schema,
586-
// );
587537
let derived_df_diff = CompareDerivedDF::from_compare_info(
588538
DIFF,
589539
compare_id,
@@ -592,22 +542,6 @@ fn build_compare_tabular(
592542
diff_df.clone(),
593543
diff_schema,
594544
);
595-
// let derived_df_left_only = CompareDerivedDF::from_compare_info(
596-
// LEFT_ONLY,
597-
// compare_id,
598-
// compare_entry_1.commit_entry.as_ref(),
599-
// compare_entry_2.commit_entry.as_ref(),
600-
// left_only_df.clone(),
601-
// left_only_schema,
602-
// );
603-
// let derived_df_right_only = CompareDerivedDF::from_compare_info(
604-
// RIGHT_ONLY,
605-
// compare_id,
606-
// compare_entry_1.commit_entry.as_ref(),
607-
// compare_entry_2.commit_entry.as_ref(),
608-
// right_only_df.clone(),
609-
// right_only_schema,
610-
// );
611545

612546
let source_df_left = CompareSourceDF {
613547
name: LEFT.to_string(),
@@ -638,12 +572,8 @@ fn build_compare_tabular(
638572
(RIGHT.to_string(), source_df_right),
639573
]);
640574

641-
let derived_dfs: HashMap<String, CompareDerivedDF> = HashMap::from([
642-
// (MATCH.to_string(), derived_df_match),
643-
(DIFF.to_string(), derived_df_diff),
644-
// (LEFT_ONLY.to_string(), derived_df_left_only),
645-
// (RIGHT_ONLY.to_string(), derived_df_right_only),
646-
]);
575+
let derived_dfs: HashMap<String, CompareDerivedDF> =
576+
HashMap::from([(DIFF.to_string(), derived_df_diff)]);
647577

648578
CompareTabular {
649579
source: source_dfs,
@@ -818,6 +748,7 @@ mod tests {
818748
compare_entry_2,
819749
keys,
820750
targets,
751+
vec![],
821752
None,
822753
);
823754

@@ -940,6 +871,7 @@ mod tests {
940871
String::from("gender"),
941872
],
942873
vec![String::from("target"), String::from("other_target")],
874+
vec![],
943875
None,
944876
)?;
945877

@@ -973,15 +905,10 @@ mod tests {
973905
.lazy()
974906
.filter(col(diff_col).eq(lit("modified")))
975907
.collect()?;
976-
let unchanged_df = df
977-
.lazy()
978-
.filter(col(diff_col).eq(lit("unchanged")))
979-
.collect()?;
980908

981909
assert_eq!(added_df.height(), 1);
982910
assert_eq!(removed_df.height(), 2);
983911
assert_eq!(modified_df.height(), 5);
984-
assert_eq!(unchanged_df.height(), 6);
985912

986913
// Update one of the files
987914
let path = Path::new("compare_left.csv");
@@ -1029,7 +956,7 @@ mod tests {
1029956
assert!(maybe_compare.is_none());
1030957

1031958
// Create the compare and add to the cache to ensure proper update
1032-
let result = api::local::compare::compare_files(
959+
api::local::compare::compare_files(
1033960
&repo,
1034961
Some("a_compare_id"),
1035962
new_compare_entry_1,
@@ -1040,6 +967,7 @@ mod tests {
1040967
String::from("gender"),
1041968
],
1042969
vec![String::from("target"), String::from("other_target")],
970+
vec![],
1043971
None,
1044972
)?;
1045973

@@ -1064,15 +992,10 @@ mod tests {
1064992
.lazy()
1065993
.filter(col(diff_col).eq(lit("modified")))
1066994
.collect()?;
1067-
let unchanged_df = df
1068-
.lazy()
1069-
.filter(col(diff_col).eq(lit("unchanged")))
1070-
.collect()?;
1071995

1072996
assert_eq!(added_df.height(), 6);
1073997
assert_eq!(removed_df.height(), 0);
1074998
assert_eq!(modified_df.height(), 0);
1075-
assert_eq!(unchanged_df.height(), 6);
1076999
Ok(())
10771000
})
10781001
}

src/lib/src/api/local/compare/join_compare.rs

-31
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ const DIFF_STATUS_COL: &str = ".oxen.diff.status";
1515
const DIFF_STATUS_ADDED: &str = "added";
1616
const DIFF_STATUS_REMOVED: &str = "removed";
1717
const DIFF_STATUS_MODIFIED: &str = "modified";
18-
const DIFF_STATUS_UNCHANGED: &str = "unchanged";
1918

2019
pub fn compare(
2120
df_1: &DataFrame,
@@ -202,36 +201,6 @@ fn calculate_modified_df(
202201
Ok(diff_df.select(output_columns)?)
203202
}
204203

205-
fn calculate_unchanged_df(
206-
df: &DataFrame,
207-
targets: Vec<&str>,
208-
keys: Vec<&str>,
209-
output_columns: &Vec<String>,
210-
) -> Result<DataFrame, OxenError> {
211-
// Mask behavior: if targets defined, return match on targets hash. Else, match on keys hash.
212-
// keys[0] is guaranteed to exist - if not specified, we've populated it with all columns earlier
213-
let match_mask = if targets.is_empty() {
214-
df.column(format!("{}.left", keys[0]).as_str())?
215-
.equal(df.column(format!("{}.right", keys[0]).as_str())?)?
216-
} else {
217-
df.column(format!("{}.left", TARGETS_HASH_COL).as_str())?
218-
.equal(df.column(format!("{}.right", TARGETS_HASH_COL).as_str())?)?
219-
};
220-
221-
let mut match_df = df.filter(&match_mask)?;
222-
223-
for key in keys.iter() {
224-
match_df.rename(&format!("{}.left", key), key)?;
225-
}
226-
227-
let match_df = match_df.with_column(Series::new(
228-
DIFF_STATUS_COL,
229-
vec![DIFF_STATUS_UNCHANGED; match_df.height()],
230-
))?;
231-
232-
Ok(match_df.select(output_columns)?)
233-
}
234-
235204
fn calculate_removed_df(
236205
df: &DataFrame,
237206
keys: Vec<&str>,

0 commit comments

Comments
 (0)