Closes #700.

Conflicts: README.md inst/tests/tests.Rraw
Rdatatable · Jun 23, 2014 · 6056698 · 6056698
2 parents b4b8e9f + bd2641a
commit 6056698
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -219,6 +219,8 @@ DT[, list(.N, mean(y), sum(y)), by=x] # 1.9.3+ - will use GForce.
 
   *  `DT[, list(m1 = eval(expr1), m2=eval(expr2)), by=val]` where `expr1` and `expr2` are constructed using `parse(text=.)` now works instead of resulting in error. Closes #5732 (git [#472](https://github.com/Rdatatable/data.table/issues/472)). Thanks to Benjamin Barnes for reporting with a nice reproducible example.
 
+  *  A join of the form `X[Y, roll=TRUE, nomatch=0L]` where some of Y's key columns occur more than once (duplicated keys) might at times return an incorrect join result. This was introduced only in 1.9.2 and is fixed now. Closes [#700](https://github.com/Rdatatable/data.table/issues/700). Thanks to Michael Smith for the very nice reproducible example and nice spotting of such a tricky case.
+
 #### NOTES
 
   *  Reminder: using `rolltolast` still works but since v1.9.2 now issues the following warning :

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -4689,6 +4689,17 @@ if ("package:reshape2" %in% search()) {
     test(1316.4, melt(x, id="d", measure="a"), error="Column 'd' not found in 'data'")
 }
 
+# bug 700 - bmerge, roll=TRUE and nomatch=0L when i's key group occurs more than once
+dt1 <- data.table(structure(list(x = c(7L, 33L), y = structure(c(15912, 15912), class = "Date"), z = c(626550.35284, 7766.385)), .Names =
+c("x", "y", "z"), class = "data.frame", row.names = c(NA, -2L)), key = "x,y")
+dt2 <- data.table(structure(list(x = c(7L, 7L, 33L, 33L, 33L, 33L), y = structure(c(15884, 15917, 15884, 15884, 15917, 15917), class = "Date"), w = c(-0.118303, 0.141225, -0.03137, -0.02533, 0.045967, 0.043694)), .Names = c("x", "y", "w"), class = "data.frame", row.names = c(NA, -6L)), key = "x,y")
+test(1317.1, dt1[dt2, roll=TRUE, nomatch=0L], data.table(x=c(7L,33L,33L), y=as.Date(c("2013-07-31", "2013-07-31", "2013-07-31")), z=c(dt1$z[1:2], dt1$z[2]), w=c(dt2$w[2], dt2$w[5:6]), key="x,y"))
+
+# also test where 'i' is not sorted.
+set.seed(1L)
+dt2 <- dt2[sample(nrow(dt2))] # key should be gone
+test(1317.2, dt1[dt2, roll=TRUE, nomatch=0L], data.table(x=c(7L,33L,33L), y=as.Date(c("2013-07-31", "2013-07-31", "2013-07-31")), z=c(dt1$z[1:2], dt1$z[2]), w=c(dt2$w[1], dt2$w[c(2,6)]), key="x,y"))
+
 # bug fix for #472 : "parse" in j
 set.seed(100)
 nrow <- 100L

diff --git a/src/bmerge.c b/src/bmerge.c
@@ -313,7 +313,7 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int lowma
             for (j=ilow+1; j<iupp; j++) {                 // will rewrite retFirst[ir] to itself, but that's ok
                 if (o) k=o[j]-1; else k=j;
                 retFirst[k] = retFirst[ir];
-                retLength[k]= 1; 
+                retLength[k]= retLength[ir]; 
             }
         }
     }