Rdatatable · MichaelChirico · Jul 29, 2024 · May 27, 2020 · May 27, 2020 · May 27, 2020
@@ -109,6 +109,8 @@ unit = "s")
 
 13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388).
 
+14. Selecting keyed list columns will retain key without a performance penalty, closes [#4498](https://github.com/Rdatatable/data.table/issues/4498). Thanks to @user9439449 on StackOverflow for the report.
+
 ## NOTES
 
 0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto.

@@ -1336,9 +1336,25 @@ replace_dot_alias = function(e) {
 
     if (is.data.table(jval)) {
       setattr(jval, 'class', class(x)) # fix for #64
-      if (haskey(x) && all(key(x) %chin% names(jval)) && suppressWarnings(is.sorted(jval, by=key(x))))  # TO DO: perhaps this usage of is.sorted should be allowed internally then (tidy up and make efficient)
+      # can jval be sorted by the same key as x? improved for #4498
+      get_shared_keys = function(jsub, jvnames, key) {
+        if (is.null(key)) return(NULL)
+        if (!jsub %iscall% "list") return(NULL)
+        jnames = as.character(Filter(is.name, jsub)[-1L])
+        key_idx = chmatch(key, jnames)
+        missing_keys = which(is.na(key_idx))
+        if (length(missing_keys) && missing_keys[1L] == 1L) return(NULL)
+        if (!length(missing_keys)) return(jvnames[key_idx])
+        return(jvnames[head(key_idx, missing_keys[1L] - 1L)])
+      }
+      shared_keys = get_shared_keys(jsub, jvnames, key(x))
+      if (is.null(irows) && !is.null(shared_keys)) {
+        setattr(jval, 'sorted', shared_keys)
+        # potentially inefficient backup -- check if jval is sorted by key(x)
+      } else if (haskey(x) && all(key(x) %chin% names(jval)) && suppressWarnings(is.sorted(jval, by=key(x)))) { # TO DO: perhaps this usage of is.sorted should be allowed internally then (tidy up and make efficient)
         setattr(jval, 'sorted', key(x))
-      if (any(sapply(jval, is.null))) stop("Internal error: j has created a data.table result containing a NULL column") # nocov
+      }
+      if (any(vapply_1b(jval, is.null))) stop("Internal error: j has created a data.table result containing a NULL column") # nocov
     }
     return(jval)
   }

@@ -3641,7 +3641,7 @@ test(1118, dt[, lapply(.SD, function(y) weighted.mean(y, b2, na.rm=TRUE)), by=x]
 
 # a(nother) test of #295
 DT <- data.table(x=5:1, y=1:5, key="y")
-test(1119, is.null(key(DT[, list(z = y, y = 1/y)])))
+test(1119, key(DT[, list(z = y, y = 1/y)]), 'z')
 
 ## various ordered factor rbind tests
 DT1 = data.table(ordered('a', levels = c('a','b','c')))
@@ -16861,3 +16861,10 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN))
 test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
 A = data.table(A=as.complex(rep(NA, 5)))
 test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
+
+# sub-key can also be retained in plain query, part of #4498
+DT = data.table(id = rep(1:10, 2L), grp = rep(1:2, each=10L), V = 1:20/13, key=c('id', 'grp'))
+test(2139.1, key(DT[ , .(id)]), 'id')
+test(2139.2, key(DT[ , .(grp)]), NULL)
+## renaming also caught
+test(2139.3, key(DT[ , .(newid = id, newgrp = grp)]), c('newid', 'newgrp'))