forked from tidyverse/stringr
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
313 additions
and
316 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#' Test whether a pattern occurs in each element of a character vector.
#'
#' Matching is delegated to \code{\link{grepl}}; elements of \code{string}
#' that are missing produce a missing result instead of \code{FALSE}.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @return logical vector, one value per element of \code{string}, with
#'   \code{NA} wherever \code{string} is \code{NA}
#' @seealso \code{\link{grepl}} which this function wraps
#' @keywords character
#' @examples
#' fruit <- c("apple", "banana", "pear", "pineapple")
#' str_detect(fruit, "a")
#' str_detect(fruit, "^a")
#' str_detect(fruit, "a$")
#' str_detect(fruit, "b")
#' str_detect(fruit, "[aeiou]")
str_detect <- function(string, pattern) {
  found <- grepl(pattern, string)

  # Missing inputs are reported as missing rather than as "no match".
  found[is.na(string)] <- NA
  found
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#' Duplicate strings within a character vector.
#'
#' Each element of \code{string} is repeated \code{times} times and the
#' copies are concatenated without a separator. \code{string} and
#' \code{times} are recycled against each other element-wise.
#'
#' @param string input character vector
#' @param times number of times to duplicate each string
#' @return character vector. A count of zero gives the empty string, a
#'   missing string stays missing, and zero-length input gives a zero-length
#'   result. Names of \code{string} are carried over to the result.
#' @keywords internal
str_dup <- function(string, times) {
  # Nothing to duplicate: return an empty result instead of recycling
  # against a zero-length vector.
  if (length(string) == 0 || length(times) == 0) {
    return(character(0))
  }

  dup_one <- function(s, n) {
    # Missing strings stay missing rather than becoming the text "NA".
    if (is.na(s)) {
      return(NA_character_)
    }
    # paste() with collapse always yields exactly one string, including ""
    # when n is zero.
    paste(rep.int(s, n), collapse = "")
  }

  # Map() pairs strings with counts directly, so the counts keep their
  # numeric type instead of being coerced alongside the strings.
  output <- unlist(
    Map(dup_one, as.character(string), times),
    use.names = FALSE
  )

  names(output) <- names(string)
  output
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#' Extract the first match of a pattern from each string.
#'
#' The first match is located with \code{\link{str_locate}} and the
#' corresponding span is pulled out of each string with \code{str_sub}.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @return character vector.
#' @keywords character
#' @seealso \code{\link{str_extract_all}} to extract all matches
#' @examples
#' shopping_list <- c("apples x4", "flour", "sugar", "milk x2")
#' str_extract(shopping_list, "\\d")
#' str_extract(shopping_list, "[a-z]+")
#' str_extract(shopping_list, "[a-z]{1,4}")
#' str_extract(shopping_list, "\\b[a-z]{1,4}\\b")
str_extract <- function(string, pattern) {
  loc <- str_locate(string, pattern)
  first <- loc[, "start"]
  last <- loc[, "end"]

  str_sub(string, first, last)
}
|
||
|
||
#' Extract every match of a pattern from each string.
#'
#' All matches are located with \code{\link{str_locate_all}}; for each input
#' string the matched spans are then pulled out with \code{str_sub}.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @return list of character vectors, one list element per input string.
#' @keywords character
#' @seealso \code{\link{str_extract}} to extract the first match
#' @examples
#' shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
#' str_extract_all(shopping_list, "[a-z]+")
#' str_extract_all(shopping_list, "\\b[a-z]+\\b")
#' str_extract_all(shopping_list, "\\d")
str_extract_all <- function(string, pattern) {
  all_locations <- str_locate_all(string, pattern)

  # Pull out every located span for the string at index `idx`.
  extract_at <- function(idx) {
    loc <- all_locations[[idx]]
    str_sub(string[idx], loc[, "start"], loc[, "end"])
  }

  llply(seq_along(string), extract_at)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#' Join multiple strings into a single string.
#'
#' A thin wrapper around \code{\link{paste}} that defaults to an empty
#' separator and discards zero-length arguments before pasting.
#'
#' @param ... one or more character vectors. Zero length arguments
#'   are removed
#' @param sep string to insert between input vectors
#' @param collapse optional string used to combine input vectors into single
#'   string
#' @return If \code{collapse = NULL} (the default) a character vector with
#'   length equal to the longest input vector. If \code{collapse} is
#'   non-NULL, a character vector of length 1.
#' @keywords character
#' @seealso \code{\link{paste}} which this function wraps
#' @examples
#' str_join("Letter: ", letters)
#' str_join("Letter", letters, sep = ": ")
#' str_join(letters, " is for", "...")
#' str_join(letters[-26], " comes before ", letters[-1])
#'
#' str_join(letters, collapse = "")
#' str_join(letters, collapse = ", ")
str_join <- function(..., sep = "", collapse = NULL) {
  pieces <- list(...)

  # Keep only the arguments that actually contain something.
  has_content <- vapply(pieces, length, numeric(1)) > 0
  kept <- pieces[has_content]

  paste_args <- c(kept, list(sep = sep, collapse = collapse))
  do.call("paste", paste_args)
}
|
||
#' Pad a string.
#'
#' Strings shorter than \code{width} get copies of \code{pad} added on the
#' chosen side until they reach \code{width} characters. When centering an
#' odd amount of padding, the extra character goes on the right.
#'
#' @param string input character vector
#' @param width pad strings to this minimum width; must have length 1
#' @param side side on which padding character is added: one of
#'   \code{"left"}, \code{"right"} or \code{"center"}
#' @param pad padding character (default is a space); must have length 1
#' @return character vector. Strings already at least \code{width}
#'   characters long are returned unchanged, missing strings stay missing,
#'   and zero-length input gives a zero-length result.
#' @keywords character
#' @examples
#' rbind(
#'   str_pad("hadley", 30, "left"),
#'   str_pad("hadley", 30, "right"),
#'   str_pad("hadley", 30, "center")
#' )
#' # Longer strings are returned unchanged
#' str_pad("hadley", 3)
#' # Missing strings stay missing
#' str_pad(c("hadley", NA), 10)
str_pad <- function(string, width, side = "left", pad = " ") {
  stopifnot(length(width) == 1)
  stopifnot(length(side) == 1)
  stopifnot(length(pad) == 1)

  side <- match.arg(side, c("left", "right", "center"))

  # Nothing to pad.
  if (length(string) == 0) {
    return(character(0))
  }

  is_missing <- is.na(string)
  needed <- pmax(0, width - str_length(string))
  # A missing string has a missing length; request no padding for it so the
  # duplication step never receives a missing count.
  needed[is_missing] <- 0

  left <- switch(side,
    left = needed, right = 0, center = floor(needed / 2))
  right <- switch(side,
    left = 0, right = needed, center = ceiling(needed / 2))

  padded <- str_join(str_dup(pad, left), string, str_dup(pad, right))

  # Restore missingness: pasting would otherwise turn NA into text.
  padded[is_missing] <- NA
  padded
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#' The length of a string (in characters).
#'
#' The input is converted with \code{as.character} before its characters
#' are counted with \code{\link{nchar}}.
#'
#' @param string input character vector
#' @return numeric vector giving number of characters in each element of the
#'   character vector. Missing strings have missing length.
#' @keywords character
#' @seealso \code{\link{nchar}} which this function wraps
#' @examples
#' str_length(letters)
#' str_length(c("i", "like", "programming", NA))
str_length <- function(string) {
  chars <- as.character(string)
  n_chars <- nchar(chars)

  # Report a missing length for a missing string.
  n_chars[is.na(chars)] <- NA
  n_chars
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#' Locate the position of the first occurrence of a pattern in a string.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @return numeric matrix with one row per input string. First column
#'   (\code{start}) gives start position of match, and second column
#'   (\code{end}) gives end position. Strings without a match get \code{NA}
#'   in both columns.
#' @keywords character
#' @seealso
#'   \code{\link{regexpr}} which this function wraps
#'
#'   \code{\link{str_extract}} for a convenient way of extracting matches
#'
#'   \code{\link{str_locate_all}} to locate position of all matches
#'
#' @examples
#' fruit <- c("apple", "banana", "pear", "pineapple")
#' str_locate(fruit, "a")
#' str_locate(fruit, "e")
str_locate <- function(string, pattern) {
  hit <- regexpr(pattern, string)

  first <- as.vector(hit)
  last <- first + attr(hit, "match.length") - 1
  located <- cbind(start = first, end = last)

  # regexpr() reports "no match" as -1; blank out those rows.
  located[which(first == -1), ] <- NA
  located
}
|
||
#' Locate the position of all occurrences of a pattern in a string.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @keywords character
#' @return list of numeric matrices, one per input string. First column
#'   (\code{start}) gives start position of match, and second column
#'   (\code{end}) gives end position. A string with no match gives a matrix
#'   with zero rows; a missing string gives a single row of \code{NA}.
#' @seealso
#'   \code{\link{gregexpr}} which this function wraps
#'
#'   \code{\link{str_extract}} for a convenient way of extracting matches
#'
#'   \code{\link{str_locate}} to locate position of first match
#'
#' @examples
#' fruit <- c("apple", "banana", "pear", "pineapple")
#' str_locate_all(fruit, "a")
#' str_locate_all(fruit, "e")
str_locate_all <- function(string, pattern) {
  matches <- gregexpr(pattern, string)

  col_names <- c("start", "end")
  # Result for a string with no match: zero rows, named columns.
  no_match <- matrix(0, nrow = 0, ncol = 2,
    dimnames = list(NULL, col_names))
  # Result for a missing string: one row of missing positions.
  missing_match <- matrix(NA_real_, nrow = 1, ncol = 2,
    dimnames = list(NULL, col_names))

  lapply(matches, function(match) {
    if (length(match) == 1) {
      # Check for NA first: comparing NA with -1 yields NA, which `if`
      # cannot evaluate.
      if (is.na(match)) return(missing_match)
      if (match == -1) return(no_match)
    }

    start <- as.vector(match)
    end <- start + attr(match, "match.length") - 1
    cbind(start = start, end = end)
  })
}
|
Oops, something went wrong.