Skip to content


Split into separate files
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Nov 11, 2009
1 parent 4befff9 commit 2a30b2e
Show file tree
Hide file tree
Showing 11 changed files with 313 additions and 316 deletions.
144 changes: 0 additions & 144 deletions R/basics.r

This file was deleted.

22 changes: 22 additions & 0 deletions R/detect.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Detect the presence or absence of a pattern in a string.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @return logical vector, the same length as \code{string}. Missing
#'   strings give a missing result.
#' @seealso \code{\link{grepl}} which this function wraps
#' @keywords character
#' @examples
#' fruit <- c("apple", "banana", "pear", "pineapple")
#' str_detect(fruit, "a")
#' str_detect(fruit, "^a")
#' str_detect(fruit, "a$")
#' str_detect(fruit, "b")
#' str_detect(fruit, "[aeiou]")
str_detect <- function(string, pattern) {
  results <- grepl(pattern, string)
  # grepl() reports FALSE for missing input; propagate NA instead so that a
  # missing string is not mistaken for a definite non-match.
  is.na(results) <- is.na(string)
  results
}


14 changes: 14 additions & 0 deletions R/dup.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#' Duplicate strings within a character vector.
#'
#' \code{string} and \code{times} are recycled to a common length, and each
#' string is repeated the corresponding number of times and pasted together
#' with no separator.
#'
#' @param string input character vector
#' @param times number of times to duplicate each string
#' @return character vector, carrying the names of \code{string}
#' @keywords internal
str_dup <- function(string, times) {
  # Nothing to duplicate when either input is empty.
  if (length(string) == 0 || length(times) == 0) {
    return(character(0))
  }

  # Recycle both inputs to a common length so a single string can be paired
  # with a vector of counts (and vice versa).
  n <- max(length(string), length(times))
  pieces <- rep(string, length.out = n)
  counts <- rep(times, length.out = n)

  # A count of zero gives rep.int() an empty vector, which paste() collapses
  # to the empty string "".
  output <- vapply(seq_len(n), function(i) {
    paste(rep.int(pieces[i], counts[i]), collapse = "")
  }, character(1))

  names(output) <- names(string)
  output
}
40 changes: 40 additions & 0 deletions R/extract.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Extract first piece of a string that matches a pattern.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @return character vector.
#' @keywords character
#' @seealso \code{\link{str_extract_all}} to extract all matches
#' @examples
#' shopping_list <- c("apples x4", "flour", "sugar", "milk x2")
#' str_extract(shopping_list, "\\d")
#' str_extract(shopping_list, "[a-z]+")
#' str_extract(shopping_list, "[a-z]{1,4}")
#' str_extract(shopping_list, "\\b[a-z]{1,4}\\b")
str_extract <- function(string, pattern) {
  # Find the first match in each string, then cut out exactly that span.
  positions <- str_locate(string, pattern)
  str_sub(string, positions[, "start"], positions[, "end"])
}

#' Extract all pieces of a string that match a pattern.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @return list of character vectors, one element per input string.
#' @keywords character
#' @seealso \code{\link{str_extract}} to extract the first match
#' @examples
#' shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
#' str_extract_all(shopping_list, "[a-z]+")
#' str_extract_all(shopping_list, "\\b[a-z]+\\b")
#' str_extract_all(shopping_list, "\\d")
str_extract_all <- function(string, pattern) {
  positions <- str_locate_all(string, pattern)
  # For each input string, cut out every located (start, end) span.
  llply(seq_along(string), function(i) {
    position <- positions[[i]]
    str_sub(string[i], position[, "start"], position[, "end"])
  })
}
57 changes: 57 additions & 0 deletions R/join.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' Join multiple strings into a single string.
#'
#' @param ... one or more character vectors. Zero length arguments
#'   are removed
#' @param sep string to insert between input vectors
#' @param collapse optional string used to combine input vectors into single
#'   string
#' @return If \code{collapse = NULL} (the default) a character vector with
#'   length equal to the longest input string. If \code{collapse} is
#'   non-NULL, a character vector of length 1.
#' @keywords character
#' @seealso \code{\link{paste}} which this function wraps
#' @examples
#' str_join("Letter: ", letters)
#' str_join("Letter", letters, sep = ": ")
#' str_join(letters, " is for", "...")
#' str_join(letters[-26], " comes before ", letters[-1])
#' str_join(letters, collapse = "")
#' str_join(letters, collapse = ", ")
str_join <- function(..., sep = "", collapse = NULL) {
  # Drop zero-length inputs before handing the rest to paste().
  strings <- Filter(function(x) length(x) > 0, list(...))
  do.call("paste", c(strings, list(sep = sep, collapse = collapse)))
}

#' Pad a string.
#'
#' @param string input character vector
#' @param width pad strings to this minimum width
#' @param side side on which padding character is added (\code{"left"},
#'   \code{"right"} or \code{"center"})
#' @param pad padding character (default is a space)
#' @return character vector
#' @keywords character
#' @examples
#' rbind(
#'   str_pad("hadley", 30, "left"),
#'   str_pad("hadley", 30, "right"),
#'   str_pad("hadley", 30, "center")
#' )
#' # Longer strings are returned unchanged
#' str_pad("hadley", 3)
str_pad <- function(string, width, side = "left", pad = " ") {
  stopifnot(length(width) == 1)
  stopifnot(length(side) == 1)
  stopifnot(length(pad) == 1)

  side <- match.arg(side, c("left", "right", "center"))
  # Padding characters still required per string; never negative, so strings
  # already at least `width` long are left untouched.
  needed <- pmax(0, width - str_length(string))

  # When centering, an odd remainder puts the extra character on the right.
  left <- switch(side,
    left = needed, right = 0, center = floor(needed / 2))
  right <- switch(side,
    left = 0, right = needed, center = ceiling(needed / 2))

  str_join(str_dup(pad, left), string, str_dup(pad, right))
}
16 changes: 16 additions & 0 deletions R/length.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#' The length of a string (in characters).
#'
#' @param string input character vector
#' @return numeric vector giving number of characters in each element of the
#'   character vector. Missing strings have missing length.
#' @keywords character
#' @seealso \code{\link{nchar}} which this function wraps
#' @examples
#' str_length(letters)
#' str_length(c("i", "like", "programming", NA))
str_length <- function(string) {
  string <- as.character(string)
  nc <- nchar(string)
  # Make sure a missing string always reports a missing length, whatever
  # nchar() itself returned for it.
  is.na(nc) <- is.na(string)
  nc
}
66 changes: 66 additions & 0 deletions R/locate.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#' Locate the position of the first occurrence of a pattern in a string.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @return numeric matrix. First column gives start position of match, and
#'   second column gives end position. Strings with no match get \code{NA}
#'   in both columns.
#' @keywords character
#' @seealso
#'   \code{\link{regexpr}} which this function wraps
#'   \code{\link{str_extract}} for a convenient way of extracting matches
#'   \code{\link{str_locate_all}} to locate position of all matches
#' @examples
#' fruit <- c("apple", "banana", "pear", "pineapple")
#' str_locate(fruit, "a")
#' str_locate(fruit, "e")
str_locate <- function(string, pattern) {
  match <- regexpr(pattern, string)

  start <- as.vector(match)
  end <- start + attr(match, "match.length") - 1

  # regexpr() signals "no match" with -1; report those (and missing inputs)
  # as NA rather than as a bogus position.
  missing <- is.na(start) | start == -1
  start[missing] <- NA
  end[missing] <- NA

  cbind(start = start, end = end)
}

#' Locate the position of all occurrences of a pattern in a string.
#'
#' @param string input character vector
#' @param pattern pattern to look for. See \code{\link{regex}} for
#'   description.
#' @keywords character
#' @return list of numeric matrices, one per input string. First column
#'   gives start position of match, and second column gives end position.
#'   A string with no matches gives a zero-row matrix; a missing string
#'   gives a single row of \code{NA}.
#' @seealso
#'   \code{\link{gregexpr}} which this function wraps
#'   \code{\link{str_extract}} for a convenient way of extracting matches
#'   \code{\link{str_locate}} to locate position of first match
#' @examples
#' fruit <- c("apple", "banana", "pear", "pineapple")
#' str_locate_all(fruit, "a")
#' str_locate_all(fruit, "e")
str_locate_all <- function(string, pattern) {
  matches <- gregexpr(pattern, string)

  # Template returned for strings that contain no match at all.
  null <- matrix(0, nrow = 0, ncol = 2)
  colnames(null) <- c("start", "end")

  lapply(matches, function(match) {
    start <- as.vector(match)

    if (length(start) == 1) {
      # Missing input: comparing NA with -1 inside `if` would raise an
      # error, so handle it first and propagate NA.
      if (is.na(start)) {
        return(cbind(start = NA_real_, end = NA_real_))
      }
      # gregexpr() signals "no match" with a single -1.
      if (start == -1) {
        return(null)
      }
    }

    end <- start + attr(match, "match.length") - 1
    cbind(start = start, end = end)
  })
}


0 comments on commit 2a30b2e

Please sign in to comment.