-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathdplyr
executable file
·158 lines (136 loc) · 5.36 KB
/
dplyr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env Rscript
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# dplyr-cli [[email protected]]
#
# Run chains of dplyr commands in the terminal
#
# * run any dplyr command of the form "dplyr::verb(.data, code)"
# * can set input file to be a CSV or RDS file
# * if reading data from stdin (the default), assume that it is CSV format
#
# History
# v0.1.0 2020-04-20 Initial release
# v0.1.1 2020-04-21 Switch to Rscript executable
# v0.1.2 2020-04-21 Support for joins
# v0.1.3 2020-04-22 More robust tmpdir handling
# v0.1.4 2022-01-23 Fix for newer read_csv handling
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
suppressMessages({
library(docopt)
library(dplyr)
})
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# configuration for docopt
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Usage/help text handed to docopt() below. The text itself is runtime data:
# the option names parsed out of it become the fields of `opt` that the rest
# of the script reads (opt$command, opt$file, opt$csv, opt$verbose, opt$code).
doc <- "dplyr-cli
Usage:
dplyr <command> [--file=fn] [--csv | -c] [--verbose | -v] [<code>...]
dplyr -h | --help
Options:
-h --help show this help text
-f FILE --file=FILE input CSV or RDS filename. If reading from stdin, assumes CSV [default: stdin]
-c --csv write output to stdout in CSV format (instead of default RDS file)
-v --verbose be verbose"
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parse the command line against `doc`.
# Print help if requested
# NOTE(review): -h/--help handling is presumably done inside docopt()
# itself; nothing in this script prints the help text explicitly.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
opt <- docopt(doc)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Helper function to read data from a file based upon its extension
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load a table from disk, choosing the reader from the file extension.
#
# input: path to an existing file. The extension is compared
#        case-insensitively; "csv" is read with readr::read_csv() (column
#        types guessed, no column-spec message), "rds" with readRDS().
#
# Returns whatever the selected reader returns.
# Stops with an error if the file does not exist or the extension is
# neither "csv" nor "rds".
read_data <- function(input) {
  if (!file.exists(input)) {
    stop("[input]: not found: ", input)
  }

  ext <- tolower(tools::file_ext(input))

  if (ext == "csv") {
    return(readr::read_csv(input, col_types = readr::cols()))
  }
  if (ext == "rds") {
    return(readRDS(input))
  }

  # Anything else (including a file with no extension) is unsupported.
  stop("Unknown file extension: ", ext)
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Helper function: null-default operator (gives x unless x is NULL, else y)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Null-default operator: `x %||% y` yields `x` unless `x` is NULL, in which
# case it yields `y`. `y` is only evaluated when it is actually needed.
`%||%` <- function(x, y) {
  if (!is.null(x)) {
    return(x)
  }
  y
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# What did the user pass in to stdin?
# Two cases handled:
# - if stdin is only a single element, assume it's a filename
# - only reading of RDS and CSV files currently supported.
# (easy to add more)
# - otherwise assume that the user has echoed the contents of
# a CSV file and piped it into this command
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (opt$verbose) message("------------------------------------------")
# Load the input table into `.data`. Three cases:
#   1. --file names a .csv/.rds file: load it directly via read_data().
#      Previously every --file value went through readLines(), which cannot
#      read a (binary) RDS file even though the help text promises
#      "input CSV or RDS filename".
#   2. The input (stdin by default) is a single line: treat that line as the
#      name of a CSV/RDS file. This is how one dplyr-cli call hands its RDS
#      result to the next one through a shell pipe.
#   3. Otherwise: treat the lines as the contents of a CSV file.
file_ext <- tolower(tools::file_ext(opt$file))
if (!identical(opt$file, "stdin") && file_ext %in% c("csv", "rds")) {
  if (opt$verbose) message("[input] reading file given by --file: ", opt$file)
  .data <- read_data(opt$file)
} else {
  # readLines("stdin") reads the process's standard input; any other value
  # is opened as a text file (legacy behaviour kept for non-csv/rds names).
  input <- readLines(opt$file)
  if (length(input) == 1) {
    # The upstream command echoes the path followed by whitespace; strip it.
    input <- trimws(input)
    if (opt$verbose) message("[input] looks like an existing file: ", input)
    .data <- read_data(input)
  } else {
    if (opt$verbose) message("[input] reading CSV from stdin")
    # I() marks the character vector as literal data rather than a path.
    .data <- readr::read_csv(I(input), col_types = readr::cols())
  }
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Run the command + code
# If the user is demanding CSV or kable output, then
# set the result to be the initial data and print it out
# later
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Verbs that combine `.data` with a second table read from a file.
join_verbs <- c(
  "left_join", "right_join", "full_join",
  "anti_join", "semi_join", "inner_join"
)

if (opt$command %in% c("csv", "kable")) {
  # Pseudo-commands: no dplyr verb is run; the input is passed unchanged to
  # the output stage, which is forced away from the RDS default.
  opt$csv <- TRUE
  res <- .data
} else if (opt$command %in% join_verbs) {
  # The first <code> element is the right-hand-side file. Fail with a clear
  # message instead of trying to read a missing filename.
  if (length(opt$code) == 0) {
    stop(
      "'", opt$command, "' needs a CSV or RDS file to join with ",
      "as its first argument",
      call. = FALSE
    )
  }
  rhs_filename <- opt$code[[1]]
  .rhs <- read_data(rhs_filename)

  # Remaining elements are extra arguments for the join (e.g. by = "id").
  code <- paste(opt$code[-1], collapse = " ")
  # Only append the extra arguments when there are some, so the generated
  # call never ends in a dangling comma.
  command <- if (nzchar(code)) {
    sprintf("dplyr::%s(.data, .rhs, %s)", opt$command, code)
  } else {
    sprintf("dplyr::%s(.data, .rhs)", opt$command)
  }
  if (opt$verbose) message("command: ", command)

  # NOTE: this evaluates text supplied on the command line as R code. That
  # is the purpose of the tool, but never feed it untrusted arguments.
  # Avoid printing stuff about the join "by"
  suppressMessages({
    res <- eval(parse(text = command))
  })
} else {
  # NOTE(review): pieces are glued with "" here but with " " for joins;
  # kept as-is because changing it alters how unquoted arguments parse.
  code <- paste(opt$code, collapse = "")
  # Verbs called without any code get a plain `verb(.data)` call rather than
  # one with an empty trailing argument.
  command <- if (nzchar(code)) {
    sprintf("dplyr::%s(.data, %s)", opt$command, code)
  } else {
    sprintf("dplyr::%s(.data)", opt$command)
  }
  if (opt$verbose) message("command: ", command)

  # NOTE: evaluates user-supplied text as R code (see note above).
  res <- eval(parse(text = command))
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Output options:
# - if command == 'kable' then use knitr::kable to output
# - if command == 'csv' or opt$csv is true, then dump
# CSV strings to the terminal. User can redirect how
# they want
# - otherwise save to an RDS file and echo to stdout such
# that another command can use it.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (opt$command == "kable") {
  # `res` holds the unmodified input for the 'kable' pseudo-command; print
  # the table line by line.
  cat(knitr::kable(res), sep = "\n")
} else if (opt$csv) {
  if (opt$verbose) message(">>>> output CSV to stdout")
  # With no file argument write.csv() prints to stdout.
  write.csv(res, row.names = FALSE)
} else {
  # Explicitly set a known, unchanging tempdir.
  # A temporary directory created via 'tempdir()' is valid only for the
  # current rsession and is destroyed at the session's close.
  # This does not work for the multiple rsessions being
  # used with the shell pipe. So try and determine as best I can
  # where the best temporary directory is.
  #
  # Candidates, in order of preference: the usual environment variables,
  # "/tmp/", and finally the parent of this session's tempdir() (which,
  # unlike tempdir() itself, is not removed when the session ends).
  # Unset variables and directories that do not exist are skipped so that a
  # stale TMPDIR does not make saveRDS() fail.
  candidates <- c(
    Sys.getenv(c("TMPDIR", "TMP", "TEMP")),
    "/tmp/",
    dirname(tempdir())
  )
  candidates <- candidates[nzchar(candidates) & dir.exists(candidates)]
  tmpdir <- candidates[1]

  tmp <- tempfile(pattern = "dplyr-cli-", tmpdir = tmpdir, fileext = ".rds")
  saveRDS(res, tmp)
  if (opt$verbose) message(">>>> output to RDS: ", tmp)
  # Emit just the path and a newline (no trailing space) so that the next
  # command in the pipe, or a shell substitution, gets a clean filename.
  cat(tmp, "\n", sep = "")
}