Skip to content

Commit

Permalink
Merge pull request #24 from mateuszkj/master
Browse files (browse the repository at this point in the history)
Dependencies update + redone commands in clap 3.0 + clippy
  • Loading branch information
manojkarthick authored May 13, 2022
2 parents 9473b77 + 1ba3725 commit d33dca9
Show file tree
Hide file tree
Showing 14 changed files with 782 additions and 941 deletions.
618 changes: 377 additions & 241 deletions Cargo.lock

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions Cargo.toml
Original file line number | Diff line number | Diff line change
@@ -12,16 +12,16 @@ keywords = ["cli", "arrow", "parquet"]
categories = ["command-line-utilities"]

[dependencies]
thiserror = "1.0.22"
log = "0.4.11"
env_logger = "0.8.2"
parquet = { version = "4.3.0", features = ["cli"] }
arrow = "4.3.0"
clap = "2.33.3"
rand = "0.8.3"
tempfile = "3.2.0"
thiserror = "1.0.30"
log = "0.4.16"
env_logger = "0.9.0"
parquet = { version = "12.0.0", features = ["cli"] }
arrow = { version = "12.0.0", features = ["chrono-tz"] }
clap = "3.1.10"
rand = "0.8.5"
walkdir = "2.3.2"

[dev-dependencies]
assert_cmd = "1.0.3"
predicates = "1.0.7"
assert_cmd = "2.0.4"
predicates = "2.1.1"
tempfile = "3.3.0"
30 changes: 0 additions & 30 deletions src/command.rs

This file was deleted.

176 changes: 67 additions & 109 deletions src/commands/cat.rs
Original file line number | Diff line number | Diff line change
@@ -1,131 +1,89 @@
use crate::command::PQRSCommand;
use crate::errors::PQRSError;
use crate::errors::PQRSError::FileNotFound;
use crate::utils::{check_path_present, open_file, print_rows, is_hidden};
use clap::{App, Arg, ArgMatches, SubCommand};
use crate::utils::Formats;
use crate::utils::{check_path_present, is_hidden, open_file, print_rows};
use clap::Parser;
use log::debug;
use std::fmt;
use std::collections::HashSet;
use std::fs::metadata;
use std::path::PathBuf;
use walkdir::WalkDir;
use std::collections::HashSet;
use crate::utils::Formats;

/// The config params for the "cat" subcommand
pub struct CatCommand<'a> {
locations: Vec<&'a str>,
format: &'a Formats,
}
/// Prints the contents of Parquet file(s)
#[derive(Parser, Debug)]
pub struct CatCommandArgs {
/// Use CSV format for printing
#[clap(short, long, conflicts_with = "json")]
csv: bool,

impl<'a> CatCommand<'a> {
/// Return the clap subcommand definition
pub(crate) fn command() -> App<'static, 'static> {
SubCommand::with_name("cat")
.about("Prints the contents of Parquet file(s)")
.arg(
Arg::with_name("locations")
.index(1)
.multiple(true)
.value_name("LOCATIONS")
.value_delimiter(" ")
.required(true)
.help("Parquet files or folders to read from"),
)
.arg(
Arg::with_name("json")
.long("json")
.short("j")
.takes_value(false)
.required(false)
.conflicts_with("csv")
.help("Use JSON lines format for printing"),
)
.arg(
Arg::with_name("csv")
.long("csv")
.short("c")
.takes_value(false)
.required(false)
.conflicts_with("json")
.help("Use CSV format for printing")
)
}
/// Use JSON lines format for printing
#[clap(short, long, conflicts_with = "csv")]
json: bool,

pub(crate) fn new(matches: &'a ArgMatches<'a>) -> Self {
Self {
locations: matches.values_of("locations").unwrap().collect(),
format: if matches.is_present("json") {
&Formats::Json
} else if matches.is_present("csv") {
&Formats::Csv
} else {
&Formats::Default
},
}
}
/// Parquet files or folders to read from
locations: Vec<PathBuf>,
}

impl<'a> PQRSCommand for CatCommand<'a> {
fn execute(&self) -> Result<(), PQRSError> {
// print debugging information
debug!("{:#?}", self);
pub(crate) fn execute(opts: CatCommandArgs) -> Result<(), PQRSError> {
let format = if opts.json {
Formats::Json
} else if opts.csv {
Formats::Csv
} else {
Formats::Default
};

let mut directories = vec![];
let mut files = HashSet::new();
for location in &self.locations {
let meta = metadata(location).unwrap();
if meta.is_dir() {
directories.push(String::from(*location));
}
if meta.is_file() {
files.insert(String::from(*location));
}
}
debug!(
"The locations to read from are: {:?} Using output format: {:?}",
&opts.locations, format
);

for directory in &directories {
let walker = WalkDir::new(directory).into_iter();
for entry in walker.filter_entry(|e| !is_hidden(e)).filter_map(|e| e.ok()) {
debug!("{}", entry.path().display());
let path = String::from(entry.path().to_str().unwrap());
let meta = metadata(&path).unwrap();
if meta.is_file() {
files.insert(path);
}
}
let mut directories = vec![];
let mut files = HashSet::new();
for location in &opts.locations {
let meta = metadata(location).unwrap();
if meta.is_dir() {
directories.push(location.clone());
}
if meta.is_file() {
files.insert(location.clone());
}
}

// find all the files after walking the directories
debug!("The files are: {:#?}", files);

// make sure all files are present before printing any data
for file_name in &files {
if !check_path_present(file_name.as_ref()) {
return Err(FileNotFound(String::from(file_name)));
for directory in &directories {
let walker = WalkDir::new(directory).into_iter();
for entry in walker
.filter_entry(|e| !is_hidden(e))
.filter_map(|e| e.ok())
{
debug!("{}", entry.path().display());
let path = entry.path().to_path_buf();
let meta = metadata(&path).unwrap();
if meta.is_file() {
files.insert(path);
}
}
}

for file_name in &files {
let file = open_file(file_name)?;
let info_string = format!("File: {}", file_name);
let length = info_string.len();
eprintln!("\n{}", "#".repeat(length));
eprintln!("{}", info_string);
eprintln!("{}\n", "#".repeat(length));
print_rows(file, None, self.format)?;
}
// find all the files after walking the directories
debug!("The files are: {:#?}", files);

Ok(())
// make sure all files are present before printing any data
for file_name in &files {
if !check_path_present(file_name) {
return Err(FileNotFound(file_name.to_path_buf()));
}
}
}

impl<'a> fmt::Debug for CatCommand<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(
f,
"The locations to read from are: {}",
&self.locations.join(", ")
)?;
writeln!(f, "Using Output format: {}", self.format.to_string())?;

Ok(())
for file_name in &files {
let file = open_file(file_name)?;
let info_string = format!("File: {}", file_name.display());
let length = info_string.len();
eprintln!("\n{}", "#".repeat(length));
eprintln!("{}", info_string);
eprintln!("{}\n", "#".repeat(length));
print_rows(file, None, format)?;
}

Ok(())
}
104 changes: 35 additions & 69 deletions src/commands/head.rs
Original file line number | Diff line number | Diff line change
@@ -1,82 +1,48 @@
use crate::command::PQRSCommand;
use crate::errors::PQRSError;
use crate::errors::PQRSError::FileNotFound;
use crate::utils::{check_path_present, open_file, print_rows, Formats};
use clap::{App, Arg, ArgMatches, SubCommand};
use clap::Parser;
use log::debug;
use std::fmt;
use std::path::PathBuf;

pub struct HeadCommand<'a> {
file_name: &'a str,
num_records: i64,
format: &'a Formats,
}

impl<'a> HeadCommand<'a> {
pub(crate) fn command() -> App<'static, 'static> {
SubCommand::with_name("head")
.about("Prints the first n records of the Parquet file")
.arg(
Arg::with_name("file")
.index(1)
.value_name("FILE")
.required(true)
.help("Parquet file to read"),
)
.arg(
Arg::with_name("json")
.long("json")
.short("j")
.takes_value(false)
.required(false)
.help("Use JSON lines format for printing"),
)
.arg(
Arg::with_name("records")
.long("records")
.short("n")
.default_value("5")
.takes_value(true)
.required(false)
.help("The number of records to show (default: 5)"),
)
}
/// Prints the first n records of the Parquet file
#[derive(Parser, Debug)]
pub struct HeadCommandArgs {
/// Use CSV format for printing
#[clap(short, long, conflicts_with = "json")]
csv: bool,

pub(crate) fn new(matches: &'a ArgMatches<'a>) -> Self {
Self {
file_name: matches.value_of("file").unwrap(),
num_records: matches.value_of("records").unwrap().parse().unwrap(),
format: if matches.is_present("json") {
&Formats::Json
} else {
&Formats::Default
},
}
}
}
/// Use JSON lines format for printing
#[clap(short, long, conflicts_with = "csv")]
json: bool,

impl<'a> PQRSCommand for HeadCommand<'a> {
fn execute(&self) -> Result<(), PQRSError> {
// print debugging information
debug!("{:#?}", self);
/// The number of records to show (default: 5)
#[clap(short = 'n', long, default_value = "5")]
records: usize,

if !check_path_present(self.file_name) {
return Err(FileNotFound(String::from(self.file_name)));
}

let file = open_file(self.file_name)?;
print_rows(file, Some(self.num_records), self.format)?;
/// Parquet file to read
file: PathBuf,
}

Ok(())
pub(crate) fn execute(opts: HeadCommandArgs) -> Result<(), PQRSError> {
let format = if opts.json {
Formats::Json
} else if opts.csv {
Formats::Csv
} else {
Formats::Default
};

debug!("The file name to read is: {}", opts.file.display());
debug!("Number of records to print: {}", opts.records);
debug!("Use Output format: {}", format);

if !check_path_present(&opts.file) {
return Err(FileNotFound(opts.file));
}
}

impl<'a> fmt::Debug for HeadCommand<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "The file name to read is: {}", &self.file_name)?;
writeln!(f, "Number of records to print: {}", &self.num_records)?;
writeln!(f, "Use Output format: {}", self.format.to_string())?;
let file = open_file(&opts.file)?;
print_rows(file, Some(opts.records), format)?;

Ok(())
}
Ok(())
}
Loading

0 comments on commit d33dca9

Please sign in to comment.