Skip to content

Commit

Permalink
Updated CSV with improved whitespace/empty line handling and added regex white/blacklist to CSV implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
hwperkins committed Oct 22, 2015
1 parent d6d2b38 commit 16435d8
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 11 deletions.
26 changes: 15 additions & 11 deletions src/IO/CSV.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ final class CSV
'colmap' => null,
'quote' => "\"",
'escape' => "\\",

'overwrite' => false
'overwrite' => false,
'include' => null,
'exclude' => null
];

public function __construct($fileName)
Expand All @@ -54,6 +55,8 @@ public function __construct($fileName)
* to be once loaded into memory (default: null)
* quote: The character used to specify literal quoted segments (default: ")
* escape: The character used to escape quotes or other special characters (default: \)
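* include: Whitelist regular expression; only raw lines matching it are kept (default: null)
* exclude: Blacklist regular expression; raw lines matching it are dropped (default: null)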
* @param array $options The option map.
* @return array Returns multi-dimensional array of row-column strings.
* @throws \Archon\Exceptions\UnknownOptionException
Expand All @@ -71,10 +74,18 @@ public function loadFile(array $options = [])
$colmapOpt = $options['colmap'];
$quoteOpt = $options['quote'];
$escapeOpt = $options['escape'];
$includeRegexOpt = $options['include'];
$excludeRegexOpt = $options['exclude'];

$fileData = file_get_contents($fileName);
$fileData = explode($nlsepOpt, $fileData);

// Remove whitespace/empty lines
$fileData = preg_grep('/^\s*$/', $fileData, PREG_GREP_INVERT);

$fileData = $includeRegexOpt ? preg_grep($includeRegexOpt, $fileData) : $fileData;
$fileData = $excludeRegexOpt ? preg_grep($excludeRegexOpt, $fileData, PREG_GREP_INVERT) : $fileData;

/**
* Determines how to assign columns of the CSV
* First checks if options specify a line of the file to use
Expand Down Expand Up @@ -107,15 +118,8 @@ public function loadFile(array $options = [])
foreach ($fileData as $i => $line) {
$line = trim($line);

if ($line === '') {
unset($fileData[$i]);
continue;
}

if ($line !== '') {
$line = str_getcsv($line, $sepOpt, $quoteOpt, $escapeOpt);
$fileData[$i] = $this->applyColMapToRowKeys($line, $columns);
}
$line = str_getcsv($line, $sepOpt, $quoteOpt, $escapeOpt);
$fileData[$i] = $this->applyColMapToRowKeys($line, $columns);
}

$fileData = array_values($fileData);
Expand Down
15 changes: 15 additions & 0 deletions tests/DataFrame/CSV/CSVDataFrameUnitTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,21 @@ public function testFromCSV()
], $df->toArray());
}

/**
 * Loads the "dirty" CSV fixture with include/exclude regex options and
 * verifies that exactly the two expected data rows are returned.
 */
public function testFromCSVDirty()
{
    $dirtyCsvPath = implode(DIRECTORY_SEPARATOR, [__DIR__, 'TestFiles', 'testCSVdirty.csv']);

    // include: lines beginning with a digit 1-9 or "a";
    // exclude: lines beginning with "7" or "junk".
    $filterOptions = [
        'include' => '/^([1-9]|a)/',
        'exclude' => '/^([7]|junk)/'
    ];

    $expectedRows = [
        ['a' => 1, 'b' => 2, 'c' => 3],
        ['a' => 4, 'b' => 5, 'c' => 6],
    ];

    $frame = DataFrame::fromCSV($dirtyCsvPath, $filterOptions);

    $this->assertEquals($expectedRows, $frame->toArray());
}

public function testFromCSVNoHeader()
{
$fileName = __DIR__.DIRECTORY_SEPARATOR.'TestFiles'.DIRECTORY_SEPARATOR.'testCSV.csv';
Expand Down
7 changes: 7 additions & 0 deletions tests/DataFrame/CSV/TestFiles/testCSVdirty.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
a,b,c
1,2,3

4,5,6
7,8,9

junk

0 comments on commit 16435d8

Please sign in to comment.