From 4eb13acb202919120bc367088e885cbe7522728a Mon Sep 17 00:00:00 2001 From: Adam Gaskins Date: Sat, 16 Jan 2021 13:27:37 -0500 Subject: [PATCH 1/7] Write tests for getHeaders() function --- tests/SimpleExcelReaderTest.php | 97 +++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/tests/SimpleExcelReaderTest.php b/tests/SimpleExcelReaderTest.php index 26fb1f2..f2a132e 100644 --- a/tests/SimpleExcelReaderTest.php +++ b/tests/SimpleExcelReaderTest.php @@ -78,6 +78,19 @@ public function it_can_work_with_a_file_where_the_row_is_too_short() ], $rows); } + /** @test */ + public function it_can_retrieve_the_headers() + { + $headers = SimpleExcelReader::create($this->getStubPath('header-and-rows.csv')) + ->getHeaders(); + + $this->assertEquals([ + 0 => 'email', + 1 => 'first_name', + 2 => 'last_name' + ], $headers); + } + /** @test */ public function it_can_ignore_the_headers() { @@ -105,6 +118,16 @@ public function it_can_ignore_the_headers() ], $rows); } + /** @test */ + public function it_doesnt_return_headers_when_headers_are_ignored() + { + $headers = SimpleExcelReader::create($this->getStubPath('header-and-rows.csv')) + ->noHeaderRow() + ->getHeaders(); + + $this->assertEquals(null, $headers); + } + /** @test */ public function it_can_use_an_alternative_delimiter() { @@ -197,6 +220,35 @@ public function it_can_call_getRows_twice() $this->assertNotNull($firstRowAgain); } + /** @test */ + public function it_can_call_getRows_after_getHeaders() + { + $reader = SimpleExcelReader::create($this->getStubPath('header-and-rows.csv')); + + $headers = $reader->getHeaders(); + + $this->assertEquals([ + 0 => 'email', + 1 => 'first_name', + 2 => 'last_name' + ], $headers); + + $rows = $reader->getRows()->toArray(); + + $this->assertEquals([ + [ + 'email' => 'john@example.com', + 'first_name' => 'john', + 'last_name' => 'doe', + ], + [ + 'email' => 'mary-jane@example.com', + 'first_name' => 'mary jane', + 'last_name' => 'doe', + ], + ], $rows); + } + /** @test */ public function it_can_call_first_on_the_collection_twice() { @@ -239,6 +291,20 @@ public function it_can_trim_the_header_row_names() ], $rows); } + /** @test */ + public function it_can_retrieve_trimmed_header_row_names() + { + $headers = SimpleExcelReader::create($this->getStubPath('header-with-spaces.csv')) + ->trimHeaderRow() + ->getHeaders(); + + $this->assertEquals([ + 0 => 'email', + 1 => 'first_name', + 2 => 'last_name', + ], $headers); + } + /** @test */ public function it_can_trim_the_header_row_names_with_alternate_characters() { @@ -285,6 +351,21 @@ public function it_can_convert_headers_to_snake_case() ], $rows); } + /** @test */ + public function it_can_retrieve_headers_converted_to_snake_case() + { + $headers = SimpleExcelReader::create($this->getStubPath('headers-not-snake-case.csv')) + ->headersToSnakeCase() + ->getHeaders(); + + $this->assertEquals([ + 0 => 'email', + 1 => 'first_name', + 2 => 'last_name', + 3 => 'job_title', + ], $headers); + } + /** @test */ public function it_can_use_custom_header_row_formatter() { @@ -308,4 +389,20 @@ public function it_can_use_custom_header_row_formatter() ], ], $rows); } + + /** @test */ + public function it_can_retrieve_headers_with_a_custom_formatter() + { + $headers = SimpleExcelReader::create($this->getStubPath('header-and-rows.csv')) + ->formatHeadersUsing(function ($header) { + return $header . '_suffix'; + }) + ->getHeaders(); + + $this->assertEquals([ + 0 => 'email_suffix', + 1 => 'first_name_suffix', + 2 => 'last_name_suffix', + ], $headers); + } } From 85e0acf4b94b1bcb0af89da6823d7ac01c8ba3ab Mon Sep 17 00:00:00 2001 From: Adam Gaskins Date: Sat, 16 Jan 2021 13:51:20 -0500 Subject: [PATCH 2/7] Implement getHeaders() --- src/SimpleExcelReader.php | 60 ++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/src/SimpleExcelReader.php b/src/SimpleExcelReader.php index 68f47e8..bed5e03 100644 --- a/src/SimpleExcelReader.php +++ b/src/SimpleExcelReader.php @@ -28,6 +28,10 @@ class SimpleExcelReader protected ?Closure $formatHeadersUsing = null; + protected bool $spreadsheetProcessed = false; + + protected ?array $headers = null; + protected int $skip = 0; protected int $limit = 0; @@ -118,6 +122,42 @@ public function take(int $count): SimpleExcelReader public function getRows(): LazyCollection { + $this->processSpreadsheet(); + + return LazyCollection::make(function () { + while ($this->rowIterator->valid() && $this->skip && $this->skip--) { + $this->rowIterator->next(); + } + while ($this->rowIterator->valid() && (!$this->useLimit || $this->limit--)) { + $row = $this->rowIterator->current(); + + yield $this->getValueFromRow($row); + + $this->rowIterator->next(); + } + }); + } + + public function getHeaders(): ?array + { + $this->processSpreadsheet(); + + return $this->headers; + } + + public function close() + { + $this->reader->close(); + } + + protected function processSpreadsheet() + { + if ($this->spreadsheetProcessed) { + return; + } + + $this->spreadsheetProcessed = true; + $this->reader->open($this->path); $this->reader->getSheetIterator()->rewind(); @@ -139,24 +179,6 @@ public function getRows(): LazyCollection $this->headers = $this->processHeaderRow($firstRow->toArray()); $this->rowIterator->next(); } - - return LazyCollection::make(function () { - while ($this->rowIterator->valid() && $this->skip && $this->skip--) { - $this->rowIterator->next(); - } - while ($this->rowIterator->valid() && (! $this->useLimit || $this->limit--)) { - $row = $this->rowIterator->current(); - - yield $this->getValueFromRow($row); - - $this->rowIterator->next(); - } - }); - } - - public function close() - { - $this->reader->close(); } protected function processHeaderRow(array $headers): array @@ -209,7 +231,7 @@ protected function getValueFromRow(Row $row): array $values = $row->toArray(); ksort($values); - if (! $this->processHeader) { + if (!$this->processHeader) { return $values; } From 9a98a93d9cf416d21c707f3ba996bff6d730c8bf Mon Sep 17 00:00:00 2001 From: Adam Gaskins Date: Sat, 16 Jan 2021 13:59:15 -0500 Subject: [PATCH 3/7] Document getHeaders() --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 5574163..c0cc007 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,17 @@ $rows = SimpleExcelReader::create($pathToCsv) }); ``` +#### Retrieving Header Row values + +If you would like to retrieve the header row as an array, you can use the `getHeaders()` method. + +```php +$headers = SimpleExcelReader::create($pathToCsv)->getHeaders(); + +// $headers will contain +// [ 'email', 'first_name' ] +``` + #### Trimming Header Row values If the file you are reading contains a title row, but you need to trim additional characters on the title values, then you should use the `trimHeaderRow()` method. From eb52ae35be083b12536d543e5467b269e80b8dc8 Mon Sep 17 00:00:00 2001 From: Adam Gaskins Date: Mon, 18 Jan 2021 16:47:20 -0500 Subject: [PATCH 4/7] Simplify implementation --- src/SimpleExcelReader.php | 80 ++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/src/SimpleExcelReader.php b/src/SimpleExcelReader.php index bed5e03..df7073c 100644 --- a/src/SimpleExcelReader.php +++ b/src/SimpleExcelReader.php @@ -14,6 +14,8 @@ class SimpleExcelReader { protected string $path; + protected string $type; + protected ReaderInterface $reader; protected IteratorInterface $rowIterator; @@ -28,8 +30,6 @@ class SimpleExcelReader protected ?Closure $formatHeadersUsing = null; - protected bool $spreadsheetProcessed = false; - protected ?array $headers = null; protected int $skip = 0; @@ -47,6 +47,8 @@ public function __construct(string $path, string $type = '') { $this->path = $path; + $this->type = $type; + $this->reader = $type ? ReaderFactory::createFromType($type) : ReaderEntityFactory::createReaderFromFile($this->path); @@ -122,13 +124,34 @@ public function take(int $count): SimpleExcelReader public function getRows(): LazyCollection { - $this->processSpreadsheet(); + $this->reader->open($this->path); + + $this->reader->getSheetIterator()->rewind(); + + $sheet = $this->reader->getSheetIterator()->current(); + + $this->rowIterator = $sheet->getRowIterator(); + + $this->rowIterator->rewind(); + + /** @var \Box\Spout\Common\Entity\Row $firstRow */ + $firstRow = $this->rowIterator->current(); + + if (is_null($firstRow)) { + $this->noHeaderRow(); + } + + if ($this->processHeader) { + $this->headers = $this->processHeaderRow($firstRow->toArray()); + + $this->rowIterator->next(); + } return LazyCollection::make(function () { while ($this->rowIterator->valid() && $this->skip && $this->skip--) { $this->rowIterator->next(); } - while ($this->rowIterator->valid() && (!$this->useLimit || $this->limit--)) { + while ($this->rowIterator->valid() && (! $this->useLimit || $this->limit--)) { $row = $this->rowIterator->current(); yield $this->getValueFromRow($row); @@ -140,45 +163,32 @@ public function getRows(): LazyCollection public function getHeaders(): ?array { - $this->processSpreadsheet(); - - return $this->headers; - } - - public function close() - { - $this->reader->close(); - } - - protected function processSpreadsheet() - { - if ($this->spreadsheetProcessed) { - return; - } - - $this->spreadsheetProcessed = true; + if ($this->processHeader && ! $this->headers) { + $reader = $this->type ? + ReaderFactory::createFromType($this->type) : + ReaderEntityFactory::createReaderFromFile($this->path); - $this->reader->open($this->path); + $reader->open($this->path); - $this->reader->getSheetIterator()->rewind(); + $reader->getSheetIterator()->rewind(); - $sheet = $this->reader->getSheetIterator()->current(); - - $this->rowIterator = $sheet->getRowIterator(); + $sheet = $reader->getSheetIterator()->current(); - $this->rowIterator->rewind(); + $this->rowIterator = $sheet->getRowIterator(); - /** @var \Box\Spout\Common\Entity\Row $firstRow */ - $firstRow = $this->rowIterator->current(); + $this->rowIterator->rewind(); - if (is_null($firstRow)) { - $this->noHeaderRow(); - } + /** @var \Box\Spout\Common\Entity\Row $firstRow */ + $firstRow = $this->rowIterator->current(); - if ($this->processHeader) { $this->headers = $this->processHeaderRow($firstRow->toArray()); - $this->rowIterator->next(); } + return $this->headers; + } + + public function close() + { + $this->reader->close(); } protected function processHeaderRow(array $headers): array @@ -231,7 +241,7 @@ protected function getValueFromRow(Row $row): array $values = $row->toArray(); ksort($values); - if (!$this->processHeader) { + if (! $this->processHeader) { return $values; } From f588e093f5e861782ee28a31a42620207b96bc47 Mon Sep 17 00:00:00 2001 From: Adam Gaskins Date: Mon, 18 Jan 2021 17:20:32 -0500 Subject: [PATCH 5/7] Rearrange getHeaders() --- src/SimpleExcelReader.php | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/SimpleExcelReader.php b/src/SimpleExcelReader.php index df7073c..f38f281 100644 --- a/src/SimpleExcelReader.php +++ b/src/SimpleExcelReader.php @@ -163,26 +163,33 @@ public function getRows(): LazyCollection public function getHeaders(): ?array { - if ($this->processHeader && ! $this->headers) { - $reader = $this->type ? - ReaderFactory::createFromType($this->type) : - ReaderEntityFactory::createReaderFromFile($this->path); + if (! $this->processHeader) { + return null; + } - $reader->open($this->path); + if ($this->headers) { + return $this->headers; + } - $reader->getSheetIterator()->rewind(); + $reader = $this->type ? + ReaderFactory::createFromType($this->type) : + ReaderEntityFactory::createReaderFromFile($this->path); - $sheet = $reader->getSheetIterator()->current(); + $reader->open($this->path); - $this->rowIterator = $sheet->getRowIterator(); + $reader->getSheetIterator()->rewind(); - $this->rowIterator->rewind(); + $sheet = $reader->getSheetIterator()->current(); - /** @var \Box\Spout\Common\Entity\Row $firstRow */ - $firstRow = $this->rowIterator->current(); + $this->rowIterator = $sheet->getRowIterator(); + + $this->rowIterator->rewind(); + + /** @var \Box\Spout\Common\Entity\Row $firstRow */ + $firstRow = $this->rowIterator->current(); + + $this->headers = $this->processHeaderRow($firstRow->toArray()); - $this->headers = $this->processHeaderRow($firstRow->toArray()); - } return $this->headers; } From f61c6d3ee30ad89565cc8a6816f6f120cf26abcb Mon Sep 17 00:00:00 2001 From: Adam Gaskins Date: Mon, 18 Jan 2021 17:23:52 -0500 Subject: [PATCH 6/7] Add test for calling getHeaders on empty file --- tests/SimpleExcelReaderTest.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/SimpleExcelReaderTest.php b/tests/SimpleExcelReaderTest.php index f2a132e..b6c7086 100644 --- a/tests/SimpleExcelReaderTest.php +++ b/tests/SimpleExcelReaderTest.php @@ -17,6 +17,15 @@ public function it_can_work_with_an_empty_file() $this->assertEquals(0, $actualCount); } + /** @test */ + public function it_can_getHeaders_with_an_empty_file() + { + $headers = SimpleExcelReader::create($this->getStubPath('empty.csv')) + ->getHeaders(); + + $this->assertEquals(null, $headers); + } + /** @test */ public function it_can_work_with_an_file_that_has_headers() { From a5bf14642b8768a822c9bd05b0a5e0b72a4e64c1 Mon Sep 17 00:00:00 2001 From: Adam Gaskins Date: Mon, 18 Jan 2021 17:25:29 -0500 Subject: [PATCH 7/7] Fix calling getHeaders() on empty file --- src/SimpleExcelReader.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/SimpleExcelReader.php b/src/SimpleExcelReader.php index f38f281..573e784 100644 --- a/src/SimpleExcelReader.php +++ b/src/SimpleExcelReader.php @@ -188,6 +188,12 @@ public function getHeaders(): ?array /** @var \Box\Spout\Common\Entity\Row $firstRow */ $firstRow = $this->rowIterator->current(); + if (is_null($firstRow)) { + $this->noHeaderRow(); + + return null; + } + $this->headers = $this->processHeaderRow($firstRow->toArray()); return $this->headers;