Skip to content

Commit

Permalink
Do not accept empty OCRmyPDF results and fix log #79 (#80)
Browse files Browse the repository at this point in the history
Signed-off-by: Robin Windey <[email protected]>
  • Loading branch information
R0Wi authored Dec 8, 2021
1 parent 05e2509 commit 6b99bcb
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 13 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ coverage*.html
coverage_html
build
.phpunit.result.cache
*.cov
*.cov
coverage
23 changes: 16 additions & 7 deletions lib/OcrProcessors/PdfOcrProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,24 @@ public function ocrFile(string $fileContent): string {
$stdErr = $this->command->getStdErr();
$exitCode = $this->command->getExitCode();

if ($success) {
if ($stdErr !== '' || $errorOutput !== '') {
// Log warning if ocrmypdf wrote a warning to the stderr
$this->logger->warning('OCRmyPDF succeeded with warning(s): {stdErr}, {errorOutput}', [$stdErr, $errorOutput]);
}
if (!$success) {
throw new OcrNotPossibleException('OCRmyPDF exited abnormally with exit-code ' . $exitCode . '. Message: ' . $errorOutput . ' ' . $stdErr);
}

if ($stdErr !== '' || $errorOutput !== '') {
// Log warning if ocrmypdf wrote a warning to the stderr
$this->logger->warning('OCRmyPDF succeeded with warning(s): {stdErr}, {errorOutput}', [
'stdErr' => $stdErr,
'errorOutput' => $errorOutput
]);
}

$ocrOutput = $this->command->getOutput();

return $this->command->getOutput();
if (!$ocrOutput) {
throw new OcrNotPossibleException('OCRmyPDF did not produce any output');
}

throw new OcrNotPossibleException('OCRmyPDF exited abnormally with exit-code ' . $exitCode . '. Message: ' . $errorOutput . ' ' . $stdErr);
return $ocrOutput;
}
}
42 changes: 37 additions & 5 deletions tests/Unit/OcrProcessors/PdfOcrProcessorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,22 +107,54 @@ public function testLogsWarningIfOcrMyPdfSucceedsWithWarningOutput() {
->willReturn(true);
$this->command->expects($this->once())
->method('getError')
->willReturn('error');
->willReturn('getErrorOutput');
$this->command->expects($this->once())
->method('getStdErr')
->willReturn('stdErr');
->willReturn('stdErrOutput');
$this->command->expects($this->once())
->method('getOutput')
->willReturn('someOcrFileContent');
$this->logger->expects($this->once())
->method('warning')
->with(
$this->stringStartsWith('OCRmyPDF succeeded with warning(s):'),
'OCRmyPDF succeeded with warning(s): {stdErr}, {errorOutput}',
$this->callback(function ($paramsArray) {
return is_array($paramsArray) &&
count($paramsArray) === 2 &&
$paramsArray[0] === 'stdErr' &&
$paramsArray[1] === 'error';
$paramsArray['stdErr'] === 'stdErrOutput' &&
$paramsArray['errorOutput'] === 'getErrorOutput';
}));

$processor = new PdfOcrProcessor($this->command, $this->logger);
$processor->ocrFile('someContent');
}

/**
 * Verifies that ocrFile() rejects an empty OCR result.
 *
 * The command mock reports a successful execution (execute() returns true)
 * but getOutput() yields an empty string. In that situation the processor
 * is expected to throw an OcrNotPossibleException with the message
 * 'OCRmyPDF did not produce any output'.
 */
public function testThrowsErrorIfOcrFileWasEmpty() {
    $this->command->expects($this->once())
        ->method('execute')
        ->willReturn(true);
    $this->command->expects($this->once())
        ->method('getError')
        ->willReturn('error');
    $this->command->expects($this->once())
        ->method('getStdErr')
        ->willReturn('stdErr');
    $this->command->expects($this->once())
        ->method('getOutput')
        ->willReturn('');

    $processor = new PdfOcrProcessor($this->command, $this->logger);

    try {
        $processor->ocrFile('someContent');
    } catch (OcrNotPossibleException $exception) {
        // assertSame keeps the comparison strict (type and value) for the exact message.
        $this->assertSame('OCRmyPDF did not produce any output', $exception->getMessage());
        return;
    }

    // Only reached when no OcrNotPossibleException was thrown; any other
    // exception type propagates and is reported by PHPUnit as an error.
    $this->fail('Expected OcrNotPossibleException was not thrown for empty OCR output');
}
}

0 comments on commit 6b99bcb

Please sign in to comment.