Skip to content
This repository has been archived by the owner on Nov 10, 2023. It is now read-only.

Commit

Permalink
Merge pull request #18 from nlnwa/checksum
Browse files Browse the repository at this point in the history
Create checksum file (optional)
  • Loading branch information
andrbo authored Nov 19, 2020
2 parents 0f88cbc + f66b1f8 commit e91b833
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 16 deletions.
22 changes: 12 additions & 10 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -93,24 +93,26 @@

<!-- Test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>5.5.2</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>3.8.0</version>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>5.5.2</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>2.27.0</version>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>5.5.2</version>
<scope>test</scope>
</dependency>
</dependencies>
</dependencies>

<build>

Expand Down
10 changes: 10 additions & 0 deletions src/main/java/no/nb/nna/veidemann/warcvalidator/WarcValidator.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public class WarcValidator {
private final static Path invalidWarcsDirectory;
private final static boolean deleteReportIfValid;
private final static boolean skipMove;
private final static boolean generateChecksumFile;
private boolean isRunning;

static {
Expand All @@ -35,6 +36,7 @@ public class WarcValidator {
sleepTime = SETTINGS.getSleepTime();
skipMove = SETTINGS.isSkipMove();
deleteReportIfValid = SETTINGS.isDeleteReportIfValid();
generateChecksumFile = SETTINGS.isGenerateChecksumFile();
warcsDirectory = Paths.get(SETTINGS.getWarcDir()); // New warcs is placed here
validWarcsDirectory = Paths.get(SETTINGS.getValidWarcDir()); // Well-formed and valid warcs is placed here
invalidWarcsDirectory = Paths.get(SETTINGS.getInvalidWarcDir()); // Warcs this isn't Well-formed and valid is placed here
Expand Down Expand Up @@ -92,10 +94,18 @@ public void runValidation(ValidationService service) throws IOException {

if (isValid) {
logger.debug(warcPath + " is valid");
Path checksumPath = null;

if (generateChecksumFile) {
checksumPath = service.generateChecksumFile(warcPath);
}

if (!skipMove) {
// move warc to validwarcs
Files.move(warcPath, validWarcsDirectory.resolve(warcPath.getFileName()), StandardCopyOption.REPLACE_EXISTING);
if (generateChecksumFile) {
Files.move(checksumPath, validWarcsDirectory.resolve(checksumPath.getFileName()), StandardCopyOption.REPLACE_EXISTING);
}
}
if (deleteReportIfValid) {
// delete report
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package no.nb.nna.veidemann.warcvalidator.service;

import no.nb.nna.veidemann.warcvalidator.validator.JhoveWarcFileValidator;
import org.apache.commons.codec.digest.DigestUtils;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -78,4 +80,24 @@ public DirectoryStream<Path> findAllWarcs(Path directory) throws IOException {
return isWarc && !isOpen;
});
}

/**
 * Writes an MD5 checksum file next to the given file.
 *
 * <p>The checksum file is a sibling of {@code path} named after it with a
 * {@code .md5} suffix. It holds a single line: the digest returned by
 * {@link #md5sum(Path)}, one space, the file name of {@code path}, and the
 * platform line separator.
 *
 * @param path the file to create a checksum file for
 * @return the path of the checksum file that was written
 * @throws IOException if {@code path} cannot be read or the checksum file cannot be written
 */
public Path generateChecksumFile(Path path) throws IOException {
    final Path checksumFile = path.resolveSibling(path.getFileName() + ".md5");
    final String digest = md5sum(path);
    final String entry = String.join(" ", digest, path.getFileName().toString()) + System.lineSeparator();
    Files.writeString(checksumFile, entry);
    return checksumFile;
}

/**
 * Computes the MD5 digest of a file's contents.
 *
 * @param path the file whose contents are digested
 * @return the digest as the hex string produced by {@code DigestUtils.md5Hex}
 * @throws IOException if the file cannot be opened or read
 */
protected String md5sum(Path path) throws IOException {
    try (InputStream content = Files.newInputStream(path)) {
        final String hexDigest = DigestUtils.md5Hex(content);
        return hexDigest;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ public class Settings {

private boolean skipMove;

private boolean generateChecksumFile;

/**
 * Returns the configured WARC directory.
 *
 * @return the current value of the {@code warcDir} setting
 */
public String getWarcDir() {
return warcDir;
}
Expand Down Expand Up @@ -71,4 +73,12 @@ public boolean isSkipMove() {
/**
 * Sets the {@code skipMove} setting.
 *
 * @param skipMove the new value of the setting
 */
public void setSkipMove(boolean skipMove) {
this.skipMove = skipMove;
}

/**
 * Returns the {@code generateChecksumFile} setting.
 *
 * @return the current value of the setting
 */
public boolean isGenerateChecksumFile() {
return generateChecksumFile;
}

/**
 * Sets the {@code generateChecksumFile} setting.
 *
 * @param generateChecksumFile the new value of the setting
 */
public void setGenerateChecksumFile(boolean generateChecksumFile) {
this.generateChecksumFile = generateChecksumFile;
}
}
3 changes: 3 additions & 0 deletions src/main/jib/app/resources/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ deleteReportIfValid = ${?DELETE_REPORT_IF_VALID}

skipMove = "false"
skipMove = ${?SKIP_MOVE}

generateChecksumFile = "false"
generateChecksumFile = ${?GENERATE_CHECKSUM_FILE}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package no.nb.nna.veidemann.warcvalidator.service;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

import java.io.File;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests checksum file generation in {@code ValidationService}.
 */
public class ChecksumTest {

    @TempDir
    public File temporaryFolder;

    /**
     * Generates a checksum file for a single known file and checks its name and content.
     *
     * <p>The file under test is addressed directly instead of by listing the temp
     * directory: the checksum file is written into that same directory, and whether
     * an open directory listing would also return it is unspecified. No exception is
     * caught here, because the test creates its own input and any I/O failure must
     * fail the test rather than let it pass silently.
     */
    @Test
    public void createChecksumFile() throws IOException {
        ValidationService validationService = new ValidationService(null);
        Path tmp = temporaryFolder.toPath();

        // Generate the input file
        Path file = tmp.resolve("file.txt");
        Files.writeString(file, "This is a text");

        // Generate the checksum file
        Path sum = validationService.generateChecksumFile(file);

        // The checksum file is a sibling of the input with an ".md5" suffix
        assertEquals(tmp.resolve("file.txt.md5"), sum);

        // The content is "<md5> <file name>" followed by a line separator
        String expected = validationService.md5sum(file);
        String actual = Files.readString(sum);
        String[] parts = actual.split(" ");

        assertEquals(2, parts.length);
        assertEquals(expected, parts[0]);
        assertEquals(file.getFileName().toString(), parts[1].stripTrailing());
    }
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
package no.nb.nna.veidemann.warcvalidator.validator;

import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

import java.io.File;
import java.io.IOException;
import java.nio.file.*;

public class JhoveWarcFileValidatorTest {

@Rule
public TemporaryFolder folder = new TemporaryFolder();
@TempDir
public File temporaryFolder;

@Test
public void validate() throws IOException {
Expand All @@ -22,7 +22,7 @@ public void validate() throws IOException {
(path.toString().endsWith(".warc") ||
path.toString().endsWith(".warc.gz")))) {
for (Path warcPath : warcPaths) {
Path reportPath = folder.getRoot().toPath().resolve(warcPath.getFileName().toString() + ".xml");
Path reportPath = temporaryFolder.toPath().resolve(warcPath.getFileName().toString() + ".xml");
jwv.validate(warcPath, reportPath);
}
} catch (NoSuchFileException e) {
Expand Down

0 comments on commit e91b833

Please sign in to comment.