Skip to content

Commit

Permalink
Remove convertBookTypeToPdf and Improve File Sanitization in `FileT…
Browse files Browse the repository at this point in the history
…oPdf` (#3072)

# Description of Changes

Please provide a summary of the changes, including:

- **Removed `convertBookTypeToPdf` method**: 
- This method used `ebook-convert` from Calibre, which required external
dependencies.
- Its removal eliminates unnecessary process execution and simplifies
the codebase.
  
- **Enhanced `sanitizeZipFilename` function**:
  - Added handling for drive letters (e.g., `C:\`).
  - Ensured all slashes are normalized to forward slashes.
- Improved recursive path traversal removal to prevent directory escape
vulnerabilities.

- **Refactored `ProcessExecutor` output handling**:
  - Replaced redundant `.size() > 0` checks with `.isEmpty()`.
  
- **Expanded unit tests in `FileToPdfTest`**:
  - Added tests for `sanitizeZipFilename` to cover edge cases.
  - Improved test descriptions and added assertion messages.
  - Added debug print statements for easier test debugging.

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
  • Loading branch information
Ludy87 authored Feb 26, 2025
1 parent 96655f7 commit 9152e64
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 55 deletions.
44 changes: 9 additions & 35 deletions src/main/java/stirling/software/SPDF/utils/FileToPdf.java
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException {
}
}

// search for the main HTML file.
// Search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles =
walk.filter(file -> file.toString().endsWith(".html"))
Expand All @@ -190,46 +190,20 @@ private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException {
}
}

public static byte[] convertBookTypeToPdf(byte[] bytes, String originalFilename)
throws IOException, InterruptedException {
if (originalFilename == null || originalFilename.lastIndexOf('.') == -1) {
throw new IllegalArgumentException("Invalid original filename.");
}

String fileExtension = originalFilename.substring(originalFilename.lastIndexOf('.'));
List<String> command = new ArrayList<>();
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile = null;

try {
// Create temp file with appropriate extension
tempInputFile = Files.createTempFile("input_", fileExtension);
Files.write(tempInputFile, bytes);

command.add("ebook-convert");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.CALIBRE)
.runCommandWithOutputHandling(command);

return Files.readAllBytes(tempOutputFile);
} finally {
// Clean up temporary files
if (tempInputFile != null) {
Files.deleteIfExists(tempInputFile);
}
Files.deleteIfExists(tempOutputFile);
}
}

static String sanitizeZipFilename(String entryName) {
if (entryName == null || entryName.trim().isEmpty()) {
return entryName;
return "";
}
// Remove any drive letters (e.g., "C:\") and leading forward/backslashes
entryName = entryName.replaceAll("^[a-zA-Z]:[\\\\/]+", "");
entryName = entryName.replaceAll("^[\\\\/]+", "");

// Recursively remove path traversal sequences
while (entryName.contains("../") || entryName.contains("..\\")) {
entryName = entryName.replace("../", "").replace("..\\", "");
}
// Normalize all backslashes to forward slashes
entryName = entryName.replaceAll("\\\\", "/");
return entryName;
}
}
10 changes: 7 additions & 3 deletions src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package stirling.software.SPDF.utils;

import java.io.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -222,15 +226,15 @@ public ProcessExecutorResult runCommandWithOutputHandling(
boolean isQpdf =
command != null && !command.isEmpty() && command.get(0).contains("qpdf");

if (outputLines.size() > 0) {
if (!outputLines.isEmpty()) {
String outputMessage = String.join("\n", outputLines);
messages += outputMessage;
if (!liveUpdates) {
log.info("Command output:\n" + outputMessage);
}
}

if (errorLines.size() > 0) {
if (!errorLines.isEmpty()) {
String errorMessage = String.join("\n", errorLines);
messages += errorMessage;
if (!liveUpdates) {
Expand Down
82 changes: 65 additions & 17 deletions src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,79 @@

import java.io.IOException;

import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.*;

public class FileToPdfTest {

/**
* Test the HTML to PDF conversion.
* This test expects an IOException when an empty HTML input is provided.
*/
@Test
public void testConvertHtmlToPdf() {
HTMLToPdfRequest request = new HTMLToPdfRequest();
byte[] fileBytes = new byte[0]; // Sample file bytes
String fileName = "test.html"; // Sample file name
boolean disableSanitize = false; // Sample boolean value

// Check if the method throws IOException
assertThrows(IOException.class, () -> {
FileToPdf.convertHtmlToPdf("/path/",request, fileBytes, fileName, disableSanitize);
});
byte[] fileBytes = new byte[0]; // Sample file bytes (empty input)
String fileName = "test.html"; // Sample file name indicating an HTML file
boolean disableSanitize = false; // Flag to control sanitization

// Expect an IOException to be thrown due to empty input
Throwable thrown =
assertThrows(
IOException.class,
() ->
FileToPdf.convertHtmlToPdf(
"/path/", request, fileBytes, fileName, disableSanitize));
assertNotNull(thrown);
}

/**
* Test sanitizeZipFilename with null or empty input.
* It should return an empty string in these cases.
*/
@Test
public void testSanitizeZipFilename_NullOrEmpty() {
assertEquals("", FileToPdf.sanitizeZipFilename(null));
assertEquals("", FileToPdf.sanitizeZipFilename(" "));
}

/**
* Test sanitizeZipFilename to ensure it removes path traversal sequences.
* This includes removing both forward and backward slash sequences.
*/
@Test
public void testSanitizeZipFilename_RemovesTraversalSequences() {
String input = "../some/../path/..\\to\\file.txt";
String expected = "some/path/to/file.txt";

// Print output for debugging purposes
System.out.println("sanitizeZipFilename " + FileToPdf.sanitizeZipFilename(input));
System.out.flush();

// Expect that the method replaces backslashes with forward slashes
// and removes path traversal sequences
assertEquals(expected, FileToPdf.sanitizeZipFilename(input));
}

/**
* Test sanitizeZipFilename to ensure that it removes leading drive letters and slashes.
*/
@Test
public void testSanitizeZipFilename_RemovesLeadingDriveAndSlashes() {
String input = "C:\\folder\\file.txt";
String expected = "folder/file.txt";
assertEquals(expected, FileToPdf.sanitizeZipFilename(input));

input = "/folder/file.txt";
expected = "folder/file.txt";
assertEquals(expected, FileToPdf.sanitizeZipFilename(input));
}

/**
* Test sanitizeZipFilename to verify that safe filenames remain unchanged.
*/
@Test
public void testConvertBookTypeToPdf() {
byte[] bytes = new byte[10]; // Sample bytes
String originalFilename = "test.epub"; // Sample original filename

// Check if the method throws IOException
assertThrows(IOException.class, () -> {
FileToPdf.convertBookTypeToPdf(bytes, originalFilename);
});
public void testSanitizeZipFilename_NoChangeForSafeNames() {
String input = "folder/subfolder/file.txt";
assertEquals(input, FileToPdf.sanitizeZipFilename(input));
}
}

0 comments on commit 9152e64

Please sign in to comment.