From 9152e64b9f091dc2fdfe4ad01b4e10bf74cbb853 Mon Sep 17 00:00:00 2001 From: Ludy Date: Wed, 26 Feb 2025 20:25:35 +0100 Subject: [PATCH] Remove `convertBookTypeToPdf` and Improve File Sanitization in `FileToPdf` (#3072) # Description of Changes Please provide a summary of the changes, including: - **Removed `convertBookTypeToPdf` method**: - This method used `ebook-convert` from Calibre, which required external dependencies. - Its removal eliminates unnecessary process execution and simplifies the codebase. - **Enhanced `sanitizeZipFilename` function**: - Added handling for drive letters (e.g., `C:\`). - Ensured all slashes are normalized to forward slashes. - Improved recursive path traversal removal to prevent directory escape vulnerabilities. - **Refactored `ProcessExecutor` output handling**: - Replaced redundant `.size() > 0` checks with `.isEmpty()`. - **Expanded unit tests in `FileToPdfTest`**: - Added tests for `sanitizeZipFilename` to cover edge cases. - Improved test descriptions and added assertion messages. - Added debug print statements for easier test debugging. --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --- .../software/SPDF/utils/FileToPdf.java | 44 ++-------- .../software/SPDF/utils/ProcessExecutor.java | 10 ++- .../software/SPDF/utils/FileToPdfTest.java | 82 +++++++++++++++---- 3 files changed, 81 insertions(+), 55 deletions(-) diff --git a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java index 6a0e263aa94..dbc0915bb7e 100644 --- a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java +++ b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java @@ -169,7 +169,7 @@ private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException { } } - // search for the main HTML file. + // Search for the main HTML file. try (Stream walk = Files.walk(tempDirectory)) { List htmlFiles = walk.filter(file -> file.toString().endsWith(".html")) @@ -190,46 +190,20 @@ private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException { } } - public static byte[] convertBookTypeToPdf(byte[] bytes, String originalFilename) - throws IOException, InterruptedException { - if (originalFilename == null || originalFilename.lastIndexOf('.') == -1) { - throw new IllegalArgumentException("Invalid original filename."); - } - - String fileExtension = originalFilename.substring(originalFilename.lastIndexOf('.')); - List command = new ArrayList<>(); - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - Path tempInputFile = null; - - try { - // Create temp file with appropriate extension - tempInputFile = Files.createTempFile("input_", fileExtension); - Files.write(tempInputFile, bytes); - - command.add("ebook-convert"); - command.add(tempInputFile.toString()); - command.add(tempOutputFile.toString()); - ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.CALIBRE) - .runCommandWithOutputHandling(command); - - return Files.readAllBytes(tempOutputFile); - } finally { - // Clean up temporary files - if (tempInputFile != null) { - Files.deleteIfExists(tempInputFile); - } - Files.deleteIfExists(tempOutputFile); - } - } - static String sanitizeZipFilename(String entryName) { if (entryName == null || entryName.trim().isEmpty()) { - return entryName; + return ""; } + // Remove any drive letters (e.g., "C:\") and leading forward/backslashes + entryName = entryName.replaceAll("^[a-zA-Z]:[\\\\/]+", ""); + entryName = entryName.replaceAll("^[\\\\/]+", ""); + + // Recursively remove path traversal sequences while (entryName.contains("../") || entryName.contains("..\\")) { entryName = entryName.replace("../", "").replace("..\\", ""); } + // Normalize all backslashes to forward slashes + entryName = entryName.replaceAll("\\\\", "/"); return entryName; } } diff --git a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java index a8d39969757..e5b8fbb3682 100644 --- a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java +++ b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java @@ -1,6 +1,10 @@ package stirling.software.SPDF.utils; -import java.io.*; +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.InterruptedIOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -222,7 +226,7 @@ public ProcessExecutorResult runCommandWithOutputHandling( boolean isQpdf = command != null && !command.isEmpty() && command.get(0).contains("qpdf"); - if (outputLines.size() > 0) { + if (!outputLines.isEmpty()) { String outputMessage = String.join("\n", outputLines); messages += outputMessage; if (!liveUpdates) { @@ -230,7 +234,7 @@ public ProcessExecutorResult runCommandWithOutputHandling( } } - if (errorLines.size() > 0) { + if (!errorLines.isEmpty()) { String errorMessage = String.join("\n", errorLines); messages += errorMessage; if (!liveUpdates) { diff --git a/src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java b/src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java index f5cb2c80235..8edb1b8711f 100644 --- a/src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java +++ b/src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java @@ -5,31 +5,79 @@ import java.io.IOException; -import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.*; public class FileToPdfTest { + /** + * Test the HTML to PDF conversion. + * This test expects an IOException when an empty HTML input is provided. + */ @Test public void testConvertHtmlToPdf() { HTMLToPdfRequest request = new HTMLToPdfRequest(); - byte[] fileBytes = new byte[0]; // Sample file bytes - String fileName = "test.html"; // Sample file name - boolean disableSanitize = false; // Sample boolean value - - // Check if the method throws IOException - assertThrows(IOException.class, () -> { - FileToPdf.convertHtmlToPdf("/path/",request, fileBytes, fileName, disableSanitize); - }); + byte[] fileBytes = new byte[0]; // Sample file bytes (empty input) + String fileName = "test.html"; // Sample file name indicating an HTML file + boolean disableSanitize = false; // Flag to control sanitization + + // Expect an IOException to be thrown due to empty input + Throwable thrown = + assertThrows( + IOException.class, + () -> + FileToPdf.convertHtmlToPdf( + "/path/", request, fileBytes, fileName, disableSanitize)); + assertNotNull(thrown); + } + + /** + * Test sanitizeZipFilename with null or empty input. + * It should return an empty string in these cases. + */ + @Test + public void testSanitizeZipFilename_NullOrEmpty() { + assertEquals("", FileToPdf.sanitizeZipFilename(null)); + assertEquals("", FileToPdf.sanitizeZipFilename(" ")); + } + + /** + * Test sanitizeZipFilename to ensure it removes path traversal sequences. + * This includes removing both forward and backward slash sequences. + */ + @Test + public void testSanitizeZipFilename_RemovesTraversalSequences() { + String input = "../some/../path/..\\to\\file.txt"; + String expected = "some/path/to/file.txt"; + + // Print output for debugging purposes + System.out.println("sanitizeZipFilename " + FileToPdf.sanitizeZipFilename(input)); + System.out.flush(); + + // Expect that the method replaces backslashes with forward slashes + // and removes path traversal sequences + assertEquals(expected, FileToPdf.sanitizeZipFilename(input)); + } + + /** + * Test sanitizeZipFilename to ensure that it removes leading drive letters and slashes. + */ + @Test + public void testSanitizeZipFilename_RemovesLeadingDriveAndSlashes() { + String input = "C:\\folder\\file.txt"; + String expected = "folder/file.txt"; + assertEquals(expected, FileToPdf.sanitizeZipFilename(input)); + + input = "/folder/file.txt"; + expected = "folder/file.txt"; + assertEquals(expected, FileToPdf.sanitizeZipFilename(input)); } + /** + * Test sanitizeZipFilename to verify that safe filenames remain unchanged. + */ @Test - public void testConvertBookTypeToPdf() { - byte[] bytes = new byte[10]; // Sample bytes - String originalFilename = "test.epub"; // Sample original filename - - // Check if the method throws IOException - assertThrows(IOException.class, () -> { - FileToPdf.convertBookTypeToPdf(bytes, originalFilename); - }); + public void testSanitizeZipFilename_NoChangeForSafeNames() { + String input = "folder/subfolder/file.txt"; + assertEquals(input, FileToPdf.sanitizeZipFilename(input)); } }