From dbbfce516e6b4724bcafd5e1aa3a44cb7df94f3c Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Sun, 21 Jul 2024 18:39:10 +0900 Subject: [PATCH] feat(output): comment remove feature --- README.md | 46 +++- package-lock.json | 18 +- package.json | 4 +- repopack.config.json | 3 +- src/config/defaultConfig.ts | 1 + src/core/outputGenerator.ts | 1 + src/core/packager.ts | 2 +- src/types/index.ts | 4 + src/utils/fileHandler.ts | 21 +- src/utils/fileManipulator.ts | 93 +++++++ src/utils/gitignoreUtils.ts | 3 +- tests/utils/fileHandler.test.ts | 7 +- tests/utils/fileManipulator.test.ts | 400 ++++++++++++++++++++++++++++ 13 files changed, 592 insertions(+), 11 deletions(-) create mode 100644 src/utils/fileManipulator.ts create mode 100644 tests/utils/fileManipulator.test.ts diff --git a/README.md b/README.md index 372abd6e..29bbc69e 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ Repopack is a powerful tool that packs your entire repository into a single, AI-friendly file. Perfect for when you need to feed your codebase to Large Language Models (LLMs) or other AI tools. + + ## 🚀 Features - **AI-Optimized**: Formats your codebase in a way that's easy for AI to understand and process. @@ -13,6 +15,8 @@ Repopack is a powerful tool that packs your entire repository into a single, AI- - **Customizable**: Easily configure what to include or exclude. - **Git-Aware**: Automatically respects your .gitignore files. + + ## 🛠 Installation You can install Repopack globally using npm: @@ -33,6 +37,8 @@ Alternatively, you can use npx to run Repopack without installing it: npx repopack ``` + + ## 📊 Usage To pack your entire repository: @@ -81,6 +87,8 @@ repopack -c ./custom-config.json npx repopack src ``` + + ## ⚙️ Configuration Create a `repopack.config.json` file in your project root for custom configurations. 
Here's an explanation of the configuration options: @@ -89,6 +97,7 @@ Create a `repopack.config.json` file in your project root for custom configurati |--------|-------------|---------| |`output.filePath`| The name of the output file | `"repopack-output.txt"` | |`output.headerText`| Custom text to include in the file header |`null`| +|`output.removeComments`| Whether to remove comments from supported file types. Supports Python | `false` | |`ignore.useDefaultPatterns`| Whether to use default ignore patterns |`true`| |`ignore.customPatterns`| Additional patterns to ignore |`[]`| @@ -97,8 +106,9 @@ Example configuration: ```json { "output": { - "filePath": "custom-output.txt", - "headerText": "Custom header information for the packed file." + "filePath": "repopack-output.txt", + "headerText": "Custom header information for the packed file.", + "removeComments": true }, "ignore": { "useDefaultPatterns": true, @@ -119,6 +129,36 @@ Repopack automatically ignores certain files and directories by default: This ensures that only relevant source code is included in the packed file. You can add additional ignore patterns using the `ignore.customPatterns` configuration option or the `-i` command line flag. +### Comment Removal + +When `output.removeComments` is set to `true`, Repopack will attempt to remove comments from supported file types. This feature can help reduce the size of the output file and focus on the essential code content. + +Currently supported file types for comment removal: + +- HTML (.html) +- CSS (.css, .scss, .sass) +- JavaScript, React (.js, .jsx) +- TypeScript (.ts, .tsx) +- Vue (.vue) +- Svelte (.svelte) +- Python (.py) +- PHP (.php) +- Ruby (.rb) +- C (.c) +- C# (.cs) +- Java (.java) +- Go (.go) +- Rust (.rs) +- Swift (.swift) +- Kotlin (.kt) +- Dart (.dart) +- Shell (.sh) +- YAML (.yml, .yaml) + +Note: The comment removal process is designed to be conservative to avoid accidentally removing code. 
In some complex cases, especially with nested comments or language-specific peculiarities, some comments might be retained. + + + ## 📄 Output Format Repopack generates a single file with clear separators between different parts of your codebase: @@ -146,5 +186,7 @@ File: src/utils.js This format ensures that AI tools can easily distinguish between different files in your codebase. + + ## 📜 License MIT diff --git a/package-lock.json b/package-lock.json index 994ce442..093068ab 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,7 +16,8 @@ "is-binary-path": "^2.1.0", "jschardet": "^3.1.3", "log-update": "^6.0.0", - "picocolors": "^1.0.1" + "picocolors": "^1.0.1", + "strip-comments": "^2.0.1" }, "bin": { "repopack": "bin/repopack.cjs" @@ -27,6 +28,7 @@ "@types/eslint__js": "~8.42.3", "@types/eslint-config-prettier": "~6.11.3", "@types/node": "^20.14.10", + "@types/strip-comments": "^2.0.4", "@typescript-eslint/eslint-plugin": "^7.16.0", "@typescript-eslint/parser": "^7.16.0", "@vitest/coverage-v8": "^2.0.2", @@ -1001,6 +1003,12 @@ "undici-types": "~5.26.4" } }, + "node_modules/@types/strip-comments": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@types/strip-comments/-/strip-comments-2.0.4.tgz", + "integrity": "sha512-YwcQqIGy90zEHrReYrMTpZfq003Um77WayeE8UwJTHvaM9g9XR9N7GMVSnjRhhDzQYVX375JnB5P6q5kAg221g==", + "dev": true + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "7.16.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.16.0.tgz", @@ -4644,6 +4652,14 @@ "node": ">=4" } }, + "node_modules/strip-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-comments/-/strip-comments-2.0.1.tgz", + "integrity": "sha512-ZprKx+bBLXv067WTCALv8SSz5l2+XhpYCsVtSqlMnkAXMWDq+/ekVbl1ghqP9rUHTzv6sm/DwCOiYutU/yp1fw==", + "engines": { + "node": ">=10" + } + }, "node_modules/strip-final-newline": { "version": "3.0.0", "resolved": 
"https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-3.0.0.tgz", diff --git a/package.json b/package.json index 0805f959..04fc0ec9 100644 --- a/package.json +++ b/package.json @@ -56,7 +56,8 @@ "is-binary-path": "^2.1.0", "jschardet": "^3.1.3", "log-update": "^6.0.0", - "picocolors": "^1.0.1" + "picocolors": "^1.0.1", + "strip-comments": "^2.0.1" }, "devDependencies": { "@eslint/js": "^9.7.0", @@ -64,6 +65,7 @@ "@types/eslint__js": "~8.42.3", "@types/eslint-config-prettier": "~6.11.3", "@types/node": "^20.14.10", + "@types/strip-comments": "^2.0.4", "@typescript-eslint/eslint-plugin": "^7.16.0", "@typescript-eslint/parser": "^7.16.0", "@vitest/coverage-v8": "^2.0.2", diff --git a/repopack.config.json b/repopack.config.json index d4b67457..4f3720b4 100644 --- a/repopack.config.json +++ b/repopack.config.json @@ -1,7 +1,8 @@ { "output": { "filePath": "repopack-output.txt", - "headerText": "This repository contains the source code for the Repopack tool.\nRepopack is designed to pack repository contents into a single file,\nmaking it easier for AI systems to analyze and process the codebase.\n\nKey Features:\n- Configurable ignore patterns\n- Custom header text support\n- Efficient file processing and packing\n\nPlease refer to the README.md file for more detailed information on usage and configuration.\n" + "headerText": "This repository contains the source code for the Repopack tool.\nRepopack is designed to pack repository contents into a single file,\nmaking it easier for AI systems to analyze and process the codebase.\n\nKey Features:\n- Configurable ignore patterns\n- Custom header text support\n- Efficient file processing and packing\n\nPlease refer to the README.md file for more detailed information on usage and configuration.\n", + "removeComments": false }, "ignore": { "useDefaultPatterns": true, diff --git a/src/config/defaultConfig.ts b/src/config/defaultConfig.ts index 548035d1..3408cc32 100644 --- a/src/config/defaultConfig.ts +++ 
b/src/config/defaultConfig.ts @@ -3,6 +3,7 @@ import { RepopackConfigDefault } from '../types/index.js'; export const defaultConfig: RepopackConfigDefault = { output: { filePath: 'repopack-output.txt', + removeComments: false, }, ignore: { useDefaultPatterns: true, diff --git a/src/core/outputGenerator.ts b/src/core/outputGenerator.ts index ac963432..9323e3aa 100644 --- a/src/core/outputGenerator.ts +++ b/src/core/outputGenerator.ts @@ -68,6 +68,7 @@ Notes: - Some files may have been excluded based on .gitignore rules and Repopack's configuration. - Binary files are not included in this packed representation. +${config.output.removeComments ? '- Code comments have been removed.\n' : ''} For more information about Repopack, visit: https://github.com/yamadashy/repopack `; diff --git a/src/core/packager.ts b/src/core/packager.ts index 943fa9e9..595ce60e 100644 --- a/src/core/packager.ts +++ b/src/core/packager.ts @@ -81,7 +81,7 @@ async function packDirectory( const subDirFiles = await packDirectory(fullPath, entryRelativePath, config, ignoreFilter, deps); packedFiles.push(...subDirFiles); } else { - const content = await deps.processFile(fullPath); + const content = await deps.processFile(fullPath, config); if (content) { packedFiles.push({ path: entryRelativePath, content }); } diff --git a/src/types/index.ts b/src/types/index.ts index 46871e2f..1f42c9d3 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -2,6 +2,7 @@ interface RepopackConfigBase { output?: { filePath?: string; headerText?: string; + removeComments?: boolean; }; ignore?: { useDefaultPatterns?: boolean; @@ -13,6 +14,7 @@ export type RepopackConfigDefault = RepopackConfigBase & { output: { filePath: string; headerText?: string; + removeComments?: boolean; }; ignore: { useDefaultPatterns: boolean; @@ -24,6 +26,7 @@ export type RepopackConfigFile = RepopackConfigBase & { output?: { filePath?: string; headerText?: string; + removeComments?: boolean; }; ignore?: { useDefaultPatterns?: boolean; @@ 
-35,6 +38,7 @@ export type RepopackConfigCli = RepopackConfigBase & { output?: { filePath?: string; headerText?: string; + removeComments?: boolean; }; ignore?: { useDefaultPatterns?: boolean; diff --git a/src/utils/fileHandler.ts b/src/utils/fileHandler.ts index 2c02dd90..e770dc60 100644 --- a/src/utils/fileHandler.ts +++ b/src/utils/fileHandler.ts @@ -2,8 +2,14 @@ import * as fs from 'fs/promises'; import isBinaryPath from 'is-binary-path'; import jschardet from 'jschardet'; import iconv from 'iconv-lite'; +import { RepopackConfigMerged } from '../types/index.js'; +import { getFileManipulator } from './fileManipulator.js'; -export async function processFile(filePath: string, fsModule = fs): Promise { +export async function processFile( + filePath: string, + config: RepopackConfigMerged, + fsModule = fs, +): Promise { // Skip binary files if (isBinaryPath(filePath)) { return null; @@ -12,13 +18,22 @@ export async function processFile(filePath: string, fsModule = fs): Promise manipulator.removeComments(acc), content); + } +} + +const manipulators: Record = { + '.c': new StripCommentsManipulator('c'), + '.cs': new StripCommentsManipulator('csharp'), + '.css': new StripCommentsManipulator('css'), + '.dart': new StripCommentsManipulator('c'), + '.go': new StripCommentsManipulator('c'), + '.html': new StripCommentsManipulator('html'), + '.java': new StripCommentsManipulator('java'), + '.js': new StripCommentsManipulator('javascript'), + '.jsx': new StripCommentsManipulator('javascript'), + '.kt': new StripCommentsManipulator('c'), + '.less': new StripCommentsManipulator('less'), + '.php': new StripCommentsManipulator('php'), + '.rb': new StripCommentsManipulator('ruby'), + '.rs': new StripCommentsManipulator('c'), + '.sass': new StripCommentsManipulator('sass'), + '.scss': new StripCommentsManipulator('sass'), + '.sh': new StripCommentsManipulator('perl'), + '.sql': new StripCommentsManipulator('sql'), + '.swift': new StripCommentsManipulator('swift'), + '.ts': new 
StripCommentsManipulator('javascript'), + '.tsx': new StripCommentsManipulator('javascript'), + '.xml': new StripCommentsManipulator('xml'), + '.yaml': new StripCommentsManipulator('perl'), + '.yml': new StripCommentsManipulator('perl'), + + '.py': new PythonManipulator(), + + '.vue': new CompositeManipulator( + new StripCommentsManipulator('html'), + new StripCommentsManipulator('css'), + new StripCommentsManipulator('javascript'), + ), + '.svelte': new CompositeManipulator( + new StripCommentsManipulator('html'), + new StripCommentsManipulator('css'), + new StripCommentsManipulator('javascript'), + ), +}; + +export function getFileManipulator(filePath: string): FileManipulator | null { + const ext = path.extname(filePath); + return manipulators[ext] || null; +} diff --git a/src/utils/gitignoreUtils.ts b/src/utils/gitignoreUtils.ts index 6100a8bb..83e276cf 100644 --- a/src/utils/gitignoreUtils.ts +++ b/src/utils/gitignoreUtils.ts @@ -1,6 +1,7 @@ import * as fs from 'fs/promises'; import path from 'path'; import ignore from 'ignore'; +import { logger } from './logger.js'; export async function getGitignorePatterns(rootDir: string, fsModule = fs): Promise { const gitignorePath = path.join(rootDir, '.gitignore'); @@ -8,7 +9,7 @@ export async function getGitignorePatterns(rootDir: string, fsModule = fs): Prom const gitignoreContent = await fsModule.readFile(gitignorePath, 'utf-8'); return parseGitignoreContent(gitignoreContent); } catch (error) { - console.warn('No .gitignore file found or unable to read it.'); + logger.warn('No .gitignore file found or unable to read it.'); return []; } } diff --git a/tests/utils/fileHandler.test.ts b/tests/utils/fileHandler.test.ts index 38b4f6f6..79a12e46 100644 --- a/tests/utils/fileHandler.test.ts +++ b/tests/utils/fileHandler.test.ts @@ -1,6 +1,7 @@ import { expect, test, vi, describe, beforeEach } from 'vitest'; import { processFile, preprocessContent } from '../../src/utils/fileHandler.js'; import * as fs from 'fs/promises'; 
+import { RepopackConfigMerged } from '../../src/types/index.js'; vi.mock('fs/promises'); @@ -13,7 +14,11 @@ describe('fileHandler', () => { const mockContent = ' Some file content \n'; vi.mocked(fs.readFile).mockResolvedValue(mockContent); - const result = await processFile('/path/to/file.txt'); + const mockConfig: RepopackConfigMerged = { + output: { filePath: 'output.txt' }, + ignore: { useDefaultPatterns: true }, + }; + const result = await processFile('/path/to/file.txt', mockConfig); expect(fs.readFile).toHaveBeenCalledWith('/path/to/file.txt'); expect(result).toBe('Some file content'); diff --git a/tests/utils/fileManipulator.test.ts b/tests/utils/fileManipulator.test.ts new file mode 100644 index 00000000..afe6eb28 --- /dev/null +++ b/tests/utils/fileManipulator.test.ts @@ -0,0 +1,400 @@ +import { expect, test, describe } from 'vitest'; +import { getFileManipulator } from '../../src/utils/fileManipulator.js'; + +describe('fileManipulator', () => { + const testCases = [ + { + name: 'C comment removal', + ext: '.c', + input: ` + // Single line comment + int main() { + /* Multi-line + comment */ + return 0; + } + `, + expected: ` + int main() { + return 0; + }`, + }, + { + name: 'C# comment removal', + ext: '.cs', + input: ` + // Single line comment + public class Test { + /* Multi-line + comment */ + public void Method() {} + } + `, + expected: ` + public class Test { + public void Method() {} + }`, + }, + { + name: 'CSS comment removal', + ext: '.css', + input: ` + /* Comment */ + body { + color: red; /* Inline comment */ + } + `, + expected: ` + body { + color: red; + }`, + }, + { + name: 'HTML comment removal', + ext: '.html', + input: '
Content
', + expected: '
Content
', + }, + { + name: 'Java comment removal', + ext: '.java', + input: ` + // Single line comment + public class Test { + /* Multi-line + comment */ + public void method() {} + } + `, + expected: ` + public class Test { + public void method() {} + }`, + }, + { + name: 'JavaScript comment removal', + ext: '.js', + input: ` + // Single line comment + function test() { + /* Multi-line + comment */ + return true; + } + `, + expected: ` + function test() { + return true; + }`, + }, + { + name: 'Less comment removal', + ext: '.less', + input: ` + // Single line comment + @variable: #888; + /* Multi-line + comment */ + body { color: @variable; } + `, + expected: ` + @variable: #888; + body { color: @variable; }`, + }, + { + name: 'PHP comment removal', + ext: '.php', + input: ` + + `, + expected: ` + `, + }, + { + name: 'Python comment removal', + ext: '.py', + input: ` + # Single line comment + def test(): + ''' + Multi-line comment + ''' + return True + """ + Another multi-line comment + """ + `, + expected: ` + def test(): + return True`, + }, + { + name: 'Ruby comment removal', + ext: '.rb', + input: ` + # Single line comment + def test + =begin + Multi-line comment + =end + true + end + `, + expected: ` + def test + true + end`, + }, + { + name: 'Sass comment removal', + ext: '.sass', + input: ` + // Single line comment + $variable: #888 + /* Multi-line + comment */ + body + color: $variable + `, + expected: ` + $variable: #888 + body + color: $variable`, + }, + { + name: 'SCSS comment removal', + ext: '.scss', + input: ` + // Single line comment + $variable: #888; + /* Multi-line + comment */ + body { color: $variable; } + `, + expected: ` + $variable: #888; + body { color: $variable; }`, + }, + { + name: 'SQL comment removal', + ext: '.sql', + input: ` + -- Single line comment + SELECT * FROM table WHERE id = 1; + `, + expected: ` + SELECT * FROM table WHERE id = 1;`, + }, + { + name: 'Swift comment removal', + ext: '.swift', + input: ` + // Single line comment + 
func test() { + /* Multi-line + comment */ + return true + } + `, + expected: ` + func test() { + return true + }`, + }, + { + name: 'TypeScript comment removal', + ext: '.ts', + input: ` + // Single line comment + function test(): boolean { + /* Multi-line + comment */ + return true; + } + `, + expected: ` + function test(): boolean { + return true; + }`, + }, + { + name: 'XML comment removal', + ext: '.xml', + input: 'Content', + expected: 'Content', + }, + { + name: 'Dart comment removal', + ext: '.dart', + input: ` + // Single line comment + void main() { + /* Multi-line + comment */ + print('Hello'); + } + `, + expected: ` + void main() { + print('Hello'); + }`, + }, + { + name: 'Go comment removal', + ext: '.go', + input: ` + // Single line comment + func main() { + /* Multi-line + comment */ + fmt.Println("Hello") + } + `, + expected: ` + func main() { + fmt.Println("Hello") + }`, + }, + { + name: 'Kotlin comment removal', + ext: '.kt', + input: ` + // Single line comment + fun main() { + /* Multi-line + comment */ + println("Hello") + } + `, + expected: ` + fun main() { + println("Hello") + }`, + }, + { + name: 'Rust comment removal', + ext: '.rs', + input: ` + // Single line comment + fn main() { + /* Multi-line + comment */ + println!("Hello"); + } + `, + expected: ` + fn main() { + println!("Hello"); + }`, + }, + { + name: 'Shell script comment removal', + ext: '.sh', + input: ` + # Single line comment + echo "Hello" + `, + expected: ` + echo "Hello"`, + }, + { + name: 'YAML comment removal', + ext: '.yml', + input: ` + key: value # Comment + another_key: another_value + `, + expected: ` + key: value + another_key: another_value`, + }, + { + name: 'Vue file comment removal', + ext: '.vue', + input: ` + + + + `, + expected: ` + + + `, + }, + { + name: 'Svelte file comment removal', + ext: '.svelte', + input: ` + +
{message}
+ + + `, + expected: ` +
{message}
+ + `, + }, + ]; + + testCases.forEach(({ name, ext, input, expected }) => { + test(name, () => { + const manipulator = getFileManipulator(`test${ext}`); + expect(manipulator?.removeComments(input)).toBe(expected); + }); + }); + + test('Unsupported file type', () => { + const manipulator = getFileManipulator('test.unsupported'); + expect(manipulator).toBeNull(); + }); +});