pax_global_header00006660000000000000000000000064147552707320014526gustar00rootroot0000000000000052 comment=1b234ff6dccb2ca3e56b5c256696558fb85306dc files-to-prompt-0.6/000077500000000000000000000000001475527073200144345ustar00rootroot00000000000000files-to-prompt-0.6/.github/000077500000000000000000000000001475527073200157745ustar00rootroot00000000000000files-to-prompt-0.6/.github/workflows/000077500000000000000000000000001475527073200200315ustar00rootroot00000000000000files-to-prompt-0.6/.github/workflows/publish.yml000066400000000000000000000022201475527073200222160ustar00rootroot00000000000000name: Publish Python Package on: release: types: [created] permissions: contents: read jobs: test: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: pip cache-dependency-path: pyproject.toml - name: Install dependencies run: | pip install '.[test]' - name: Run tests run: | pytest deploy: runs-on: ubuntu-latest needs: [test] environment: release permissions: id-token: write steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.13" cache: pip cache-dependency-path: pyproject.toml - name: Install dependencies run: | pip install setuptools wheel build - name: Build run: | python -m build - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 files-to-prompt-0.6/.github/workflows/test.yml000066400000000000000000000011211475527073200215260ustar00rootroot00000000000000name: Test on: [push, pull_request] permissions: contents: read jobs: test: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} cache: pip cache-dependency-path: pyproject.toml - name: Install dependencies run: | pip install '.[test]' - name: Run tests run: | pytest files-to-prompt-0.6/.gitignore000066400000000000000000000001351475527073200164230ustar00rootroot00000000000000.venv __pycache__/ *.py[cod] *$py.class venv .eggs .pytest_cache *.egg-info .DS_Store build/ files-to-prompt-0.6/LICENSE000066400000000000000000000261351475527073200154500ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. files-to-prompt-0.6/README.md000066400000000000000000000150111475527073200157110ustar00rootroot00000000000000# files-to-prompt [![PyPI](https://img.shields.io/pypi/v/files-to-prompt.svg)](https://pypi.org/project/files-to-prompt/) [![Changelog](https://img.shields.io/github/v/release/simonw/files-to-prompt?include_prereleases&label=changelog)](https://github.com/simonw/files-to-prompt/releases) [![Tests](https://github.com/simonw/files-to-prompt/actions/workflows/test.yml/badge.svg)](https://github.com/simonw/files-to-prompt/actions/workflows/test.yml) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/files-to-prompt/blob/master/LICENSE) Concatenate a directory full of files into a single prompt for use with LLMs For background on this project see [Building files-to-prompt entirely using Claude 3 Opus](https://simonwillison.net/2024/Apr/8/files-to-prompt/). ## Installation Install this tool using `pip`: ```bash pip install files-to-prompt ``` ## Usage To use `files-to-prompt`, provide the path to one or more files or directories you want to process: ```bash files-to-prompt path/to/file_or_directory [path/to/another/file_or_directory ...] ``` This will output the contents of every file, with each file preceded by its relative path and separated by `---`. ### Options - `-e/--extension `: Only include files with the specified extension. Can be used multiple times. ```bash files-to-prompt path/to/directory -e txt -e md ``` - `--include-hidden`: Include files and folders starting with `.` (hidden files and directories). ```bash files-to-prompt path/to/directory --include-hidden ``` - `--ignore `: Specify one or more patterns to ignore. Can be used multiple times. Patterns may match file names and directory names, unless you also specify `--ignore-files-only`. Pattern syntax uses [fnmatch](https://docs.python.org/3/library/fnmatch.html), which supports `*`, `?`, `[anychar]`, `[!notchars]` and `[?]` for special character literals. ```bash files-to-prompt path/to/directory --ignore "*.log" --ignore "temp*" ``` - `--ignore-files-only`: Include directory paths which would otherwise be ignored by an `--ignore` pattern. ```bash files-to-prompt path/to/directory --ignore-files-only --ignore "*dir*" ``` - `--ignore-gitignore`: Ignore `.gitignore` files and include all files. ```bash files-to-prompt path/to/directory --ignore-gitignore ``` - `-c/--cxml`: Output in Claude XML format. ```bash files-to-prompt path/to/directory --cxml ``` - `-m/--markdown`: Output as Markdown with fenced code blocks. ```bash files-to-prompt path/to/directory --markdown ``` - `-o/--output `: Write the output to a file instead of printing it to the console. ```bash files-to-prompt path/to/directory -o output.txt ``` - `-n/--line-numbers`: Include line numbers in the output. ```bash files-to-prompt path/to/directory -n ``` Example output: ``` files_to_prompt/cli.py --- 1 import os 2 from fnmatch import fnmatch 3 4 import click ... ``` - `-0/--null`: Use NUL character as separator when reading paths from stdin. Useful when filenames may contain spaces. ```bash find . -name "*.py" -print0 | files-to-prompt --null ``` ### Example Suppose you have a directory structure like this: ``` my_directory/ ├── file1.txt ├── file2.txt ├── .hidden_file.txt ├── temp.log └── subdirectory/ └── file3.txt ``` Running `files-to-prompt my_directory` will output: ``` my_directory/file1.txt --- Contents of file1.txt --- my_directory/file2.txt --- Contents of file2.txt --- my_directory/subdirectory/file3.txt --- Contents of file3.txt --- ``` If you run `files-to-prompt my_directory --include-hidden`, the output will also include `.hidden_file.txt`: ``` my_directory/.hidden_file.txt --- Contents of .hidden_file.txt --- ... ``` If you run `files-to-prompt my_directory --ignore "*.log"`, the output will exclude `temp.log`: ``` my_directory/file1.txt --- Contents of file1.txt --- my_directory/file2.txt --- Contents of file2.txt --- my_directory/subdirectory/file3.txt --- Contents of file3.txt --- ``` If you run `files-to-prompt my_directory --ignore "sub*"`, the output will exclude all files in `subdirectory/` (unless you also specify `--ignore-files-only`): ``` my_directory/file1.txt --- Contents of file1.txt --- my_directory/file2.txt --- Contents of file2.txt --- ``` ### Reading from stdin The tool can also read paths from standard input. This can be used to pipe in the output of another command: ```bash # Find files modified in the last day find . -mtime -1 | files-to-prompt ``` When using the `--null` (or `-0`) option, paths are expected to be NUL-separated (useful when dealing with filenames containing spaces): ```bash find . -name "*.txt" -print0 | files-to-prompt --null ``` You can mix and match paths from command line arguments and stdin: ```bash # Include files modified in the last day, and also include README.md find . -mtime -1 | files-to-prompt README.md ``` ### Claude XML Output Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window. To structure the output in this way, use the optional `--cxml` flag, which will produce output like this: ```xml my_directory/file1.txt Contents of file1.txt my_directory/file2.txt Contents of file2.txt ``` ## --markdown fenced code block output The `--markdown` option will output the files as fenced code blocks, which can be useful for pasting into Markdown documents. ```bash files-to-prompt path/to/directory --markdown ``` The language tag will be guessed based on the filename. If the code itself contains triple backticks the wrapper around it will use one additional backtick. Example output: ````` myfile.py ```python def my_function(): return "Hello, world!" ``` other.js ```javascript function myFunction() { return "Hello, world!"; } ``` file_with_triple_backticks.md ````markdown This file has its own ``` fenced code blocks ``` Inside it. ```` ````` ## Development To contribute to this tool, first checkout the code. Then create a new virtual environment: ```bash cd files-to-prompt python -m venv venv source venv/bin/activate ``` Now install the dependencies and test dependencies: ```bash pip install -e '.[test]' ``` To run the tests: ```bash pytest ``` files-to-prompt-0.6/files_to_prompt/000077500000000000000000000000001475527073200176415ustar00rootroot00000000000000files-to-prompt-0.6/files_to_prompt/__init__.py000066400000000000000000000000001475527073200217400ustar00rootroot00000000000000files-to-prompt-0.6/files_to_prompt/__main__.py000066400000000000000000000000731475527073200217330ustar00rootroot00000000000000from .cli import cli if __name__ == "__main__": cli() files-to-prompt-0.6/files_to_prompt/cli.py000066400000000000000000000213121475527073200207610ustar00rootroot00000000000000import os import sys from fnmatch import fnmatch import click global_index = 1 EXT_TO_LANG = { "py": "python", "c": "c", "cpp": "cpp", "java": "java", "js": "javascript", "ts": "typescript", "html": "html", "css": "css", "xml": "xml", "json": "json", "yaml": "yaml", "yml": "yaml", "sh": "bash", "rb": "ruby", } def should_ignore(path, gitignore_rules): for rule in gitignore_rules: if fnmatch(os.path.basename(path), rule): return True if os.path.isdir(path) and fnmatch(os.path.basename(path) + "/", rule): return True return False def read_gitignore(path): gitignore_path = os.path.join(path, ".gitignore") if os.path.isfile(gitignore_path): with open(gitignore_path, "r") as f: return [ line.strip() for line in f if line.strip() and not line.startswith("#") ] return [] def add_line_numbers(content): lines = content.splitlines() padding = len(str(len(lines))) numbered_lines = [f"{i + 1:{padding}} {line}" for i, line in enumerate(lines)] return "\n".join(numbered_lines) def print_path(writer, path, content, cxml, markdown, line_numbers): if cxml: print_as_xml(writer, path, content, line_numbers) elif markdown: print_as_markdown(writer, path, content, line_numbers) else: print_default(writer, path, content, line_numbers) def print_default(writer, path, content, line_numbers): writer(path) writer("---") if line_numbers: content = add_line_numbers(content) writer(content) writer("") writer("---") def print_as_xml(writer, path, content, line_numbers): global global_index writer(f'') writer(f"{path}") writer("") if line_numbers: content = add_line_numbers(content) writer(content) writer("") writer("") global_index += 1 def print_as_markdown(writer, path, content, line_numbers): lang = EXT_TO_LANG.get(path.split(".")[-1], "") # Figure out how many backticks to use backticks = "```" while backticks in content: backticks += "`" writer(path) writer(f"{backticks}{lang}") if line_numbers: content = add_line_numbers(content) writer(content) writer(f"{backticks}") def process_path( path, extensions, include_hidden, ignore_files_only, ignore_gitignore, gitignore_rules, ignore_patterns, writer, claude_xml, markdown, line_numbers=False, ): if os.path.isfile(path): try: with open(path, "r") as f: print_path(writer, path, f.read(), claude_xml, markdown, line_numbers) except UnicodeDecodeError: warning_message = f"Warning: Skipping file {path} due to UnicodeDecodeError" click.echo(click.style(warning_message, fg="red"), err=True) elif os.path.isdir(path): for root, dirs, files in os.walk(path): if not include_hidden: dirs[:] = [d for d in dirs if not d.startswith(".")] files = [f for f in files if not f.startswith(".")] if not ignore_gitignore: gitignore_rules.extend(read_gitignore(root)) dirs[:] = [ d for d in dirs if not should_ignore(os.path.join(root, d), gitignore_rules) ] files = [ f for f in files if not should_ignore(os.path.join(root, f), gitignore_rules) ] if ignore_patterns: if not ignore_files_only: dirs[:] = [ d for d in dirs if not any(fnmatch(d, pattern) for pattern in ignore_patterns) ] files = [ f for f in files if not any(fnmatch(f, pattern) for pattern in ignore_patterns) ] if extensions: files = [f for f in files if f.endswith(extensions)] for file in sorted(files): file_path = os.path.join(root, file) try: with open(file_path, "r") as f: print_path( writer, file_path, f.read(), claude_xml, markdown, line_numbers, ) except UnicodeDecodeError: warning_message = ( f"Warning: Skipping file {file_path} due to UnicodeDecodeError" ) click.echo(click.style(warning_message, fg="red"), err=True) def read_paths_from_stdin(use_null_separator): if sys.stdin.isatty(): # No ready input from stdin, don't block for input return [] stdin_content = sys.stdin.read() if use_null_separator: paths = stdin_content.split("\0") else: paths = stdin_content.split() # split on whitespace return [p for p in paths if p] @click.command() @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @click.option("extensions", "-e", "--extension", multiple=True) @click.option( "--include-hidden", is_flag=True, help="Include files and folders starting with .", ) @click.option( "--ignore-files-only", is_flag=True, help="--ignore option only ignores files", ) @click.option( "--ignore-gitignore", is_flag=True, help="Ignore .gitignore files and include all files", ) @click.option( "ignore_patterns", "--ignore", multiple=True, default=[], help="List of patterns to ignore", ) @click.option( "output_file", "-o", "--output", type=click.Path(writable=True), help="Output to a file instead of stdout", ) @click.option( "claude_xml", "-c", "--cxml", is_flag=True, help="Output in XML-ish format suitable for Claude's long context window.", ) @click.option( "markdown", "-m", "--markdown", is_flag=True, help="Output Markdown with fenced code blocks", ) @click.option( "line_numbers", "-n", "--line-numbers", is_flag=True, help="Add line numbers to the output", ) @click.option( "--null", "-0", is_flag=True, help="Use NUL character as separator when reading from stdin", ) @click.version_option() def cli( paths, extensions, include_hidden, ignore_files_only, ignore_gitignore, ignore_patterns, output_file, claude_xml, markdown, line_numbers, null, ): """ Takes one or more paths to files or directories and outputs every file, recursively, each one preceded with its filename like this: \b path/to/file.py ---- Contents of file.py goes here --- path/to/file2.py --- ... If the `--cxml` flag is provided, the output will be structured as follows: \b Contents of file1.txt Contents of file2.txt ... If the `--markdown` flag is provided, the output will be structured as follows: \b path/to/file1.py ```python Contents of file1.py ``` """ # Reset global_index for pytest global global_index global_index = 1 # Read paths from stdin if available stdin_paths = read_paths_from_stdin(use_null_separator=null) # Combine paths from arguments and stdin paths = [*paths, *stdin_paths] gitignore_rules = [] writer = click.echo fp = None if output_file: fp = open(output_file, "w", encoding="utf-8") writer = lambda s: print(s, file=fp) for path in paths: if not os.path.exists(path): raise click.BadArgumentUsage(f"Path does not exist: {path}") if not ignore_gitignore: gitignore_rules.extend(read_gitignore(os.path.dirname(path))) if claude_xml and path == paths[0]: writer("") process_path( path, extensions, include_hidden, ignore_files_only, ignore_gitignore, gitignore_rules, ignore_patterns, writer, claude_xml, markdown, line_numbers, ) if claude_xml: writer("") if fp: fp.close() files-to-prompt-0.6/pyproject.toml000066400000000000000000000013631475527073200173530ustar00rootroot00000000000000[project] name = "files-to-prompt" version = "0.6" description = "Concatenate a directory full of files into a single prompt for use with LLMs" readme = "README.md" authors = [{name = "Simon Willison"}] license = {text = "Apache-2.0"} requires-python = ">=3.8" classifiers = [ "License :: OSI Approved :: Apache Software License" ] dependencies = [ "click" ] [project.urls] Homepage = "https://github.com/simonw/files-to-prompt" Changelog = "https://github.com/simonw/files-to-prompt/releases" Issues = "https://github.com/simonw/files-to-prompt/issues" CI = "https://github.com/simonw/files-to-prompt/actions" [project.entry-points.console_scripts] files-to-prompt = "files_to_prompt.cli:cli" [project.optional-dependencies] test = ["pytest"] files-to-prompt-0.6/tests/000077500000000000000000000000001475527073200155765ustar00rootroot00000000000000files-to-prompt-0.6/tests/test_files_to_prompt.py000066400000000000000000000403031475527073200224140ustar00rootroot00000000000000import os import pytest import re from click.testing import CliRunner from files_to_prompt.cli import cli def filenames_from_cxml(cxml_string): "Return set of filenames from ... tags" return set(re.findall(r"(.*?)", cxml_string)) def test_basic_functionality(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") with open("test_dir/file1.txt", "w") as f: f.write("Contents of file1") with open("test_dir/file2.txt", "w") as f: f.write("Contents of file2") result = runner.invoke(cli, ["test_dir"]) assert result.exit_code == 0 assert "test_dir/file1.txt" in result.output assert "Contents of file1" in result.output assert "test_dir/file2.txt" in result.output assert "Contents of file2" in result.output def test_include_hidden(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") with open("test_dir/.hidden.txt", "w") as f: f.write("Contents of hidden file") result = runner.invoke(cli, ["test_dir"]) assert result.exit_code == 0 assert "test_dir/.hidden.txt" not in result.output result = runner.invoke(cli, ["test_dir", "--include-hidden"]) assert result.exit_code == 0 assert "test_dir/.hidden.txt" in result.output assert "Contents of hidden file" in result.output def test_ignore_gitignore(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") os.makedirs("test_dir/nested_include") os.makedirs("test_dir/nested_ignore") with open("test_dir/.gitignore", "w") as f: f.write("ignored.txt") with open("test_dir/ignored.txt", "w") as f: f.write("This file should be ignored") with open("test_dir/included.txt", "w") as f: f.write("This file should be included") with open("test_dir/nested_include/included2.txt", "w") as f: f.write("This nested file should be included") with open("test_dir/nested_ignore/.gitignore", "w") as f: f.write("nested_ignore.txt") with open("test_dir/nested_ignore/nested_ignore.txt", "w") as f: f.write("This nested file should not be included") with open("test_dir/nested_ignore/actually_include.txt", "w") as f: f.write("This nested file should actually be included") result = runner.invoke(cli, ["test_dir", "-c"]) assert result.exit_code == 0 filenames = filenames_from_cxml(result.output) assert filenames == { "test_dir/included.txt", "test_dir/nested_include/included2.txt", "test_dir/nested_ignore/actually_include.txt", } result2 = runner.invoke(cli, ["test_dir", "-c", "--ignore-gitignore"]) assert result2.exit_code == 0 filenames2 = filenames_from_cxml(result2.output) assert filenames2 == { "test_dir/included.txt", "test_dir/ignored.txt", "test_dir/nested_include/included2.txt", "test_dir/nested_ignore/nested_ignore.txt", "test_dir/nested_ignore/actually_include.txt", } def test_multiple_paths(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir1") with open("test_dir1/file1.txt", "w") as f: f.write("Contents of file1") os.makedirs("test_dir2") with open("test_dir2/file2.txt", "w") as f: f.write("Contents of file2") with open("single_file.txt", "w") as f: f.write("Contents of single file") result = runner.invoke(cli, ["test_dir1", "test_dir2", "single_file.txt"]) assert result.exit_code == 0 assert "test_dir1/file1.txt" in result.output assert "Contents of file1" in result.output assert "test_dir2/file2.txt" in result.output assert "Contents of file2" in result.output assert "single_file.txt" in result.output assert "Contents of single file" in result.output def test_ignore_patterns(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir", exist_ok=True) with open("test_dir/file_to_ignore.txt", "w") as f: f.write("This file should be ignored due to ignore patterns") with open("test_dir/file_to_include.txt", "w") as f: f.write("This file should be included") result = runner.invoke(cli, ["test_dir", "--ignore", "*.txt"]) assert result.exit_code == 0 assert "test_dir/file_to_ignore.txt" not in result.output assert "This file should be ignored due to ignore patterns" not in result.output assert "test_dir/file_to_include.txt" not in result.output os.makedirs("test_dir/test_subdir", exist_ok=True) with open("test_dir/test_subdir/any_file.txt", "w") as f: f.write("This entire subdirectory should be ignored due to ignore patterns") result = runner.invoke(cli, ["test_dir", "--ignore", "*subdir*"]) assert result.exit_code == 0 assert "test_dir/test_subdir/any_file.txt" not in result.output assert ( "This entire subdirectory should be ignored due to ignore patterns" not in result.output ) assert "test_dir/file_to_include.txt" in result.output assert "This file should be included" in result.output assert "This file should be included" in result.output result = runner.invoke( cli, ["test_dir", "--ignore", "*subdir*", "--ignore-files-only"] ) assert result.exit_code == 0 assert "test_dir/test_subdir/any_file.txt" in result.output result = runner.invoke(cli, ["test_dir", "--ignore", ""]) def test_specific_extensions(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): # Write one.txt one.py two/two.txt two/two.py three.md os.makedirs("test_dir/two") with open("test_dir/one.txt", "w") as f: f.write("This is one.txt") with open("test_dir/one.py", "w") as f: f.write("This is one.py") with open("test_dir/two/two.txt", "w") as f: f.write("This is two/two.txt") with open("test_dir/two/two.py", "w") as f: f.write("This is two/two.py") with open("test_dir/three.md", "w") as f: f.write("This is three.md") # Try with -e py -e md result = runner.invoke(cli, ["test_dir", "-e", "py", "-e", "md"]) assert result.exit_code == 0 assert ".txt" not in result.output assert "test_dir/one.py" in result.output assert "test_dir/two/two.py" in result.output assert "test_dir/three.md" in result.output def test_mixed_paths_with_options(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") with open("test_dir/.gitignore", "w") as f: f.write("ignored_in_gitignore.txt\n.hidden_ignored_in_gitignore.txt") with open("test_dir/ignored_in_gitignore.txt", "w") as f: f.write("This file should be ignored by .gitignore") with open("test_dir/.hidden_ignored_in_gitignore.txt", "w") as f: f.write("This hidden file should be ignored by .gitignore") with open("test_dir/included.txt", "w") as f: f.write("This file should be included") with open("test_dir/.hidden_included.txt", "w") as f: f.write("This hidden file should be included") with open("single_file.txt", "w") as f: f.write("Contents of single file") result = runner.invoke(cli, ["test_dir", "single_file.txt"]) assert result.exit_code == 0 assert "test_dir/ignored_in_gitignore.txt" not in result.output assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output assert "test_dir/included.txt" in result.output assert "test_dir/.hidden_included.txt" not in result.output assert "single_file.txt" in result.output assert "Contents of single file" in result.output result = runner.invoke(cli, ["test_dir", "single_file.txt", "--include-hidden"]) assert result.exit_code == 0 assert "test_dir/ignored_in_gitignore.txt" not in result.output assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output assert "test_dir/included.txt" in result.output assert "test_dir/.hidden_included.txt" in result.output assert "single_file.txt" in result.output assert "Contents of single file" in result.output result = runner.invoke( cli, ["test_dir", "single_file.txt", "--ignore-gitignore"] ) assert result.exit_code == 0 assert "test_dir/ignored_in_gitignore.txt" in result.output assert "test_dir/.hidden_ignored_in_gitignore.txt" not in result.output assert "test_dir/included.txt" in result.output assert "test_dir/.hidden_included.txt" not in result.output assert "single_file.txt" in result.output assert "Contents of single file" in result.output result = runner.invoke( cli, ["test_dir", "single_file.txt", "--ignore-gitignore", "--include-hidden"], ) assert result.exit_code == 0 assert "test_dir/ignored_in_gitignore.txt" in result.output assert "test_dir/.hidden_ignored_in_gitignore.txt" in result.output assert "test_dir/included.txt" in result.output assert "test_dir/.hidden_included.txt" in result.output assert "single_file.txt" in result.output assert "Contents of single file" in result.output def test_binary_file_warning(tmpdir): runner = CliRunner(mix_stderr=False) with tmpdir.as_cwd(): os.makedirs("test_dir") with open("test_dir/binary_file.bin", "wb") as f: f.write(b"\xff") with open("test_dir/text_file.txt", "w") as f: f.write("This is a text file") result = runner.invoke(cli, ["test_dir"]) assert result.exit_code == 0 stdout = result.stdout stderr = result.stderr assert "test_dir/text_file.txt" in stdout assert "This is a text file" in stdout assert "\ntest_dir/binary_file.bin" not in stdout assert ( "Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError" in stderr ) @pytest.mark.parametrize( "args", (["test_dir"], ["test_dir/file1.txt", "test_dir/file2.txt"]) ) def test_xml_format_dir(tmpdir, args): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") with open("test_dir/file1.txt", "w") as f: f.write("Contents of file1.txt") with open("test_dir/file2.txt", "w") as f: f.write("Contents of file2.txt") result = runner.invoke(cli, args + ["--cxml"]) assert result.exit_code == 0 actual = result.output expected = """ test_dir/file1.txt Contents of file1.txt test_dir/file2.txt Contents of file2.txt """ assert expected.strip() == actual.strip() @pytest.mark.parametrize("arg", ("-o", "--output")) def test_output_option(tmpdir, arg): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") with open("test_dir/file1.txt", "w") as f: f.write("Contents of file1.txt") with open("test_dir/file2.txt", "w") as f: f.write("Contents of file2.txt") output_file = "output.txt" result = runner.invoke( cli, ["test_dir", arg, output_file], catch_exceptions=False ) assert result.exit_code == 0 assert not result.output with open(output_file, "r") as f: actual = f.read() expected = """ test_dir/file1.txt --- Contents of file1.txt --- test_dir/file2.txt --- Contents of file2.txt --- """ assert expected.strip() == actual.strip() def test_line_numbers(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") test_content = "First line\nSecond line\nThird line\nFourth line\n" with open("test_dir/multiline.txt", "w") as f: f.write(test_content) result = runner.invoke(cli, ["test_dir"]) assert result.exit_code == 0 assert "1 First line" not in result.output assert test_content in result.output result = runner.invoke(cli, ["test_dir", "-n"]) assert result.exit_code == 0 assert "1 First line" in result.output assert "2 Second line" in result.output assert "3 Third line" in result.output assert "4 Fourth line" in result.output result = runner.invoke(cli, ["test_dir", "--line-numbers"]) assert result.exit_code == 0 assert "1 First line" in result.output assert "2 Second line" in result.output assert "3 Third line" in result.output assert "4 Fourth line" in result.output @pytest.mark.parametrize( "input,extra_args", ( ("test_dir1/file1.txt\ntest_dir2/file2.txt", []), ("test_dir1/file1.txt\ntest_dir2/file2.txt", []), ("test_dir1/file1.txt\0test_dir2/file2.txt", ["--null"]), ("test_dir1/file1.txt\0test_dir2/file2.txt", ["-0"]), ), ) def test_reading_paths_from_stdin(tmpdir, input, extra_args): runner = CliRunner() with tmpdir.as_cwd(): # Create test files os.makedirs("test_dir1") os.makedirs("test_dir2") with open("test_dir1/file1.txt", "w") as f: f.write("Contents of file1") with open("test_dir2/file2.txt", "w") as f: f.write("Contents of file2") # Test space-separated paths from stdin result = runner.invoke(cli, args=extra_args, input=input) assert result.exit_code == 0 assert "test_dir1/file1.txt" in result.output assert "Contents of file1" in result.output assert "test_dir2/file2.txt" in result.output assert "Contents of file2" in result.output def test_paths_from_arguments_and_stdin(tmpdir): runner = CliRunner() with tmpdir.as_cwd(): # Create test files os.makedirs("test_dir1") os.makedirs("test_dir2") with open("test_dir1/file1.txt", "w") as f: f.write("Contents of file1") with open("test_dir2/file2.txt", "w") as f: f.write("Contents of file2") # Test paths from arguments and stdin result = runner.invoke( cli, args=["test_dir1"], input="test_dir2/file2.txt", ) assert result.exit_code == 0 assert "test_dir1/file1.txt" in result.output assert "Contents of file1" in result.output assert "test_dir2/file2.txt" in result.output assert "Contents of file2" in result.output @pytest.mark.parametrize("option", ("-m", "--markdown")) def test_markdown(tmpdir, option): runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") with open("test_dir/python.py", "w") as f: f.write("This is python") with open("test_dir/python_with_quad_backticks.py", "w") as f: f.write("This is python with ```` in it already") with open("test_dir/code.js", "w") as f: f.write("This is javascript") with open("test_dir/code.unknown", "w") as f: f.write("This is an unknown file type") result = runner.invoke(cli, ["test_dir", option]) assert result.exit_code == 0 actual = result.output expected = ( "test_dir/code.js\n" "```javascript\n" "This is javascript\n" "```\n" "test_dir/code.unknown\n" "```\n" "This is an unknown file type\n" "```\n" "test_dir/python.py\n" "```python\n" "This is python\n" "```\n" "test_dir/python_with_quad_backticks.py\n" "`````python\n" "This is python with ```` in it already\n" "`````\n" ) assert expected.strip() == actual.strip()