Skip to content

Commit

Permalink
Add initial code
Browse files Browse the repository at this point in the history
  • Loading branch information
akx committed Oct 6, 2021
0 parents commit 619a235
Show file tree
Hide file tree
Showing 9 changed files with 427 additions and 0 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
name: CI
'on':
push:
branches:
- master
pull_request:
branches:
- master
jobs:
Test:
runs-on: '${{ matrix.os }}'
strategy:
matrix:
os:
- ubuntu-20.04
python-version:
- '3.6'
- '3.7'
- '3.8'
- '3.9'
- '3.10'
steps:
- name: 'Set up Python ${{ matrix.python-version }}'
uses: actions/setup-python@v2
with:
python-version: '${{ matrix.python-version }}'
- uses: actions/checkout@v2
- run: pip install -U pip setuptools wheel
- run: pip install pytest pytest-cov
- run: py.test -vvv --cov .
# - uses: codecov/codecov-action@v2
Lint:
runs-on: '${{ matrix.os }}'
strategy:
matrix:
os:
- ubuntu-20.04
python-version:
- '3.9'
steps:
- name: 'Set up Python ${{ matrix.python-version }}'
uses: actions/setup-python@v2
with:
python-version: '${{ matrix.python-version }}'
- uses: actions/checkout@v2
- run: pip install -U pip setuptools wheel
- run: pip install flake8 mypy pytest
- run: flake8 .
- run: mypy --strict --install-types --non-interactive .
17 changes: 17 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
*.egg
*.egg-info/
*.log
*.manifest
*.mo
*.pot
*.py[cod]
.*cache
.coverage
.idea/
.tox
__pycache__/
build/
dist/
htmlcov/
sdist/
venv*
22 changes: 22 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

The MIT License (MIT)

Copyright (c) 2021 Valohai

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Gitignorant

Gitignorant is (aspires to be)

* a [spec]-compliant .gitignore file parser and matcher
* for Python 3.6 and newer
* with full type hinting and test coverage
* and nothing you don't need

## Features

* Parses .gitignore (and .gitignore style) files
* Matches paths against a list of parsed rules with the same
semantics as Git ("last rule wins")

## Unfeatures

* Trees of .gitignore files are not directly supported,
but can be supported by client code.

[spec]: https://git-scm.com/docs/gitignore
Empty file added flake8.ini
Empty file.
145 changes: 145 additions & 0 deletions gitignorant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import re
from functools import lru_cache
from typing import Iterable, TextIO, Optional, Union, List

# Public API of this module; everything else is an implementation detail.
__all__ = ["Rule", "check_match", "parse_gitignore_file"]

# Tokenizer for gitignore glob patterns. Splitting a pattern on this regex
# yields the special tokens in between runs of literal text:
#   \x   a backslash-escaped character (always matched literally)
#   *+   a run of one or more asterisks
#   [ ]  character class delimiters
#   ?    the single-character wildcard
# The escape alternative comes first so that "\*", "\[", "\]" and "\?" are
# consumed as escapes instead of being seen as wildcards or delimiters.
specials_re = re.compile(r"(\\.|\*+|\[|]|\?)")


@lru_cache(maxsize=512)
def compile_pattern(pat: str, is_dir: bool) -> "re.Pattern":  # type: ignore[type-arg]
    """
    Translate one gitignore glob pattern into a compiled regular expression.

    The returned regex is meant to be used with `.search()` on a
    "/"-separated path relative to the directory of the gitignore file.

    :param pat: The pattern, without any leading "!" negation marker.
        `Rule.matches` strips the trailing "/" of directory-only patterns
        before calling this.
    :param is_dir: Whether the path to be matched is a directory. Anchoring
        does not depend on it; it is kept so existing callers keep working
        (it is still part of the cache key).
    :return: The compiled regular expression.

    Translation rules:

    * A pattern containing "/" is anchored to the start of the path; any other
      pattern may match at any depth, but only at a path component boundary.
    * "?" matches one character other than "/"; "*" matches any run of
      characters other than "/".
    * "**" bounded by slashes or pattern edges: "**/" matches zero or more
      whole directories and a trailing "**" matches everything below. Any
      other run of asterisks behaves like a single "*".
    * "[...]" is a character class; a leading "!" or "^" negates it and a "]"
      in first position is a literal member. A class with no closing "]" is
      assumed to run to the end of the pattern.
    * A backslash makes the following character literal.
    """
    # "If there is a separator at the beginning or middle (or both) of the
    # pattern, then the pattern is relative to the directory level of the
    # particular .gitignore file itself."
    anchor = "/" in pat
    if pat.startswith("/"):
        pat = pat[1:]

    # Unanchored patterns may match at any level, but must start at a path
    # component boundary: "foo" matches "a/foo" but not "barfoo".
    re_bits = ["^" if anchor else "(?:^|/)"]

    tokens = [tok for tok in specials_re.split(pat) if tok]
    pos = 0
    # True while everything translated so far ends on a "/" (or is empty),
    # which is where a "**" is allowed to have its special meaning.
    at_component_start = True
    while pos < len(tokens):
        tok = tokens[pos]
        pos += 1
        if not tok:
            # A literal that consisted only of the "/" consumed by a "**/".
            continue

        if tok == "?":
            re_bits.append(r"[^/]")
            at_component_start = False
            continue

        if tok.startswith("*"):
            at_end = pos == len(tokens)
            following = "" if at_end else tokens[pos]
            double_star = len(tok) > 1 and at_component_start
            if double_star and following.startswith("/"):
                # "**/": zero or more whole directories. The slash is part of
                # the group, so drop it from the literal that follows.
                re_bits.append("(?:.*/)?")
                tokens[pos] = following[1:]
            elif double_star and at_end:
                # Trailing "**": everything below, at any depth.
                re_bits.append(".*")
                at_component_start = False
            else:
                # A single star, or consecutive stars that are not bounded by
                # slashes ("considered regular asterisks").
                re_bits.append(r"[^/]*")
                at_component_start = False
            continue

        if tok == "[":
            negation = ""
            members = []
            first = True
            while pos < len(tokens):
                inner = tokens[pos]
                pos += 1
                if inner == "]" and members:
                    break
                if first and inner[:1] in ("!", "^"):
                    negation = inner[0]
                    inner = inner[1:]
                first = False
                if len(inner) == 2 and inner[0] == "\\":
                    # Escaped member, e.g. "\]" or "\-".
                    members.append(re.escape(inner[1]))
                elif inner:
                    # Keep unescaped dashes so that ranges such as a-z work.
                    members.append(re.escape(inner).replace("\\-", "-"))
            if members:
                # A negated class must not match the path separator either.
                prefix = "^/" if negation else ""
                re_bits.append("[%s%s]" % (prefix, "".join(members)))
            else:
                # Nothing followed the bracket; "[]" would be an invalid
                # regex, so match the consumed characters literally.
                re_bits.append(re.escape("[" + negation))
            at_component_start = False
            continue

        if len(tok) == 2 and tok[0] == "\\":
            # Backslash escape: the next character is matched literally.
            re_bits.append(re.escape(tok[1]))
            at_component_start = tok[1] == "/"
            continue

        re_bits.append(re.escape(tok))
        at_component_start = tok.endswith("/")

    re_bits.append("$")
    return re.compile("".join(re_bits))


class Rule:
    """
    A single parsed gitignore rule.

    `content` is the pattern text without the leading "!" marker and
    `negative` records whether that marker was present.
    """

    def __init__(self, *, negative: bool, content: str) -> None:
        # Coerce eagerly so that the attributes always have the declared types.
        self.negative = bool(negative)
        self.content = str(content)

    def __repr__(self) -> str:
        suffix = "(negative)" if self.negative else ""
        return f"<Rule {self.content!r}{suffix}>"

    def matches(self, path: str, is_dir: bool = False) -> bool:
        """
        Tell whether this rule's pattern matches `path`.

        Negation is not taken into account here; callers combine the result
        with `negative` themselves.
        """
        pattern = self.content
        if pattern.endswith("/"):
            # "If there is a separator at the end of the pattern then the
            # pattern will only match directories, otherwise the pattern can
            # match both files and directories."
            if not is_dir:
                return False
            pattern = pattern.rstrip("/")
        regex = compile_pattern(pattern, is_dir=is_dir)
        # The compiled regex carries its own anchoring, hence `search`.
        return regex.search(path) is not None


def try_parse_rule(line: str) -> Optional["Rule"]:
    """
    Parse one line of a gitignore file into a `Rule`.

    :param line: The raw line; a trailing line ending is allowed.
    :return: The parsed rule, or None when the line defines no rule (blank
        line, "#" comment, or a "!" with no pattern after it).
    """
    stripped = line.rstrip()  # Remove the line ending and trailing whitespace
    # "Trailing spaces are ignored unless they are quoted with backslash ("\")."
    # A trailing backslash quoted a space only if a space really followed it
    # and the backslash is not itself escaped (odd number of backslashes).
    trailing_backslashes = len(stripped) - len(stripped.rstrip("\\"))
    if trailing_backslashes % 2 == 1 and line[len(stripped):].startswith(" "):
        stripped = stripped[:-1] + " "
    line = stripped

    if not line:
        # "A blank line matches no files, so it can serve as a separator
        # for readability."
        return None
    if line.startswith("#"):
        # "A line starting with # serves as a comment."
        return None

    # "An optional prefix "!" which negates the pattern; any matching file
    # excluded by a previous pattern will become included again."
    negative = line.startswith("!")
    if negative:
        line = line[1:]

    if line.startswith(("\\!", "\\#")):
        # "Put a backslash ("\") in front of the first "!" for patterns that
        # begin with a literal "!"" and likewise "in front of the first hash
        # for patterns that begin with a hash": drop the quoting backslash.
        line = line[1:]

    if not line:
        # A bare "!" carries no pattern, so it defines no rule.
        return None
    return Rule(negative=negative, content=line)


def check_match(rules: Iterable[Rule], path: str, is_dir: bool = False) -> bool:
    """
    Tell whether `path` is ignored according to `rules`.

    :param rules: The rules in file order. Any iterable is accepted, so the
        result of `parse_gitignore_file` can be passed directly; a one-shot
        iterator is consumed by the call, so turn it into a list first if the
        rules are to be checked against more than one path.
    :param path: The path to check.
    :param is_dir: Whether `path` is a directory.
    :return: True if the last rule matching `path` is not negative; False if
        it is negative or if no rule matches.
    """
    # `reversed` needs a sequence, so anything that is not already a list
    # (e.g. a generator) is materialized first.
    ordered: List[Rule] = rules if isinstance(rules, list) else list(rules)
    # "Last rule wins": scan backwards and stop at the first match.
    for rule in reversed(ordered):
        if rule.matches(path, is_dir):
            return not rule.negative
    return False


def parse_gitignore_file(f: Union[TextIO, Iterable[str]]) -> Iterable[Rule]:
    """
    Lazily yield the rules found in `f`, in order.

    `f` is iterated line by line; lines for which `try_parse_rule` returns
    None are skipped.
    """
    parsed = map(try_parse_rule, f)
    yield from (rule for rule in parsed if rule is not None)
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[build-system]
requires = [
"setuptools>=42",
"wheel"
]
build-backend = "setuptools.build_meta"
23 changes: 23 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[flake8]
max-line-length = 88
extend-ignore = E203

[metadata]
name = gitignorant
version = 0.1.0
author = Aarni Koskela
author_email = [email protected]
description = A parser for gitignore files
long_description = file: README.md
long_description_content_type = text/markdown
url = https://github.com/valohai/gitignorant
project_urls =
Bug Tracker = https://github.com/valohai/gitignorant/issues
classifiers =
Programming Language :: Python :: 3
License :: OSI Approved :: MIT License
Operating System :: OS Independent

[options]
py_modules = gitignorant
python_requires = >=3.6
Loading

0 comments on commit 619a235

Please sign in to comment.