diff --git a/doc/api.rst b/doc/api.rst
index 99a0f1292..a8e0fbb83 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -34,7 +34,6 @@ The Project
Project.groupbydoc
Project.import_from
Project.id
- Project.index
Project.isfile
Project.min_len_unique_id
Project.num_jobs
diff --git a/doc/cli.rst b/doc/cli.rst
index c8bed84bb..37d175c0f 100644
--- a/doc/cli.rst
+++ b/doc/cli.rst
@@ -34,7 +34,6 @@ Sharing and archiving a project:
* `move`_
* `clone`_
* `sync`_
- * `index`_
.. _signac-cli-clone:
@@ -92,14 +91,6 @@ import
.. program-output:: signac import --help
-.. _signac-cli-index:
-
-index
-=====
-
-.. program-output:: signac index --help
-
-
.. _signac-cli-init:
init
diff --git a/signac/__main__.py b/signac/__main__.py
index d9c819f1c..73b600cb7 100644
--- a/signac/__main__.py
+++ b/signac/__main__.py
@@ -174,7 +174,7 @@ def main_project(args):
"""Handle project subcommand."""
project = get_project()
if args.index:
- for doc in project.index():
+ for doc in project._index():
print(json.dumps(doc))
return
if args.workspace:
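
Reviewer note: ``signac project --index`` keeps working and now streams the
documents produced by the private ``_index()`` generator, one JSON object per
line. A minimal sketch of consuming that output follows; the ``index.jsonl``
filename and the ``statepoint`` key are illustrative assumptions, while
``_id``, ``signac_id``, and ``root`` are guaranteed by ``_full_doc`` below.

.. code-block:: python

    # Sketch: parse the JSON-lines output of `signac project --index`,
    # assuming it was redirected to a file named "index.jsonl".
    import json

    with open("index.jsonl") as file:
        for line in file:
            doc = json.loads(line)
            # Each document carries the job id under "_id" and "signac_id".
            print(doc["_id"], doc.get("statepoint"))
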
diff --git a/signac/contrib/indexing.py b/signac/contrib/indexing.py
deleted file mode 100644
index ee93aa9b7..000000000
--- a/signac/contrib/indexing.py
+++ /dev/null
@@ -1,451 +0,0 @@
-# Copyright (c) 2017 The Regents of the University of Michigan
-# All rights reserved.
-# This software is licensed under the BSD 3-Clause License.
-"""Indexing features."""
-
-import errno
-import hashlib
-import json
-import logging
-import math
-import os
-import re
-import warnings
-
-from ..common import errors
-from .hashing import calc_id
-from .utility import walkdepth
-
-logger = logging.getLogger(__name__)
-
-KEY_PROJECT = "project"
-KEY_FILENAME = "filename"
-KEY_PATH = "root"
-KEY_PAYLOAD = "format"
-
-
-def _compute_file_md5(file):
- """Calculate and return the md5 hash value for the file data."""
- m = hashlib.md5()
- for chunk in iter(lambda: file.read(4096), b""):
- m.update(chunk)
- return m.hexdigest()
-
-
-class _BaseCrawler:
- """Crawl through ``root`` and index all files.
-
- The crawler creates an index on data, which can be exported to a database
- for easier access.
- """
-
- tags = None
-
- def __init__(self, root):
- """Initialize a _BaseCrawler instance.
-
- Parameters
- ----------
- root : str
- The path to the root directory to crawl through.
-
- """
- self.root = os.path.expanduser(root)
- self.tags = set() if self.tags is None else set(self.tags)
-
- def docs_from_file(self, dirpath, fn):
- """Implement this method to generate documents from files.
-
- Parameters
- ----------
- dirpath : str
- The path of the file, relative to ``root``.
- fn : str
- The filename.
-
- Yields
- ------
- dict
- Index documents.
-
- """
- raise NotImplementedError()
-
- def fetch(self, doc, mode="r"):
- """Implement this generator method to associate data with a document."""
- raise errors.FetchError(f"Unable to fetch object for '{doc}'.")
-
- @classmethod
- def _calculate_hash(cls, doc, dirpath, fn):
- blob = json.dumps(doc, sort_keys=True)
- m = hashlib.md5()
- m.update(dirpath.encode("utf-8"))
- m.update(fn.encode("utf-8"))
- m.update(blob.encode("utf-8"))
- return m.hexdigest()
-
- def crawl(self, depth=0):
- """Crawl through the ``root`` directory.
-
- The crawler will inspect every file and directory up
- until the specified ``depth`` and call the
- :meth:`docs_from_file` method.
-
- Parameters
- ----------
- depth : int
- Maximum directory depth to crawl. A value of 0 specifies no limit.
-
- Yields
- ------
- dict
- Document.
-
- """
- logger.info(f"Crawling '{self.root}' (depth={depth})...")
- for dirpath, dirnames, filenames in walkdepth(self.root, depth):
- for fn in filenames:
- for doc in self.docs_from_file(dirpath, fn):
- logger.debug(f"doc from file: '{os.path.join(dirpath, fn)}'.")
- doc.setdefault(KEY_PAYLOAD, None)
- doc.setdefault("_id", self._calculate_hash(doc, dirpath, fn))
- yield doc
- logger.info(f"Crawl of '{self.root}' done.")
-
- def process(self, doc, dirpath, fn):
- """Implement this method for processing generated documents.
-
- The default implementation will return the unmodified ``doc``.
-
- Parameters
- ----------
- doc : dict
- The document to process.
- dirpath : str
- The path of the file, relative to ``root``.
- fn : str
- The filename.
-
- Returns
- -------
- dict
- A document.
-
- """
- return doc
-
-
-class _RegexFileCrawler(_BaseCrawler):
- r"""Generate documents from filenames and associate each file with a data type.
-
- The :py:class:`_RegexFileCrawler` uses regular expressions to generate
- data from files. This is a particularly easy method to retrieve metadata
- associated with files. Inherit from this class to configure a crawler
- for your data structure.
-
- Let's assume we want to index text files with a naming pattern that
- specifies a parameter ``a`` through the filename, e.g.:
-
- .. code-block:: python
-
- ~/my_project/a_0.txt
- ~/my_project/a_1.txt
- ...
-
- A valid regular expression to match this pattern would
- be: ``.*\/a_(?P<a>\d+)\.txt`` which may be defined for a crawler as such:
-
- .. code-block:: python
-
- class MyCrawler(_RegexFileCrawler):
- pass
-
- MyCrawler.define(r'.*\/a_(?P<a>\d+)\.txt', 'TextFile')
-
- """
-
- "Mapping of compiled regex objects and associated formats."
- definitions = {} # type: ignore
-
- @classmethod
- def define(cls, regex, format_=None):
- """Define a format for a particular regular expression.
-
- Parameters
- ----------
- regex : str
- A regular expression used to match files of the specified format.
- format_ : object
- The format associated with all matching files.
-
- """
- if isinstance(regex, str):
- regex = re.compile(regex)
- definitions = dict(cls.definitions)
- definitions[regex] = format_
- cls.definitions = definitions
-
- @classmethod
- def compute_file_id(cls, doc, file):
- """Compute the file id for a given doc and the associated file.
-
- The resulting id is assigned to ``doc["md5"]``.
-
- Parameters
- ----------
- doc : dict
- The index document.
- file : file-like object
- The associated file.
-
- Returns
- -------
- str
- The file id.
-
- """
- file_id = doc["md5"] = _compute_file_md5(file)
- return file_id
-
- def docs_from_file(self, dirpath, fn):
- """Generate documents from filenames.
-
- This method implements the abstract
- :py:meth:`~._BaseCrawler.docs_from_file` and yields index
- documents associated with files.
-
- Notes
- -----
- It is not recommended to reimplement this method to modify
- documents generated from filenames.
- See :py:meth:`~_RegexFileCrawler.process` instead.
-
- Parameters
- ----------
- dirpath : str
- The path of the file relative to root.
- fn : str
- The filename of the file.
-
- Yields
- ------
- dict
- Index document.
-
- """
- for regex, format_ in self.definitions.items():
- m = regex.match(os.path.join(dirpath, fn))
- if m:
- doc = self.process(m.groupdict(), dirpath, fn)
- doc[KEY_FILENAME] = os.path.relpath(
- os.path.join(dirpath, fn), self.root
- )
- doc[KEY_PATH] = os.path.abspath(self.root)
- doc[KEY_PAYLOAD] = str(format_)
- with open(os.path.join(dirpath, fn), "rb") as file:
- doc["file_id"] = self.compute_file_id(doc, file)
- yield doc
-
- def fetch(self, doc, mode="r"):
- """Fetch the data associated with ``doc``.
-
- Parameters
- ----------
- doc : dict
- An index document.
- mode : str
- Mode used to open file object.
-
- Returns
- -------
- file-like object
- The file associated with the index document.
-
- """
- fn = doc.get(KEY_FILENAME)
- if fn:
- for regex, format_ in self.definitions.items():
- ffn = os.path.join(self.root, fn)
- m = regex.match(ffn)
- if m:
- if isinstance(format_, str):
- return open(ffn, mode=mode)
- else:
- for meth in ("read", "close"):
- if not callable(getattr(format_, meth, None)):
- msg = f"Format {format_} has no {meth}() method."
- warnings.warn(msg)
- return format_(open(ffn, mode=mode))
- else:
- raise errors.FetchError(
- f"Unable to match file path of doc '{doc}' to format definition."
- )
- else:
- raise errors.FetchError(f"Insufficient metadata in doc '{doc}'.")
-
- def process(self, doc, dirpath, fn):
- """Post-process documents generated from filenames.
-
- Examples
- --------
- .. code-block:: python
-
- class MyCrawler(signac.contrib.indexing._RegexFileCrawler):
- def process(self, doc, dirpath, fn):
- doc['long_name_for_a'] = doc['a']
- return super(MyCrawler, self).process(doc, dirpath, fn)
-
- Parameters
- ----------
- doc : dict
- The document to post-process.
- dirpath : str
- The path of the file, relative to ``root``.
- fn : str
- The filename.
-
- Returns
- -------
- dict
- An index document.
-
- """
- result = {}
- for key, value in doc.items():
- if value is None or isinstance(value, bool):
- result[key] = value
- continue
- try:
- value = float(value)
- except Exception:
- result[key] = value
- else:
- if not (math.isnan(value) or math.isinf(value)):
- if float(value) == int(value):
- result[key] = int(value)
- else:
- result[key] = float(value)
- return super().process(result, dirpath, fn)
-
- def crawl(self, depth=0):
- if self.definitions:
- yield from super().crawl(depth=depth)
- else:
- return
-
-
-def _index_signac_project_workspace(
- root,
- include_job_document=True,
- fn_statepoint="signac_statepoint.json",
- fn_job_document="signac_job_document.json",
- statepoint_index="statepoint",
- signac_id_alias="_id",
- encoding="utf-8",
- statepoint_dict=None,
-):
- """Yield standard index documents for a signac project workspace."""
- logger.debug(f"Indexing workspace '{root}'...")
- m = re.compile(r"[a-f0-9]{32}")
- try:
- job_ids = [jid for jid in os.listdir(root) if m.match(jid)]
- except OSError as error:
- if error.errno == errno.ENOENT:
- return
- else:
- raise
- for i, job_id in enumerate(job_ids):
- if not m.match(job_id):
- continue
- doc = {"signac_id": job_id, KEY_PATH: root}
- if signac_id_alias:
- doc[signac_id_alias] = job_id
- fn_sp = os.path.join(root, job_id, fn_statepoint)
- with open(fn_sp, "rb") as file:
- sp = json.loads(file.read().decode(encoding))
- if statepoint_dict is not None:
- statepoint_dict[job_id] = sp
- if statepoint_index:
- doc[statepoint_index] = sp
- else:
- doc.update(sp)
- if include_job_document:
- fn_doc = os.path.join(root, job_id, fn_job_document)
- try:
- with open(fn_doc, "rb") as file:
- doc.update(json.loads(file.read().decode(encoding)))
- except OSError as error:
- if error.errno != errno.ENOENT:
- raise
- yield doc
- if job_ids:
- logger.debug(f"Indexed workspace '{root}', {i + 1} entries.")
-
-
-class _SignacProjectCrawler(_RegexFileCrawler):
- """Index a signac project workspace.
-
- Without any file format definitions, this crawler yields index documents
- for each job, including the state point and the job document.
-
- See Also
- --------
- :py:class:`~._RegexFileCrawler`
-
- Parameters
- ----------
- root : str
- The path to the project's root directory.
-
- """
-
- encoding = "utf-8"
- statepoint_index = "statepoint"
- fn_statepoint = "signac_statepoint.json"
- fn_job_document = "signac_job_document.json"
- signac_id_alias = "_id"
-
- def __init__(self, root):
- from .project import get_project
-
- root = get_project(root=root).workspace()
- self._statepoints = {}
- return super().__init__(root=root)
-
- def _get_job_id(self, dirpath):
- return os.path.relpath(dirpath, self.root).split(os.path.sep)[0]
-
- def _read_statepoint(self, job_id):
- fn_sp = os.path.join(self.root, job_id, self.fn_statepoint)
- with open(fn_sp, "rb") as file:
- return json.loads(file.read().decode(self.encoding))
-
- def _get_statepoint(self, job_id):
- sp = self._statepoints.setdefault(job_id, self._read_statepoint(job_id))
- assert calc_id(sp) == job_id
- return sp
-
- def get_statepoint(self, dirpath):
- job_id = self._get_job_id(dirpath)
- return job_id, self._get_statepoint(job_id)
-
- def process(self, doc, dirpath, fn):
- if dirpath is not None:
- job_id = self._get_job_id(dirpath)
- statepoint = self._get_statepoint(job_id)
- doc["signac_id"] = job_id
- if self.statepoint_index:
- doc[self.statepoint_index] = statepoint
- else:
- doc.update(statepoint)
- return super().process(doc, dirpath, fn)
-
- def crawl(self, depth=0):
- for doc in _index_signac_project_workspace(
- root=self.root,
- fn_statepoint=self.fn_statepoint,
- fn_job_document=self.fn_job_document,
- statepoint_index=self.statepoint_index,
- signac_id_alias=self.signac_id_alias,
- encoding=self.encoding,
- statepoint_dict=self._statepoints,
- ):
- yield self.process(doc, None, None)
- for doc in super().crawl(depth=depth):
- yield doc
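
Reviewer note: with the crawler classes deleted, filename-pattern indexing has
no public replacement. Below is a rough standalone sketch for projects that
relied on it, mirroring the document keys of the removed ``_RegexFileCrawler``
(``filename``, ``root``, ``format``, ``md5``/``file_id``); the ``index_files``
helper is hypothetical and not part of signac's API.

.. code-block:: python

    # Sketch of a filename-pattern indexer standing in for the removed
    # _RegexFileCrawler; the helper name and signature are assumptions.
    import hashlib
    import os
    import re

    def index_files(root, pattern, format_name):
        regex = re.compile(pattern)
        for dirpath, _, filenames in os.walk(root):
            for fn in filenames:
                path = os.path.join(dirpath, fn)
                m = regex.match(path)
                if m is None:
                    continue
                doc = m.groupdict()  # e.g. {"a": "0"} for a_0.txt
                doc["filename"] = os.path.relpath(path, root)
                doc["root"] = os.path.abspath(root)
                doc["format"] = format_name
                with open(path, "rb") as file:
                    md5 = hashlib.md5()
                    for chunk in iter(lambda: file.read(4096), b""):
                        md5.update(chunk)
                doc["md5"] = doc["file_id"] = md5.hexdigest()
                yield doc

    # Example, matching the docstring above:
    # for doc in index_files("my_project", r".*a_(?P<a>\d+)\.txt", "TextFile"):
    #     print(doc)
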
diff --git a/signac/contrib/project.py b/signac/contrib/project.py
index dfab86c8d..a5cacbf78 100644
--- a/signac/contrib/project.py
+++ b/signac/contrib/project.py
@@ -38,7 +38,6 @@
)
from .filterparse import _add_prefix, _root_keys, parse_filter
from .hashing import calc_id
-from .indexing import _SignacProjectCrawler
from .job import Job
from .schema import ProjectSchema
from .utility import _mkdir_p, _nested_dicts_to_dotted_keys, _split_and_print_progress
@@ -816,9 +815,7 @@ def detect_schema(self, exclude_const=False, subset=None, index=None):
from .schema import _build_job_statepoint_index
if index is None:
- index = self.index(include_job_document=False)
- else:
- warnings.warn(INDEX_DEPRECATION_WARNING, DeprecationWarning)
+ index = self._index(include_job_document=False)
if subset is not None:
subset = {str(s) for s in subset}
index = [doc for doc in index if doc["_id"] in subset]
@@ -884,9 +881,9 @@ def _find_job_ids(self, filter=None, doc_filter=None, index=None):
if doc_filter:
warnings.warn(DOC_FILTER_WARNING, DeprecationWarning)
filter.update(parse_filter(_add_prefix("doc.", doc_filter)))
- index = self.index(include_job_document=True)
+ index = self._index(include_job_document=True)
elif "doc" in _root_keys(filter):
- index = self.index(include_job_document=True)
+ index = self._index(include_job_document=True)
else:
index = self._sp_index()
else:
@@ -1883,15 +1880,7 @@ def _read_cache(self):
logger.debug(f"Read cache in {delta:.3f} seconds.")
return cache
- @deprecated(
- deprecated_in="1.8",
- removed_in="2.0",
- current_version=__version__,
- details="Indexing is deprecated.",
- )
- def index(
- self, formats=None, depth=0, skip_errors=False, include_job_document=True
- ):
+ def _index(self, *, include_job_document=True):
r"""Generate an index of the project's workspace.
This generator function indexes every file in the project's
@@ -1901,11 +1890,6 @@ def index(
See :ref:`signac project -i <signac-cli-project>` for the command line equivalent.
- .. code-block:: python
-
- for doc in project.index({r'.*\.txt', 'TextFile'}):
- print(doc)
-
Parameters
----------
formats : str, dict
@@ -1928,42 +1912,28 @@ def index(
Index document.
"""
- if formats is None:
- root = self.workspace()
+ root = self.workspace()
- def _full_doc(doc):
- """Add `signac_id` and `root` to the index document.
+ def _full_doc(doc):
+ """Add `signac_id` and `root` to the index document.
- Parameters
- ----------
- doc : dict
- Index document.
+ Parameters
+ ----------
+ doc : dict
+ Index document.
- Returns
- -------
- dict
- Modified index document.
+ Returns
+ -------
+ dict
+ Modified index document.
- """
- doc["signac_id"] = doc["_id"]
- doc["root"] = root
- return doc
+ """
+ doc["signac_id"] = doc["_id"]
+ doc["root"] = root
+ return doc
- docs = self._build_index(include_job_document=include_job_document)
- docs = map(_full_doc, docs)
- else:
- if isinstance(formats, str):
- formats = {formats: "File"}
-
- class Crawler(_SignacProjectCrawler):
- pass
-
- for pattern, fmt in formats.items():
- Crawler.define(pattern, fmt)
- crawler = Crawler(self.root_directory())
- docs = crawler.crawl(depth=depth)
- if skip_errors:
- docs = _skip_errors(docs, logger.critical)
+ docs = self._build_index(include_job_document=include_job_document)
+ docs = map(_full_doc, docs)
for doc in docs:
yield doc
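
Reviewer note: since ``_index()`` is now private, external code should not
call it. The supported route is to iterate the jobs directly; here is a sketch
using only public API, where the shape of the aggregated mapping is my choice
rather than a fixed signac format.

.. code-block:: python

    # Sketch: rebuild a per-job index with public API only.
    # Run from within a signac project directory.
    import signac

    project = signac.get_project()
    index = {
        job.id: {"statepoint": job.statepoint(), "doc": dict(job.document)}
        for job in project
    }
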
diff --git a/signac/contrib/utility.py b/signac/contrib/utility.py
index 5753e9681..216377a3a 100644
--- a/signac/contrib/utility.py
+++ b/signac/contrib/utility.py
@@ -88,47 +88,6 @@ def add_verbosity_argument(parser, default=0):
)
-def walkdepth(path, depth=0):
- """Transverse the directory starting from path.
-
- Parameters
- ----------
- path :str
- Directory passed to walk (transverse from).
- depth : int
- (Default value = 0)
-
- Yields
- ------
- str
- When depth==0.
- tuple
- When depth>0.
-
- Raises
- ------
- ValueError
- When the value of depth is negative.
- OSError
- When path is not name of a directory.
-
- """
- if depth == 0:
- yield from os.walk(path)
- elif depth > 0:
- path = path.rstrip(os.path.sep)
- if not os.path.isdir(path):
- raise OSError(f"Not a directory: '{path}'.")
- num_sep = path.count(os.path.sep)
- for root, dirs, files in os.walk(path):
- yield root, dirs, files
- num_sep_this = root.count(os.path.sep)
- if num_sep + depth <= num_sep_this:
- del dirs[:]
- else:
- raise ValueError("The value of depth must be non-negative.")
-
-
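
Reviewer note: ``walkdepth`` had no remaining callers once the crawlers were
removed. External code that used it can reproduce the behavior with a few
lines around :func:`os.walk`; this sketch mirrors the deleted implementation
(the ``walk_depth`` name is hypothetical).

.. code-block:: python

    # Sketch of a depth-limited os.walk, equivalent to the removed helper.
    import os

    def walk_depth(path, depth=0):
        if depth < 0:
            raise ValueError("The value of depth must be non-negative.")
        if depth == 0:
            yield from os.walk(path)
            return
        path = path.rstrip(os.path.sep)
        if not os.path.isdir(path):
            raise OSError(f"Not a directory: '{path}'.")
        base = path.count(os.path.sep)
        for root, dirs, files in os.walk(path):
            yield root, dirs, files
            if root.count(os.path.sep) >= base + depth:
                del dirs[:]  # stop os.walk from descending further
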
def _mkdir_p(path):
"""Make a new directory, or do nothing if the directory already exists.
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
deleted file mode 100644
index c8019c5f0..000000000
--- a/tests/test_indexing.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Copyright (c) 2017 The Regents of the University of Michigan
-# All rights reserved.
-# This software is licensed under the BSD 3-Clause License.
-import json
-import os
-import re
-from tempfile import TemporaryDirectory
-from unittest.mock import Mock
-
-import pytest
-
-from signac import Collection
-from signac.contrib import indexing
-from signac.errors import FetchError
-
-
-class TestFormat:
- def read(self):
- assert 0
-
- def close(self):
- assert 0
-
-
-class TestIndexingBase:
- @pytest.fixture(autouse=True)
- def setUp(self, request):
- self._tmp_dir = TemporaryDirectory(prefix="signac_")
- request.addfinalizer(self._tmp_dir.cleanup)
-
- def setup_project(self):
- def fn(name):
- return os.path.join(self._tmp_dir.name, name)
-
- with open(fn("a_0.txt"), "w") as file:
- file.write('{"a": 0}')
- with open(fn("a_1.txt"), "w") as file:
- file.write('{"a": 1}')
- with open(fn("a_0.json"), "w") as file:
- json.dump(dict(a=0), file)
- with open(fn("a_1.json"), "w") as file:
- json.dump(dict(a=1), file)
-
- def get_index_collection(self):
- c = Collection()
- return Mock(spec=c, wraps=c)
-
- def test_base_crawler(self):
- crawler = indexing._BaseCrawler(root=self._tmp_dir.name)
- assert len(list(crawler.crawl())) == 0
- doc = dict(a=0)
- with pytest.raises(FetchError):
- assert crawler.fetch(doc) is None
- assert doc == crawler.process(doc, None, None)
- with pytest.raises(NotImplementedError):
- for doc in crawler.docs_from_file(None, None):
- pass
-
- def test_regex_file_crawler_pre_compiled(self):
- self.setup_project()
-
- class Crawler(indexing._RegexFileCrawler):
- pass
-
- regex = re.compile(r".*a_(?P<a>\d)\.txt")
- Crawler.define(regex, TestFormat)
- crawler = Crawler(root=self._tmp_dir.name)
- no_find = True
- for doc in crawler.crawl():
- no_find = False
- ffn = os.path.join(doc["root"], doc["filename"])
- m = regex.match(ffn)
- assert m is not None
- assert os.path.isfile(ffn)
- with open(ffn) as file:
- doc2 = json.load(file)
- assert doc2["a"] == doc["a"]
- assert not no_find
-
- def test_regex_file_crawler(self):
- self.setup_project()
-
- class Crawler(indexing._RegexFileCrawler):
- pass
-
- # First test without pattern
- crawler = Crawler(root=self._tmp_dir.name)
- assert len(list(crawler.crawl())) == 0
-
- # Now with pattern(s)
- pattern = r".*a_(?P\d)\.txt"
- regex = re.compile(pattern)
- Crawler.define(pattern, TestFormat)
- Crawler.define("negativematch", "negativeformat")
- crawler = Crawler(root=self._tmp_dir.name)
- no_find = True
- for doc in crawler.crawl():
- no_find = False
- ffn = os.path.join(doc["root"], doc["filename"])
- m = regex.match(ffn)
- assert m is not None
- assert os.path.isfile(ffn)
- with open(ffn) as file:
- doc2 = json.load(file)
- assert doc2["a"] == doc["a"]
- assert not no_find
- with pytest.raises(FetchError):
- crawler.fetch(dict())
- with pytest.raises(FetchError):
- crawler.fetch({"filename": "shouldnotmatch"})
-
- def test_regex_file_crawler_inheritance(self):
- self.setup_project()
-
- class CrawlerA(indexing._RegexFileCrawler):
- pass
-
- class CrawlerB(indexing._RegexFileCrawler):
- pass
-
- CrawlerA.define("a", TestFormat)
- CrawlerB.define("b", TestFormat)
- assert len(CrawlerA.definitions) == 1
- assert len(CrawlerB.definitions) == 1
-
- class CrawlerC(CrawlerA):
- pass
-
- assert len(CrawlerA.definitions) == 1
- assert len(CrawlerC.definitions) == 1
- assert len(CrawlerB.definitions) == 1
- CrawlerC.define("c", TestFormat)
- assert len(CrawlerA.definitions) == 1
- assert len(CrawlerB.definitions) == 1
- assert len(CrawlerC.definitions) == 2
diff --git a/tests/test_project.py b/tests/test_project.py
index 80adf48e4..7c4fe36fd 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -630,77 +630,18 @@ def test_repair_corrupted_workspace(self):
logging.disable(logging.NOTSET)
def test_index(self):
- docs = list(self.project.index(include_job_document=True))
+ docs = list(self.project._index(include_job_document=True))
assert len(docs) == 0
- docs = list(self.project.index(include_job_document=False))
+ docs = list(self.project._index(include_job_document=False))
assert len(docs) == 0
statepoints = [{"a": i} for i in range(5)]
for sp in statepoints:
self.project.open_job(sp).document["test"] = True
job_ids = {job.id for job in self.project.find_jobs()}
- docs = list(self.project.index())
+ docs = list(self.project._index())
job_ids_cmp = {doc["_id"] for doc in docs}
assert job_ids == job_ids_cmp
assert len(docs) == len(statepoints)
- for sp in statepoints:
- with self.project.open_job(sp):
- with open("test.txt", "w"):
- pass
- docs = list(
- self.project.index(
- {".*" + re.escape(os.path.sep) + r"test\.txt": "TextFile"}
- )
- )
- assert len(docs) == 2 * len(statepoints)
- assert len({doc["_id"] for doc in docs}) == len(docs)
-
- # Index schema is changed
- @pytest.mark.xfail()
- def test_signac_project_crawler(self):
- statepoints = [{"a": i} for i in range(5)]
- for sp in statepoints:
- self.project.open_job(sp).document["test"] = True
- job_ids = {job.id for job in self.project.find_jobs()}
- index = {}
- for doc in self.project.index():
- index[doc["_id"]] = doc
- assert len(index) == len(job_ids)
- assert set(index.keys()) == set(job_ids)
- crawler = signac.contrib._SignacProjectCrawler(self.project.root_directory())
- index2 = {}
- for doc in crawler.crawl():
- index2[doc["_id"]] = doc
- for _id, _id2 in zip(index, index2):
- assert _id == _id2
- assert index[_id] == index2[_id]
- assert index == index2
- for job in self.project.find_jobs():
- with open(job.fn("test.txt"), "w") as file:
- file.write("test\n")
- formats = {r".*" + re.escape(os.path.sep) + r"test\.txt": "TextFile"}
- index = {}
- for doc in self.project.index(formats):
- index[doc["_id"]] = doc
- assert len(index) == 2 * len(job_ids)
-
- class Crawler(signac.contrib._SignacProjectCrawler):
- called = False
-
- def process(self_, doc, dirpath, fn):
- Crawler.called = True
- doc = super().process(doc=doc, dirpath=dirpath, fn=fn)
- if "format" in doc and doc["format"] is None:
- assert doc["_id"] == doc["signac_id"]
- return doc
-
- for p, fmt in formats.items():
- with pytest.deprecated_call():
- Crawler.define(p, fmt)
- index2 = {}
- for doc in Crawler(root=self.project.root_directory()).crawl():
- index2[doc["_id"]] = doc
- assert index == index2
- assert Crawler.called
def test_custom_project(self):
class CustomProject(signac.Project):