Skip to content

Commit

Permalink
Isomorphisms + Profiling (#7)
Browse files Browse the repository at this point in the history
* [tests] Remove unittest classes

* Add profiling signature

* Move comparisons to nb directory

* add isomorphisms filter

* Add random test cases

* Docstring tweaks in next-candidates

* [tests] only generate truthy node IDs

* Update python-package.yml

* Update python-package.yml

* [docs] Update readme with tests/coverage
  • Loading branch information
j6k4m8 authored Oct 15, 2020
1 parent 04936ff commit c7d57fe
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 61 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install flake8 pytest pytest-cov
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
Expand All @@ -36,4 +36,6 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest
pytest --cov=./ --cov-report=xml
- name: Codecov
uses: codecov/[email protected]
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

notebooks/*.ipynb
# Created by https://www.toptal.com/developers/gitignore/api/macos,python,windows,jupyternotebooks,visualstudiocode
# Edit at https://www.toptal.com/developers/gitignore?templates=macos,python,windows,jupyternotebooks,visualstudiocode

Expand Down
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# Grand Isomorphisms
<h1 align="center">Grand Isomorphisms</h1>

<p align="center">
<a href="https://codecov.io/gh/aplbrain/grandiso-networkx/"><img alt="Codecov" src="https://img.shields.io/codecov/c/github/aplbrain/grandiso-networkx?style=for-the-badge"></a>
<a href="https://github.com/aplbrain/grandiso-networkx/actions"><img alt="GitHub Workflow Status" src="https://img.shields.io/github/workflow/status/aplbrain/grandiso-networkx/Python%20package?style=for-the-badge"></a>
<a href="https://bossdb.org/tools/DotMotif"><img src="https://img.shields.io/badge/Pretty Dope-👌-00ddcc.svg?style=for-the-badge" /></a>
<img alt="GitHub" src="https://img.shields.io/github/license/aplbrain/grandiso-networkx?style=for-the-badge">
</p>

Subgraph isomorphism is a resource-heavy (but branch-parallelizable) algorithm that is hugely impactful for large graph analysis. SotA algorithms for this (Ullmann, VF2, BB-Graph) are heavily RAM-bound, but this is due to a large number of small processes, each of which holds a small portion of a traversal tree in memory.

Expand Down Expand Up @@ -65,3 +72,11 @@ For very large graphs, you may use a good chunk of RAM not only on the queue of
- Reporting
- Return the set R to the user.
```

## Hacking on this repo

### Running Tests

```shell
coverage run --source=grandiso -m pytest
```
96 changes: 81 additions & 15 deletions grandiso/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"""
from typing import List, Union

import itertools
import time
import queue

Expand Down Expand Up @@ -108,7 +109,7 @@ def get_next_backbone_candidates(
interestingness: dict,
next_node: str = None,
directed: bool = True,
enforce_inequality: bool = True,
isomorphisms_only: bool = False,
) -> List[dict]:
"""
Get a list of candidate node assignments for the next "step" of this map.
Expand All @@ -120,8 +121,8 @@ def get_next_backbone_candidates(
interestingness (dict): A mapping of motif node IDs to interestingness
next_node (str: None): Optional suggestion for the next node to assign
directed (bool: True): Whether host and motif are both directed
enforce_inequality (bool: True): If true, two nodes in backbone cannot
be assigned to the same host-graph
isomorphisms_only (bool: False): If true, only isomorphisms will be
returned (instead of all monomorphisms)
Returns:
List[dict]: A new list of mappings with one additional element mapped
Expand Down Expand Up @@ -272,7 +273,9 @@ def get_next_backbone_candidates(
# edges between them DO exist in the host graph. Otherwise, when we check
# in find_motifs that len(motif) == len(mapping), we will discover that the
# mapping is "complete" even though we haven't yet checked it at all.
results = []

monomorphism_candidates = []

for mapping in tentative_results:
if len(mapping) == len(motif):
if all(
Expand All @@ -282,12 +285,31 @@ def get_next_backbone_candidates(
]
):
# This is a "complete" match!
results.append(mapping)
monomorphism_candidates.append(mapping)
else:
# This is a partial match, so we'll continue building.
results.append(mapping)

return results
monomorphism_candidates.append(mapping)

if not isomorphisms_only:
return monomorphism_candidates

# Additionally, if isomorphisms_only == True, we can use this opportunity
# to confirm that no spurious edges exist in the induced subgraph:
isomorphism_candidates = []
for result in monomorphism_candidates:
for (motif_u, motif_v) in itertools.product(result.keys(), result.keys()):
# if the motif has this edge, then it doesn't rule any of the
# above results out as an isomorphism.
# if the motif does NOT have the edge, then NO RESULT may have
# the equivalent edge in the host graph:
if not motif.has_edge(motif_u, motif_v) and host.has_edge(
result[motif_u], result[motif_v]
):
# this is a violation.
break
else:
isomorphism_candidates.append(result)
return isomorphism_candidates


def uniform_node_interestingness(motif: nx.Graph) -> dict:
Expand All @@ -301,11 +323,33 @@ def uniform_node_interestingness(motif: nx.Graph) -> dict:
return {n: 1 for n in motif.nodes()}


class ProfilingQueue(queue.SimpleQueue):
    """
    A SimpleQueue that records its own size after every put and get.

    Attributes:
        _size (int): Number of successful puts minus successful gets so far.
        _size_history (queue.SimpleQueue): The value of ``_size`` after each
            successful put/get, in the order the operations happened.

    """

    def __init__(self):
        super().__init__()
        self._size_history = queue.SimpleQueue()
        self._size = 0

    def put(self, *args, **kwargs):
        """
        Enqueue an item, then record the new queue size.

        Arguments are forwarded unchanged to ``queue.SimpleQueue.put``.

        Returns:
            Whatever the underlying ``put`` returns.

        """
        res = super().put(*args, **kwargs)
        self._size += 1
        self._size_history.put(self._size)
        return res

    def get(self, *args, **kwargs):
        """
        Dequeue an item, then record the new queue size.

        Arguments are forwarded unchanged to ``queue.SimpleQueue.get``. If the
        underlying ``get`` raises, nothing is recorded because the size
        bookkeeping only runs after it returns.

        Returns:
            The item removed from the queue.

        """
        res = super().get(*args, **kwargs)
        self._size -= 1
        self._size_history.put(self._size)
        return res


def find_motifs(
motif: nx.DiGraph,
host: nx.DiGraph,
interestingness: dict = None,
count_only: bool = False,
directed: bool = None,
profile: bool = False,
isomorphisms_only: bool = False,
) -> List[dict]:
"""
Get a list of mappings from motif node IDs to host graph IDs.
Expand All @@ -323,6 +367,13 @@ def find_motifs(
number that indicates an ordinality in which to address each node
count_only (bool: False): If True, return only an integer count of the
number of motifs, rather than a list of mappings.
directed (bool: None): Whether direction should be considered during
search. If omitted, this will be based upon the motif directedness.
profile (bool: False): SLOWER! Whether to include additional metrics
in addition to results. Note that you should only ever use this to
debug or understand your results, not for use in production.
isomorphisms_only (bool: False): Whether to return isomorphisms (the
default is monomorphisms).
Returns:
List[dict]: A list of mappings from motif node IDs to host graph IDs
Expand All @@ -331,13 +382,18 @@ def find_motifs(
"""
interestingness = interestingness or uniform_node_interestingness(motif)

if isinstance(motif, nx.DiGraph):
# This will be a directed query.
directed = True
else:
directed = False
if directed is None:
# guess directedness from motif
if isinstance(motif, nx.DiGraph):
# This will be a directed query.
directed = True
else:
directed = False

q = queue.SimpleQueue()
if profile:
q = ProfilingQueue()
else:
q = queue.SimpleQueue()

results = []
results_count = 0
Expand All @@ -348,7 +404,12 @@ def find_motifs(
while not q.empty():
new_backbone = q.get()
next_candidate_backbones = get_next_backbone_candidates(
new_backbone, motif, host, interestingness, directed=directed
new_backbone,
motif,
host,
interestingness,
directed=directed,
isomorphisms_only=isomorphisms_only,
)

for candidate in next_candidate_backbones:
Expand All @@ -360,6 +421,11 @@ def find_motifs(
else:
q.put(candidate)

if profile:
if count_only:
return results_count, q
return results, q
if count_only:
return results_count
return results

Loading

0 comments on commit c7d57fe

Please sign in to comment.