Skip to content

Commit

Permalink
Merge pull request #85 from HazyResearch/lf_helpers
Browse files Browse the repository at this point in the history
Split up lf_helpers by modality in docs
  • Loading branch information
lukehsiao authored Jul 24, 2018
2 parents b5ba611 + 5fbbf27 commit fc3b7f1
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 70 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
Version 0.2.4 (coming soon...)
------------------------------

* `@lukehsiao`_: Organize documentation for lf_helpers by modality.
(`#85 <https://github.com/HazyResearch/fonduer/pull/85>`_)

Version 0.2.3
-------------

Expand Down
32 changes: 30 additions & 2 deletions docs/user/lf_helpers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,39 @@ Labeling Function Helpers

This page shows descriptions of the helper functions included with Fonduer_
which can be used to label candidates based on textual, structural, tabular,
and visual information.
and visual information. The lf_helpers are grouped by the modality of
information that they leverage.

----

.. automodule:: fonduer.supervision.lf_helpers
General Labeling Function Helpers
---------------------------------

.. automodule:: fonduer.supervision.lf_helpers.utils
:members:

Textual Labeling Function Helpers
---------------------------------

.. automodule:: fonduer.supervision.lf_helpers.textual
:members:

Structural Labeling Function Helpers
------------------------------------

.. automodule:: fonduer.supervision.lf_helpers.structural
:members:

Tabular Labeling Function Helpers
---------------------------------

.. automodule:: fonduer.supervision.lf_helpers.tabular
:members:

Visual Labeling Function Helpers
---------------------------------

.. automodule:: fonduer.supervision.lf_helpers.visual
:members:

.. _Fonduer: https://github.com/HazyResearch/fonduer
2 changes: 1 addition & 1 deletion fonduer/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.3"
__version__ = "0.2.4"
88 changes: 21 additions & 67 deletions fonduer/supervision/lf_helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import logging

from fonduer.supervision.lf_helpers.structural import (
common_ancestor,
get_ancestor_class_names,
Expand All @@ -13,90 +11,46 @@
lowest_common_ancestor_depth,
)
from fonduer.supervision.lf_helpers.tabular import (
same_document,
same_table,
same_row,
same_col,
is_tabular_aligned,
same_cell,
same_sentence,
get_aligned_ngrams,
get_cell_ngrams,
get_col_ngrams,
get_head_ngrams,
get_max_col_num,
get_min_col_num,
get_sentence_ngrams,
get_neighbor_sentence_ngrams,
get_cell_ngrams,
get_neighbor_cell_ngrams,
get_neighbor_sentence_ngrams,
get_row_ngrams,
get_col_ngrams,
get_aligned_ngrams,
get_head_ngrams,
get_sentence_ngrams,
is_tabular_aligned,
same_cell,
same_col,
same_document,
same_row,
same_sentence,
same_table,
)
from fonduer.supervision.lf_helpers.textual import (
get_between_ngrams,
get_left_ngrams,
get_right_ngrams,
)
from fonduer.supervision.lf_helpers.utils import get_matches, is_superset, overlap
from fonduer.supervision.lf_helpers.visual import (
get_aligned_lemmas,
get_horz_ngrams,
get_page,
get_page_horz_percentile,
get_page_vert_percentile,
get_vert_ngrams,
get_visual_aligned_lemmas,
is_horz_aligned,
is_vert_aligned,
is_vert_aligned_center,
is_vert_aligned_left,
is_vert_aligned_right,
is_vert_aligned_center,
same_page,
get_horz_ngrams,
get_vert_ngrams,
get_page_vert_percentile,
get_page_horz_percentile,
get_visual_aligned_lemmas,
get_aligned_lemmas,
)


def is_superset(a, b):
    """Check if a is a superset of b.

    This is typically used to check if ALL of a list of sentences is in the
    ngrams returned by an lf_helper.

    :param a: A collection of items
    :param b: A collection of items
    :return: True if every item of b is also present in a, False otherwise.
    :rtype: boolean
    """
    # Only ``a`` is materialized as a set; ``b`` may be any iterable.
    return set(a).issuperset(b)


def overlap(a, b):
    """Check if a overlaps b.

    This is typically used to check if ANY of a list of sentences is in the
    ngrams returned by an lf_helper.

    :param a: A collection of items
    :param b: A collection of items
    :return: True if a and b share at least one item, False otherwise.
    :rtype: boolean
    """
    # Two collections overlap exactly when they are not disjoint.
    return not set(a).isdisjoint(b)


def get_matches(lf, candidate_set, match_values=(1, -1)):
    """Return a list of candidates that are matched by a particular LF.

    A simple helper function to see how many matches (non-zero by default) an
    LF gets.

    :param lf: The labeling function to apply to the candidate_set
    :param candidate_set: The set of candidates to evaluate
    :param match_values: An optional collection of the values to consider as
        matched. (1, -1) by default.
    :return: The candidates, in iteration order, whose label is in
        match_values.
    :rtype: a list of candidates
    """
    logger = logging.getLogger(__name__)
    # The default is an immutable tuple so it cannot be shared and mutated
    # across calls; membership testing behaves the same as with a list.
    matches = [c for c in candidate_set if lf(c) in match_values]
    # Hand the count to the logger as an argument so the message is only
    # formatted when INFO records are actually emitted.
    logger.info("%s matches", len(matches))
    return matches


__all__ = [
"common_ancestor",
"get_aligned_lemmas",
Expand Down
49 changes: 49 additions & 0 deletions fonduer/supervision/lf_helpers/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import logging


def is_superset(a, b):
    """Check if a is a superset of b.

    This is typically used to check if ALL of a list of sentences is in the
    ngrams returned by an lf_helper.

    :param a: A collection of items
    :param b: A collection of items
    :return: True if every item of b is also present in a, False otherwise.
    :rtype: boolean
    """
    # Only ``a`` is materialized as a set; ``b`` may be any iterable.
    return set(a).issuperset(b)


def overlap(a, b):
    """Check if a overlaps b.

    This is typically used to check if ANY of a list of sentences is in the
    ngrams returned by an lf_helper.

    :param a: A collection of items
    :param b: A collection of items
    :return: True if a and b share at least one item, False otherwise.
    :rtype: boolean
    """
    # Two collections overlap exactly when they are not disjoint.
    return not set(a).isdisjoint(b)


def get_matches(lf, candidate_set, match_values=(1, -1)):
    """Return a list of candidates that are matched by a particular LF.

    A simple helper function to see how many matches (non-zero by default) an
    LF gets.

    :param lf: The labeling function to apply to the candidate_set
    :param candidate_set: The set of candidates to evaluate
    :param match_values: An optional collection of the values to consider as
        matched. (1, -1) by default.
    :return: The candidates, in iteration order, whose label is in
        match_values.
    :rtype: a list of candidates
    """
    logger = logging.getLogger(__name__)
    # The default is an immutable tuple so it cannot be shared and mutated
    # across calls; membership testing behaves the same as with a list.
    matches = [c for c in candidate_set if lf(c) in match_values]
    # Hand the count to the logger as an argument so the message is only
    # formatted when INFO records are actually emitted.
    logger.info("%s matches", len(matches))
    return matches

0 comments on commit fc3b7f1

Please sign in to comment.