diff --git a/convokit/classifier/classifier.py b/convokit/classifier/classifier.py index 7cddb993..d464f471 100644 --- a/convokit/classifier/classifier.py +++ b/convokit/classifier/classifier.py @@ -1,12 +1,11 @@ -from sklearn.model_selection import train_test_split, cross_val_score, KFold +from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix, classification_report +from sklearn.model_selection import train_test_split, cross_val_score, KFold from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from sklearn.linear_model import LogisticRegression +from convokit import Transformer from convokit.classifier.util import * -from convokit import Transformer, CorpusComponent -from convokit.util import deprecation class Classifier(Transformer): @@ -34,9 +33,7 @@ def __init__( labeller: Callable[[CorpusComponent], bool] = lambda x: True, clf=None, clf_attribute_name: str = "prediction", - clf_feat_name=None, clf_prob_attribute_name: str = "pred_score", - clf_prob_feat_name=None, ): self.pred_feats = pred_feats self.labeller = labeller @@ -50,16 +47,8 @@ def __init__( ) print("Initialized default classification model (standard scaled logistic regression).") self.clf = clf - self.clf_attribute_name = clf_attribute_name if clf_feat_name is None else clf_feat_name - self.clf_prob_attribute_name = ( - clf_prob_attribute_name if clf_prob_feat_name is None else clf_prob_feat_name - ) - - if clf_feat_name is not None: - deprecation("Classifier's clf_feat_name parameter", "clf_attribute_name") - - if clf_prob_feat_name is not None: - deprecation("Classifier's clf_prob_feat_name parameter", "clf_prob_attribute_name") + self.clf_attribute_name = clf_attribute_name + self.clf_prob_attribute_name = clf_prob_attribute_name def fit( self, corpus: Corpus, y=None, selector: Callable[[CorpusComponent], bool] = lambda x: True diff --git a/convokit/coordination/coordination.py b/convokit/coordination/coordination.py index f5c45104..1fb712b1 100644 --- a/convokit/coordination/coordination.py +++ b/convokit/coordination/coordination.py @@ -1,11 +1,12 @@ -import pkg_resources -from convokit.model import Corpus, Speaker, Utterance from collections import defaultdict from typing import Callable, Tuple, List, Dict, Optional, Collection, Union -from .coordinationScore import CoordinationScore, CoordinationWordCategories +import pkg_resources + +from convokit.model import Corpus, Speaker, Utterance from convokit.transformer import Transformer from convokit.util import deprecation +from .coordinationScore import CoordinationScore, CoordinationWordCategories class Coordination(Transformer): @@ -445,15 +446,13 @@ def _scores_over_utterances( speaker_thresh_indiv: int, target_thresh_indiv: int, utterances_thresh_indiv: int, - utterance_thresh_func: Optional[Callable[[Tuple[Utterance, Utterance]], bool]] = None, + utterance_thresh_func: Optional[Callable[[Utterance, Utterance], bool]] = None, focus: str = "speakers", split_by_attribs: Optional[List[str]] = None, speaker_utterance_selector: Callable[ - [Tuple[Utterance, Utterance]], bool - ] = lambda utt1, utt2: True, - target_utterance_selector: Callable[ - [Tuple[Utterance, Utterance]], bool + [Utterance, Utterance], bool ] = lambda utt1, utt2: True, + target_utterance_selector: Callable[[Utterance, Utterance], bool] = lambda utt1, utt2: True, ) -> CoordinationScore: assert not isinstance(speakers, str) assert focus == "speakers" or focus == "targets" @@ -479,8 +478,6 @@ def _scores_over_utterances( speaker, utt2, split_by_attribs ), Coordination._annot_speaker(target, utt1, split_by_attribs) - # speaker_has_attribs = Coordination._utterance_has_attribs(utt2, speaker_attribs) - # target_has_attribs = Coordination._utterance_has_attribs(utt1, target_attribs) speaker_filter = speaker_utterance_selector(utt2, utt1) target_filter = target_utterance_selector(utt2, utt1) diff --git a/convokit/coordination/coordinationScore.py b/convokit/coordination/coordinationScore.py index 945bd594..78b965be 100644 --- a/convokit/coordination/coordinationScore.py +++ b/convokit/coordination/coordinationScore.py @@ -1,7 +1,7 @@ -from convokit.model import Speaker -from convokit.util import deprecation from collections import defaultdict -from typing import Callable, Tuple, List, Dict, Optional, Collection, Hashable, Union +from typing import Dict, Optional, Hashable, Union + +from convokit.model import Speaker CoordinationWordCategories = [ "article", @@ -48,10 +48,6 @@ def scores_for_marker(self, marker: str) -> Dict[Union[Speaker, Hashable], float """ return {speaker: scores[marker] for speaker, scores in self.items()} - def averages_by_user(self): - deprecation("averages_by_user()", "averages_by_speaker()") - return {speaker: sum(scores.values()) / len(scores) for speaker, scores in self.items()} - def averages_by_speaker(self) -> Dict[Union[Speaker, Hashable], float]: """Return a dictionary from speakers to the average of each speaker's marker scores.""" diff --git a/convokit/expected_context_framework/demos/demo_text_pipelines.py b/convokit/expected_context_framework/demos/demo_text_pipelines.py index 9ed1dc60..d862a497 100644 --- a/convokit/expected_context_framework/demos/demo_text_pipelines.py +++ b/convokit/expected_context_framework/demos/demo_text_pipelines.py @@ -1,6 +1,6 @@ -from convokit.text_processing import TextProcessor, TextParser, TextToArcs -from convokit.phrasing_motifs import CensorNouns, QuestionSentences from convokit.convokitPipeline import ConvokitPipeline +from convokit.phrasing_motifs import CensorNouns, QuestionSentences +from convokit.text_processing import TextProcessor, TextParser, TextToArcs """ Some pipelines to compute the feature representations used in each Expected Context Model demo. @@ -11,11 +11,12 @@ def parliament_arc_pipeline(): return ConvokitPipeline( [ # to avoid most computations, we'll only run the pipeline if the desired attributes don't exist - ("parser", TextParser(input_filter=lambda utt, aux: utt.get_info("arcs") is None)), + ("parser", TextParser(input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None)), ( "censor_nouns", CensorNouns( - "parsed_censored", input_filter=lambda utt, aux: utt.get_info("arcs") is None + "parsed_censored", + input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None, ), ), ( @@ -24,7 +25,7 @@ def parliament_arc_pipeline(): "arc_arr", input_field="parsed_censored", root_only=True, - input_filter=lambda utt, aux: utt.get_info("arcs") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None, ), ), ( @@ -32,7 +33,7 @@ def parliament_arc_pipeline(): QuestionSentences( "q_arc_arr", input_field="arc_arr", - input_filter=lambda utt, aux: utt.get_info("q_arcs") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("q_arcs") is None, ), ), ( @@ -41,7 +42,7 @@ def parliament_arc_pipeline(): output_field="arcs", input_field="arc_arr", proc_fn=lambda x: "\n".join(x), - input_filter=lambda utt, aux: utt.get_info("arcs") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None, ), ), ( @@ -50,7 +51,7 @@ def parliament_arc_pipeline(): output_field="q_arcs", input_field="q_arc_arr", proc_fn=lambda x: "\n".join(x), - input_filter=lambda utt, aux: utt.get_info("q_arcs") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("q_arcs") is None, ), ), ] @@ -63,14 +64,15 @@ def wiki_arc_pipeline(): ( "parser", TextParser( - input_filter=lambda utt, aux: (utt.get_info("arcs") is None) - and (utt.get_info("parsed") is None) + input_filter=lambda utt, aux: (utt.retrieve_meta("arcs") is None) + and (utt.retrieve_meta("parsed") is None) ), ), ( "censor_nouns", CensorNouns( - "parsed_censored", input_filter=lambda utt, aux: utt.get_info("arcs") is None + "parsed_censored", + input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None, ), ), ( @@ -79,7 +81,7 @@ def wiki_arc_pipeline(): "arc_arr", input_field="parsed_censored", root_only=False, - input_filter=lambda utt, aux: utt.get_info("arcs") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None, ), ), ( @@ -88,7 +90,7 @@ def wiki_arc_pipeline(): output_field="arcs", input_field="arc_arr", proc_fn=lambda x: "\n".join(x), - input_filter=lambda utt, aux: utt.get_info("arcs") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None, ), ), ] @@ -98,14 +100,14 @@ def wiki_arc_pipeline(): def scotus_arc_pipeline(): return ConvokitPipeline( [ - ("parser", TextParser(input_filter=lambda utt, aux: utt.get_info("arcs") is None)), + ("parser", TextParser(input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None)), ( "arcs", TextToArcs( "arc_arr", input_field="parsed", root_only=False, - input_filter=lambda utt, aux: utt.get_info("arcs") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None, ), ), ( @@ -114,7 +116,7 @@ def scotus_arc_pipeline(): output_field="arcs", input_field="arc_arr", proc_fn=lambda x: "\n".join(x), - input_filter=lambda utt, aux: utt.get_info("arcs") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("arcs") is None, ), ), ] @@ -130,7 +132,7 @@ def switchboard_text_pipeline(): TextProcessor( proc_fn=lambda x: x, output_field="alpha_text", - input_filter=lambda utt, aux: utt.get_info("alpha_text") is None, + input_filter=lambda utt, aux: utt.retrieve_meta("alpha_text") is None, ), ) ] diff --git a/convokit/forecaster/forecasterModel.py b/convokit/forecaster/forecasterModel.py index b06924c8..b5392fdf 100644 --- a/convokit/forecaster/forecasterModel.py +++ b/convokit/forecaster/forecasterModel.py @@ -1,35 +1,19 @@ from abc import ABC, abstractmethod -from convokit.util import deprecation class ForecasterModel(ABC): def __init__( self, forecast_attribute_name: str = "prediction", - forecast_feat_name=None, forecast_prob_attribute_name: str = "score", - forecast_prob_feat_name=None, ): """ :param forecast_attribute_name: name for DataFrame column containing predictions, default: "prediction" :param forecast_prob_attribute_name: name for column containing prediction scores, default: "score" """ - self.forecast_attribute_name = ( - forecast_attribute_name if forecast_feat_name is None else forecast_feat_name - ) - self.forecast_prob_attribute_name = ( - forecast_prob_attribute_name - if forecast_prob_feat_name is None - else forecast_prob_feat_name - ) - - for deprecated_set in [ - (forecast_feat_name, "forecast_feat_name", "forecast_attribute_name"), - (forecast_prob_feat_name, "forecast_prob_feat_name", "forecast_prob_attribute_name"), - ]: - if deprecated_set[0] is not None: - deprecation(f"Forecaster's {deprecated_set[1]} parameter", f"{deprecated_set[2]}") + self.forecast_attribute_name = forecast_attribute_name + self.forecast_prob_attribute_name = forecast_prob_attribute_name @abstractmethod def train(self, id_to_context_reply_label): diff --git a/convokit/hyperconvo/hyperconvo.py b/convokit/hyperconvo/hyperconvo.py index d8abcf64..94f8c44a 100644 --- a/convokit/hyperconvo/hyperconvo.py +++ b/convokit/hyperconvo/hyperconvo.py @@ -1,12 +1,12 @@ +from typing import Dict, Optional, Callable + import numpy as np -import scipy.stats import pandas as pd +import scipy.stats from scipy.sparse import csr_matrix -from typing import Dict, Optional, Callable -from convokit.util import deprecation -from convokit.transformer import Transformer from convokit.model import Corpus, Conversation +from convokit.transformer import Transformer from .hypergraph import Hypergraph @@ -71,15 +71,11 @@ def __init__( prefix_len: int = 10, min_convo_len: int = 10, vector_name: str = "hyperconvo", - feat_name=None, invalid_val: float = np.nan, ): self.prefix_len = prefix_len self.min_convo_len = min_convo_len - self.vector_name = vector_name if feat_name is None else feat_name - if feat_name is not None: - deprecation("HyperConvo's feat_name parameter", "vector_name") - + self.vector_name = vector_name self.invalid_val = invalid_val def transform( diff --git a/convokit/model/__init__.py b/convokit/model/__init__.py index 6aa6434b..038ef994 100644 --- a/convokit/model/__init__.py +++ b/convokit/model/__init__.py @@ -5,6 +5,5 @@ from .corpusComponent import CorpusComponent from .corpus_helpers import * from .speaker import Speaker -from .user import User from .utterance import Utterance from .utteranceNode import UtteranceNode diff --git a/convokit/model/conversation.py b/convokit/model/conversation.py index 447b4c60..fc8ec751 100644 --- a/convokit/model/conversation.py +++ b/convokit/model/conversation.py @@ -1,11 +1,12 @@ +from collections import defaultdict from typing import Dict, List, Callable, Generator, Optional -from .utterance import Utterance -from .speaker import Speaker -from convokit.util import deprecation, warn + +from convokit.util import warn from .corpusComponent import CorpusComponent -from collections import defaultdict -from .utteranceNode import UtteranceNode from .corpusUtil import * +from .speaker import Speaker +from .utterance import Utterance +from .utteranceNode import UtteranceNode class Conversation(CorpusComponent): @@ -90,23 +91,6 @@ def get_utterances_dataframe( """ return get_utterances_dataframe(self, selector, exclude_meta) - def get_usernames(self) -> List[str]: - """Produces a list of names of all speakers in the Conversation, which can - be used in calls to get_speaker() to retrieve specific speakers. Provides no - ordering guarantees for the list. - - :return: a list of usernames - """ - deprecation("get_usernames()", "get_speaker_ids()") - if self._speaker_ids is None: - # first call to get_usernames or iter_speakers; precompute cached list - # of usernames - self._speaker_ids = set() - for ut_id in self._utterance_ids: - ut = self._owner.get_utterance(ut_id) - self._speaker_ids.add(ut.speaker.name) - return list(self._speaker_ids) - def get_speaker_ids(self) -> List[str]: """ Produces a list of ids of all speakers in the Conversation, which can be used in calls to get_speaker() @@ -133,10 +117,6 @@ def get_speaker(self, speaker_id: str) -> Speaker: # any Utterances return self._owner.get_speaker(speaker_id) - def get_user(self, speaker_id: str): - deprecation("get_user()", "get_speaker()") - return self.get_speaker(speaker_id) - def iter_speakers( self, selector: Callable[[Speaker], bool] = lambda speaker: True ) -> Generator[Speaker, None, None]: @@ -176,10 +156,6 @@ def get_speakers_dataframe( """ return get_speakers_dataframe(self, selector, exclude_meta) - def iter_users(self, selector=lambda speaker: True): - deprecation("iter_users()", "iter_speakers()") - return self.iter_speakers(selector) - def print_conversation_stats(self): """ Helper function for printing the number of Utterances and Spekaers in the Conversation. diff --git a/convokit/model/corpus.py b/convokit/model/corpus.py index 56ed296e..350d0574 100644 --- a/convokit/model/corpus.py +++ b/convokit/model/corpus.py @@ -6,7 +6,7 @@ from tqdm import tqdm from convokit.convokitConfig import ConvoKitConfig -from convokit.util import deprecation, create_safe_id +from convokit.util import create_safe_id from .convoKitIndex import ConvoKitIndex from .convoKitMatrix import ConvoKitMatrix from .corpusUtil import * @@ -414,10 +414,6 @@ def get_speaker(self, speaker_id: str) -> Speaker: """ return self.speakers[speaker_id] - def get_user(self, user_id: str) -> Speaker: - deprecation("get_user()", "get_speaker()") - return self.get_speaker(user_id) - def get_object(self, obj_type: str, oid: str): """ General Corpus object getter. Gets Speaker / Utterance / Conversation of specified id from the Corpus @@ -461,10 +457,6 @@ def has_speaker(self, speaker_id: str) -> bool: """ return speaker_id in self.speakers - def has_user(self, speaker_id): - deprecation("has_user()", "has_speaker()") - return self.has_speaker(speaker_id) - def random_utterance(self) -> Utterance: """ Get a random Utterance from the Corpus @@ -489,10 +481,6 @@ def random_speaker(self) -> Speaker: """ return random.choice(list(self.speakers.values())) - def random_user(self) -> Speaker: - deprecation("random_user()", "random_speaker()") - return self.random_speaker() - def iter_utterances( self, selector: Optional[Callable[[Utterance], bool]] = lambda utt: True ) -> Generator[Utterance, None, None]: @@ -584,10 +572,6 @@ def get_speakers_dataframe( """ return get_speakers_dataframe(self, selector, exclude_meta) - def iter_users(self, selector=lambda speaker: True): - deprecation("iter_users()", "iter_speakers()") - return self.iter_speakers(selector) - def iter_objs( self, obj_type: str, @@ -663,25 +647,6 @@ def get_object_ids( assert obj_type in ["speaker", "utterance", "conversation"] return [obj.id for obj in self.iter_objs(obj_type, selector)] - def get_usernames( - self, selector: Optional[Callable[[Speaker], bool]] = lambda user: True - ) -> Set[str]: - """Get names of speakers in the dataset. - - This function will be deprecated and replaced by get_speaker_ids() - - :param selector: optional function that takes in a - `Speaker` and returns True to include the speaker's name in the - resulting list, or False otherwise. - - :return: Set containing all speaker names selected by the selector - function, or all speaker names in the dataset if no selector function - was used. - - """ - deprecation("get_usernames()", "get_speaker_ids()") - return set([u.id for u in self.iter_speakers(selector)]) - def filter_conversations_by(self, selector: Callable[[Conversation], bool]): """ Mutate the corpus by filtering for a subset of Conversations within the Corpus. @@ -1365,7 +1330,7 @@ def load_info(self, obj_type, fields=None, dir_name=None): for k, v in entries.items(): try: obj = getter(k) - obj.set_info(field, v) + obj.add_meta(field, v) except: continue @@ -1529,7 +1494,7 @@ def organize_speaker_convo_history(self, utterance_filter=None): self.set_speaker_convo_info(speaker, convo, "n_utterances", len(sorted_utts)) for speaker in self.iter_speakers(): try: - speaker.set_info("n_convos", len(speaker.retrieve_meta("conversations"))) + speaker.add_meta("n_convos", len(speaker.retrieve_meta("conversations"))) except: continue @@ -1537,7 +1502,7 @@ def organize_speaker_convo_history(self, utterance_filter=None): speaker.retrieve_meta("conversations").items(), key=lambda x: (x[1]["start_time"], x[1]["utterance_ids"][0]), ) - speaker.set_info("start_time", sorted_convos[0][1]["start_time"]) + speaker.add_meta("start_time", sorted_convos[0][1]["start_time"]) for idx, (convo_id, _) in enumerate(sorted_convos): self.set_speaker_convo_info(speaker.id, convo_id, "idx", idx) diff --git a/convokit/model/corpusComponent.py b/convokit/model/corpusComponent.py index 2ae4fd48..3c6e9606 100644 --- a/convokit/model/corpusComponent.py +++ b/convokit/model/corpusComponent.py @@ -1,7 +1,8 @@ -from .convoKitMeta import ConvoKitMeta -from convokit.util import warn, deprecation from typing import List, Optional +from convokit.util import warn +from .convoKitMeta import ConvoKitMeta + class CorpusComponent: def __init__( @@ -134,25 +135,6 @@ def add_meta(self, key: str, value) -> None: """ self.meta[key] = value - def get_info(self, key): - """ - Gets attribute of the corpus object. Returns None if the corpus object does not have this attribute. - :param key: name of attribute - :return: attribute - """ - deprecation("get_info()", "retrieve_meta()") - return self.meta.get(key, None) - - def set_info(self, key, value): - """ - Sets attribute of the corpus object to . - :param key: name of attribute - :param value: value to set - :return: None - """ - deprecation("set_info()", "add_meta()") - self.meta[key] = value - def get_vector( self, vector_name: str, as_dataframe: bool = False, columns: Optional[List[str]] = None ): diff --git a/convokit/model/speaker.py b/convokit/model/speaker.py index a6392d49..0f4018aa 100644 --- a/convokit/model/speaker.py +++ b/convokit/model/speaker.py @@ -1,6 +1,6 @@ from functools import total_ordering -from typing import Dict, List, Optional, Callable -from convokit.util import deprecation +from typing import Dict, List, Optional + from .corpusComponent import CorpusComponent from .corpusUtil import * @@ -27,13 +27,11 @@ def __init__( self, owner=None, id: str = None, - name: str = None, utts=None, convos=None, meta: Optional[Dict] = None, ): - name_var = id if id is not None else name # to be deprecated - super().__init__(obj_type="speaker", owner=owner, id=name_var, meta=meta) + super().__init__(obj_type="speaker", owner=owner, id=id, meta=meta) self.utterances = utts if utts is not None else dict() self.conversations = convos if convos is not None else dict() # self._split_attribs = set() @@ -55,17 +53,6 @@ def __init__( # self._split_attribs = set(attribs) # # self._update_uid() - def _get_name(self): - deprecation("speaker.name", "speaker.id") - return self._id - - def _set_name(self, value: str): - deprecation("speaker.name", "speaker.id") - self._id = value - # self._update_uid() - - name = property(_get_name, _set_name) - def _add_utterance(self, utt): self.utterances[utt.id] = utt diff --git a/convokit/model/user.py b/convokit/model/user.py deleted file mode 100644 index 2a0eefe4..00000000 --- a/convokit/model/user.py +++ /dev/null @@ -1,8 +0,0 @@ -from .speaker import Speaker -from convokit.util import deprecation - - -class User(Speaker): - def __init__(self, *args, **kwargs): - deprecation("The User class", "the Speaker class") - super().__init__(*args, **kwargs) diff --git a/convokit/model/utterance.py b/convokit/model/utterance.py index a3a4edda..f066090c 100644 --- a/convokit/model/utterance.py +++ b/convokit/model/utterance.py @@ -1,5 +1,6 @@ from typing import Dict, Optional -from convokit.util import deprecation, warn + +from convokit.util import warn from .corpusComponent import CorpusComponent from .speaker import Speaker @@ -30,9 +31,7 @@ def __init__( owner=None, id: Optional[str] = None, speaker: Optional[Speaker] = None, - user: Optional[Speaker] = None, conversation_id: Optional[str] = None, - root: Optional[str] = None, reply_to: Optional[str] = None, timestamp: Optional[int] = None, text: str = "", @@ -40,12 +39,7 @@ def __init__( ): # check arguments that have alternate naming due to backwards compatibility if speaker is None: - if user is not None: - speaker = user - else: - raise ValueError("No Speaker found: Utterance must be initialized with a Speaker.") - if conversation_id is None and root is not None: - conversation_id = root + raise ValueError("No Speaker found: Utterance must be initialized with a Speaker.") if conversation_id is not None and not isinstance(conversation_id, str): warn( @@ -116,17 +110,6 @@ def _set_text(self, val): text = property(_get_text, _set_text) - def _get_root(self): - deprecation("utterance.root", "utterance.conversation_id") - return self.conversation_id - - def _set_root(self, value: str): - deprecation("utterance.root", "utterance.conversation_id") - self.conversation_id = value - # self._update_uid() - - root = property(_get_root, _set_root) - ############################################################################ ## end properties ############################################################################ diff --git a/convokit/paired_prediction/pairedPrediction.py b/convokit/paired_prediction/pairedPrediction.py index 88649081..1f24ac4f 100644 --- a/convokit/paired_prediction/pairedPrediction.py +++ b/convokit/paired_prediction/pairedPrediction.py @@ -1,12 +1,13 @@ -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler +from typing import List, Callable + from sklearn.linear_model import LogisticRegression from sklearn.model_selection import cross_val_score, KFold -from typing import List, Callable +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler + from convokit import Transformer, CorpusComponent, Corpus -from .util import * from convokit.classifier.util import get_coefs_helper -from convokit.util import deprecation +from .util import * class PairedPrediction(Transformer): @@ -33,11 +34,8 @@ def __init__( pred_feats: List[str], clf=None, pair_id_attribute_name: str = "pair_id", - pair_id_feat_name=None, label_attribute_name: str = "pair_obj_label", - label_feat_name=None, pair_orientation_attribute_name: str = "pair_orientation", - pair_orientation_feat_name=None, ): assert obj_type in ["speaker", "utterance", "conversation"] @@ -53,31 +51,9 @@ def __init__( else clf ) self.pred_feats = pred_feats - self.pair_id_attribute_name = ( - pair_id_attribute_name if pair_id_feat_name is None else pair_id_feat_name - ) - self.label_attribute_name = ( - label_attribute_name if label_feat_name is None else label_feat_name - ) - self.pair_orientation_attribute_name = ( - pair_orientation_attribute_name - if pair_orientation_feat_name is None - else pair_orientation_feat_name - ) - - for deprecated_set in [ - (pair_id_feat_name, "pair_id_feat_name", "pair_id_attribute_name"), - (label_feat_name, "label_feat_name", "label_attribute_name"), - ( - pair_orientation_feat_name, - "pair_orientation_feat_name", - "pair_orientation_attribute_name", - ), - ]: - if deprecated_set[0] is not None: - deprecation( - f"PairedPrediction's {deprecated_set[1]} parameter", f"{deprecated_set[2]}" - ) + self.pair_id_attribute_name = pair_id_attribute_name + self.label_attribute_name = label_attribute_name + self.pair_orientation_attribute_name = pair_orientation_attribute_name def fit( self, corpus: Corpus, y=None, selector: Callable[[CorpusComponent], bool] = lambda x: True diff --git a/convokit/paired_prediction/pairer.py b/convokit/paired_prediction/pairer.py index f9d421a6..0d39d7ed 100644 --- a/convokit/paired_prediction/pairer.py +++ b/convokit/paired_prediction/pairer.py @@ -1,9 +1,7 @@ -from typing import Callable -from .util import * from collections import defaultdict from random import shuffle, choice +from typing import Callable -from convokit.util import deprecation from convokit import Transformer, CorpusComponent, Corpus @@ -30,11 +28,8 @@ def __init__( neg_label_func: Callable[[CorpusComponent], bool], pair_mode: str = "random", pair_id_attribute_name: str = "pair_id", - pair_id_feat_name=None, label_attribute_name: str = "pair_obj_label", - label_feat_name=None, pair_orientation_attribute_name: str = "pair_orientation", - pair_orientation_feat_name=None, ): assert obj_type in ["speaker", "utterance", "conversation"] self.obj_type = obj_type @@ -42,29 +37,9 @@ def __init__( self.pos_label_func = pos_label_func self.neg_label_func = neg_label_func self.pair_mode = pair_mode - self.pair_id_attribute_name = ( - pair_id_attribute_name if pair_id_feat_name is None else pair_id_feat_name - ) - self.label_attribute_name = ( - label_attribute_name if label_feat_name is None else label_feat_name - ) - self.pair_orientation_attribute_name = ( - pair_orientation_attribute_name - if pair_orientation_feat_name is None - else pair_orientation_feat_name - ) - - for deprecated_set in [ - (pair_id_feat_name, "pair_id_feat_name", "pair_id_attribute_name"), - (label_feat_name, "label_feat_name", "label_attribute_name"), - ( - pair_orientation_feat_name, - "pair_orientation_feat_name", - "pair_orientation_attribute_name", - ), - ]: - if deprecated_set[0] is not None: - deprecation(f"Pairer's {deprecated_set[1]} parameter", f"{deprecated_set[2]}") + self.pair_id_attribute_name = pair_id_attribute_name + self.label_attribute_name = label_attribute_name + self.pair_orientation_attribute_name = pair_orientation_attribute_name def _get_pos_neg_objects(self, corpus: Corpus, selector): """ diff --git a/convokit/phrasing_motifs/phrasingMotifs.py b/convokit/phrasing_motifs/phrasingMotifs.py index b428c054..ab2348c5 100644 --- a/convokit/phrasing_motifs/phrasingMotifs.py +++ b/convokit/phrasing_motifs/phrasingMotifs.py @@ -1,8 +1,9 @@ -from convokit.text_processing import TextProcessor import itertools -from collections import defaultdict import json import os +from collections import defaultdict + +from convokit.text_processing import TextProcessor class PhrasingMotifs(TextProcessor): @@ -102,7 +103,7 @@ def _get_sent_arcset_dict(self, corpus): sent_dict = {} for utterance in corpus.iter_utterances(): if self.fit_filter(utterance): - for idx, sent in enumerate(utterance.get_info(self.input_field)): + for idx, sent in enumerate(utterance.retrieve_meta(self.input_field)): sent_dict["%s__%d" % (utterance.id, idx)] = sent.split() return sent_dict diff --git a/convokit/prompt_types/promptTypeWrapper.py b/convokit/prompt_types/promptTypeWrapper.py index 9576868f..0c6e1ef3 100644 --- a/convokit/prompt_types/promptTypeWrapper.py +++ b/convokit/prompt_types/promptTypeWrapper.py @@ -1,12 +1,11 @@ -from convokit import Corpus -from convokit.text_processing import TextProcessor, TextParser, TextToArcs +import os + +from convokit.convokitPipeline import ConvokitPipeline +from convokit.model import Utterance from convokit.phrasing_motifs import CensorNouns, QuestionSentences, PhrasingMotifs from convokit.prompt_types import PromptTypes -from convokit.convokitPipeline import ConvokitPipeline +from convokit.text_processing import TextParser, TextToArcs from convokit.transformer import Transformer -from convokit.model import Utterance - -import os class PromptTypeWrapper(Transformer): @@ -64,7 +63,8 @@ def __init__( "parser", TextParser( verbosity=verbosity, - input_filter=lambda utt, aux: recompute_all or (utt.get_info("parsed") is None), + input_filter=lambda utt, aux: recompute_all + or (utt.retrieve_meta("parsed") is None), ), ), ( @@ -72,7 +72,7 @@ def __init__( CensorNouns( "parsed_censored", input_filter=lambda utt, aux: recompute_all - or (utt.get_info("parsed_censored") is None), + or (utt.retrieve_meta("parsed_censored") is None), verbosity=verbosity, ), ), @@ -81,7 +81,8 @@ def __init__( TextToArcs( "arcs", input_field="parsed_censored", - input_filter=lambda utt, aux: recompute_all or (utt.get_info("arcs") is None), + input_filter=lambda utt, aux: recompute_all + or (utt.retrieve_meta("arcs") is None), root_only=root_only, verbosity=verbosity, ), diff --git a/convokit/ranker/ranker.py b/convokit/ranker/ranker.py index 177e1d97..c60e011b 100644 --- a/convokit/ranker/ranker.py +++ b/convokit/ranker/ranker.py @@ -1,8 +1,8 @@ from typing import List, Callable, Union + import pandas as pd from convokit import Corpus, Transformer, CorpusComponent -from convokit.util import deprecation class Ranker(Transformer): @@ -20,22 +20,12 @@ def __init__( obj_type: str, score_func: Callable[[CorpusComponent], Union[int, float]], score_attribute_name: str = "score", - score_feat_name=None, rank_attribute_name: str = "rank", - rank_feat_name=None, ): self.obj_type = obj_type self.score_func = score_func - self.score_attribute_name = ( - score_attribute_name if score_feat_name is None else score_feat_name - ) - self.rank_attribute_name = rank_attribute_name if rank_feat_name is None else rank_feat_name - - if score_feat_name is not None: - deprecation("Ranker's score_feat_name parameter", "score_attribute_name") - - if rank_feat_name is not None: - deprecation("Ranker's rank_feat_name parameter", "rank_attribute_name") + self.score_attribute_name = score_attribute_name + self.rank_attribute_name = rank_attribute_name def transform( self, corpus: Corpus, y=None, selector: Callable[[CorpusComponent], bool] = lambda obj: True diff --git a/convokit/tests/phrasing_motifs/test_questionSentences.py b/convokit/tests/phrasing_motifs/test_questionSentences.py index 15ed18de..babe7fc3 100644 --- a/convokit/tests/phrasing_motifs/test_questionSentences.py +++ b/convokit/tests/phrasing_motifs/test_questionSentences.py @@ -1,13 +1,13 @@ import unittest -from convokit.tests.util import parsed_burr_sir_corpus from convokit.phrasing_motifs.questionSentences import QuestionSentences +from convokit.tests.util import parsed_burr_sir_corpus def parsed_burr_sir_corpus_with_lowercase_are(): corpus = parsed_burr_sir_corpus() for utterance in corpus.iter_utterances(): - parsed = utterance.get_info("parsed") + parsed = utterance.retrieve_meta("parsed") for sentence in parsed: if sentence["toks"][0]["tok"] == "Are": sentence["toks"][0]["tok"] = "are" @@ -27,7 +27,7 @@ def test_use_caps(self): for utterance, expected_sentences in zip( transformed_corpus.iter_utterances(), expected_sentences_list ): - self.assertListEqual(expected_sentences, utterance.get_info("questions")) + self.assertListEqual(expected_sentences, utterance.retrieve_meta("questions")) def test_dont_use_caps(self): corpus = parsed_burr_sir_corpus_with_lowercase_are() @@ -40,4 +40,4 @@ def test_dont_use_caps(self): for utterance, expected_sentences in zip( transformed_corpus.iter_utterances(), expected_sentences_list ): - self.assertListEqual(expected_sentences, utterance.get_info("questions")) + self.assertListEqual(expected_sentences, utterance.retrieve_meta("questions")) diff --git a/convokit/tests/util.py b/convokit/tests/util.py index 5e230bfa..ae259e1a 100644 --- a/convokit/tests/util.py +++ b/convokit/tests/util.py @@ -183,7 +183,7 @@ def parsed_burr_sir_corpus(): [BURR_SIR_SENTENCE_3, BURR_SIR_SENTENCE_4], ] for utterance, sentences in zip(corpus.iter_utterances(), sentence_lists): - utterance.set_info("sentences", sentences) + utterance.add_meta("sentences", sentences) return corpus