diff --git a/.gitignore b/.gitignore
index 9bac2e8..cdca365 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,4 @@ __pycache__
 docs/build
 .coverage
 poetry.lock
-coverage.xml
+coverage.xml
\ No newline at end of file
diff --git a/README.rst b/README.rst
index 5d56f15..28a4ec1 100644
--- a/README.rst
+++ b/README.rst
@@ -1,22 +1,24 @@
-|Tests|_ |Coverage|_ |ReadTheDocs|_ |PythonVersion|_ |Black|_ |License|_
+|Tests| |Coverage| |ReadTheDocs| |PythonVersion| |PyPI| |Black| |License|
 
 .. |Tests| image:: https://github.com/GauravPandeyLab/eipy/actions/workflows/tests.yml/badge.svg
-.. _Tests: https://github.com/GauravPandeyLab/eipy/actions/workflows/tests.yml
+   :target: https://github.com/GauravPandeyLab/eipy/actions/workflows/tests.yml
 
 .. |Coverage| image:: https://codecov.io/gh/GauravPandeyLab/eipy/graph/badge.svg?token=M2AU2XWJB8
-.. _Coverage: https://codecov.io/gh/GauravPandeyLab/eipy
+   :target: https://codecov.io/gh/GauravPandeyLab/eipy
 
 .. |ReadTheDocs| image:: https://readthedocs.org/projects/eipy/badge/?version=latest
-.. _ReadTheDocs: https://eipy.readthedocs.io/en/latest/
+   :target: https://eipy.readthedocs.io/en/latest/
+
+.. |PyPI| image:: https://img.shields.io/pypi/v/ensemble-integration
+   :target: https://pypi.org/project/ensemble-integration/
 
 .. |PythonVersion| image:: https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue
-.. _PythonVersion: https://github.com/GauravPandeyLab/eipy
 
 .. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg
-.. _Black: https://github.com/psf/black
+   :target: https://github.com/psf/black
 
 .. |License| image:: https://img.shields.io/badge/License-GPLv3-blue
-.. _License: https://github.com/GauravPandeyLab/eipy/blob/main/COPYING
+   :target: https://github.com/GauravPandeyLab/eipy/blob/main/COPYING
 
 ``ensemble-integration``: Integrating multi-modal data for predictive modeling
diff --git a/docs/source/development.rst b/docs/source/development.rst
index f44941f..e543169 100644
--- a/docs/source/development.rst
+++ b/docs/source/development.rst
@@ -1,7 +1,7 @@
 Development
 ===========
 
-We welcome contributions to the development of ``eipy``. To contribute follow the below instructions to submit a pull request:
+We welcome contributions to the development of ``ensemble-integration``. To contribute, follow the instructions below to submit a pull request:
 
 1. **Install Python**. First of all make sure you have a supported version of Python on your local machine (see `GitHub `__ for supported versions).
 2. **Install Poetry**. ``eipy`` uses Poetry to manage dependencies. To install Poetry follow the instructions on their `website `__.
@@ -49,9 +49,9 @@ Note that new test file names must have the prefix `test_`.
 
 9. **Submit pull request**. Updates must be made via a pull request. Internal users should note that pushing to the main branch has been disabled.
 
-10. **Publishing new versions to PyPI** (internal only). We now use `poetry-dynamic-versioning `
+10. **Publishing new versions to PyPI** (internal only). We now use `poetry-dynamic-versioning `__
 to iterate version numbers in pyproject.toml automatically. You can publish to PyPI by creating
 a new `release `__, which will run the "Publish to PyPI" workflow.
 This workflow determines the PyPI version number from the GitHub release tag, which you should manually iterate.
 
-Note: to test things out first, you can try manually running the "Publish to test PyPI" workflow. 
\ No newline at end of file
+Note: to test things out first, you can try manually running the "Publish to test PyPI" workflow.
diff --git a/eipy/additional_ensembles.py b/eipy/additional_ensembles.py
index c62a4a9..b803264 100644
--- a/eipy/additional_ensembles.py
+++ b/eipy/additional_ensembles.py
@@ -14,7 +14,18 @@
 
 
 class MeanAggregation(BaseEstimator, ClassifierMixin):
     """
+    Mean Aggregation
+    Trivially takes the mean of X.
+
+    Attributes
+    ----------
+    classes : array
+        Ordered array of unique class labels.
+    X_ : array of (n_samples, n_features)
+        Base predictor data for computing the mean.
+    y_ : array of (n_samples,)
+        True labels of X_.
     """
 
     def __init__(self):
@@ -36,7 +47,18 @@ def predict_proba(self, X):
 
 
 class MedianAggregation(BaseEstimator, ClassifierMixin):
     """
+    Median Aggregation
+    Trivially takes the median of X.
+
+    Attributes
+    ----------
+    classes : array
+        Ordered array of unique class labels.
+    X_ : array of (n_samples, n_features)
+        Base predictor data for computing the median.
+    y_ : array of (n_samples,)
+        True labels of X_.
     """
 
     def __init__(self):
@@ -63,6 +85,28 @@ class CES(BaseEstimator, ClassifierMixin):
     Caruana R. et al. (2006) Getting the most out of ensemble selection.
     In: Sixth International Conference on Data Mining (ICDM'06), 2006 IEEE,
     Piscataway, NJ, USA, pp. 828-833.
+
+    Parameters
+    ----------
+    scoring : callable
+        Scoring function used to evaluate candidate ensembles.
+    max_ensemble_size : int
+        Maximum number of base models to ensemble.
+    random_state : int
+        Random seed for reproducibility.
+    greater_is_better : bool
+        Whether a higher metric score indicates better performance.
+
+    Attributes
+    ----------
+    selected_ensemble : list
+        List of models selected for the ensemble.
+    train_performance : list
+        Record of model performances.
+    argbest : bool
+        True if the metric of interest is to be maximized. Used for model selection.
+    best : bool
+        True if the metric of interest is to be maximized. Used for selecting maximum scorers.
     """
 
     def __init__(
diff --git a/eipy/datasets.py b/eipy/datasets.py
index 7e79315..7bba460 100644
--- a/eipy/datasets.py
+++ b/eipy/datasets.py
@@ -25,7 +25,7 @@ def load_diabetes():
     """
     zenodo_link = "https://zenodo.org/records/10035422/files/diabetes.zip?download=1"
     # Get data path
-    data_path = get_data_home()
+    data_path = _get_data_home()
    folder_ext = "diabetes"
     data_ext_path = join(data_path, folder_ext)
     # check data downloaded before
@@ -66,7 +66,7 @@ def _load_csv(file_path, fn, suffix):
     return pd.read_csv(join(file_path, f"{fn}_{suffix}.csv"), index_col=0)
 
 
-def get_data_home(data_home=None):
+def _get_data_home(data_home=None):
     """Return the path of the eipy data directory.
 
     This function is referring from scikit-learn.
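The docstrings above frame MeanAggregation and MedianAggregation as scikit-learn-style classifiers whose predictions reduce to the mean or median of the base predictor probabilities in X. A minimal usage sketch, assuming only the fit/predict_proba interface implied by BaseEstimator and ClassifierMixin (the data values are illustrative):

import numpy as np
from eipy.additional_ensembles import MeanAggregation, MedianAggregation

# Each column holds one base predictor's probability for the positive class.
X = np.array([[0.2, 0.8, 0.5],
              [0.9, 0.7, 0.6],
              [0.1, 0.3, 0.2]])
y = np.array([0, 1, 0])

mean_model = MeanAggregation().fit(X, y)
median_model = MedianAggregation().fit(X, y)

# Per the docstrings, predictions should reduce to row-wise means/medians of X.
print(mean_model.predict_proba(X))
print(median_model.predict_proba(X))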
diff --git a/eipy/ei.py b/eipy/ei.py
index 8bc13fc..a8b45d6 100755
--- a/eipy/ei.py
+++ b/eipy/ei.py
@@ -17,21 +17,21 @@ from joblib import Parallel, delayed
 import warnings
 
 from eipy.utils import (
-    X_is_dict,
-    X_to_numpy,
-    y_to_numpy,
-    set_predictor_seeds,
-    random_integers,
-    sample,
-    retrieve_X_y,
-    append_modality,
-    safe_predict_proba,
+    _X_is_dict,
+    _X_to_numpy,
+    _y_to_numpy,
+    _set_predictor_seeds,
+    _random_integers,
+    _sample,
+    _retrieve_X_y,
+    _append_modality,
+    _safe_predict_proba,
     dummy_cv,
     bar_format,
 )
 from eipy.metrics import (
-    base_summary,
-    ensemble_summary,
+    _base_summary,
+    _ensemble_summary,
 )
 
 warnings.filterwarnings("ignore", category=DeprecationWarning)
@@ -181,7 +181,7 @@ def __init__(
         self.modality_names = []
         self.n_features_per_modality = []
 
-        self.random_numbers_for_samples = random_integers(
+        self.random_numbers_for_samples = _random_integers(
             n_integers=n_samples, seed=self.random_state
         )
         self.feature_names = {}
@@ -210,17 +210,17 @@ def fit_base(self, X, y, base_predictors=None, modality_name=None):
             \n... for ensemble performance analysis..."""
         )
         # convert y to a numpy array
-        y = y_to_numpy(y)
+        y = _y_to_numpy(y)
 
         # check if base_predictors are passed here
         if base_predictors is not None:
             self.base_predictors = base_predictors  # update base predictors
 
         # set random_states in base_predictors
-        set_predictor_seeds(self.base_predictors, self.random_state)
+        _set_predictor_seeds(self.base_predictors, self.random_state)
 
         # check data format and train accordingly
-        if X_is_dict(X):
+        if _X_is_dict(X):
             for modality_name, modality in X.items():
                 self._fit_base(
                     X=modality,
@@ -252,12 +252,12 @@
         if ensemble_predictors is not None:
             self.ensemble_predictors = ensemble_predictors
 
-        set_predictor_seeds(self.ensemble_predictors, self.random_state)
+        _set_predictor_seeds(self.ensemble_predictors, self.random_state)
 
         y_test_combined = []
 
         for fold_id in range(self.k_outer):
-            _, y_test = retrieve_X_y(labelled_data=self.ensemble_test_data[fold_id])
+            _, y_test = _retrieve_X_y(labelled_data=self.ensemble_test_data[fold_id])
             y_test_combined.extend(y_test)
 
         ensemble_predictions = {}
@@ -270,17 +270,17 @@
             y_pred_combined = []
 
             for fold_id in range(self.k_outer):
-                X_train, y_train = retrieve_X_y(
+                X_train, y_train = _retrieve_X_y(
                     labelled_data=self.ensemble_training_data[fold_id]
                 )
-                X_test, _ = retrieve_X_y(labelled_data=self.ensemble_test_data[fold_id])
+                X_test, _ = _retrieve_X_y(labelled_data=self.ensemble_test_data[fold_id])
 
                 if self.sampling_aggregation == "mean":
                     X_train = X_train.T.groupby(level=[0, 1]).mean().T
                     X_test = X_test.T.groupby(level=[0, 1]).mean().T
 
                 model.fit(X_train, y_train)
-                y_pred = safe_predict_proba(model, X_test)
+                y_pred = _safe_predict_proba(model, X_test)
                 y_pred_combined.extend(y_pred)
 
             ensemble_predictions[model_name] = y_pred_combined
@@ -288,7 +288,7 @@
         ensemble_predictions["labels"] = y_test_combined
 
         self.ensemble_predictions = pd.DataFrame.from_dict(ensemble_predictions)
-        self.ensemble_summary = ensemble_summary(
+        self.ensemble_summary = _ensemble_summary(
             self.ensemble_predictions, self.metrics
         )
@@ -298,7 +298,7 @@
                 desc="Training final ensemble models",
                 bar_format=bar_format,
             ):
-                X_train, y_train = retrieve_X_y(
+                X_train, y_train = _retrieve_X_y(
                     labelled_data=self.ensemble_training_data_final[0]
                 )
@@ -314,7 +314,7 @@ def fit_ensemble(self, ensemble_predictors=None):
     def predict(self, X_dict, ensemble_model_key):
         """
-        Predict class labels for samples in X
+        Predict class labels for samples in X.
 
         Parameters
         ----------
@@ -336,7 +336,7 @@ def predict(self, X_dict, ensemble_model_key):
             modality_name = self.modality_names[i]
             X = X_dict[modality_name]
 
-            X, _ = X_to_numpy(X)
+            X, _ = _X_to_numpy(X)
 
             base_models = copy.deepcopy(self.final_models["base models"][modality_name])
             self.base_predictors = {}
@@ -345,7 +345,7 @@ def predict(self, X_dict, ensemble_model_key):
                 self.base_predictors[base_model_dict["model name"]] = 0
 
                 base_model = pickle.loads(base_model_dict["pickled model"])
-                y_pred = safe_predict_proba(base_model, X)
+                y_pred = _safe_predict_proba(base_model, X)
 
                 base_model_dict["fold id"] = 0
                 base_model_dict["y_pred"] = y_pred
@@ -353,7 +353,7 @@
             combined_predictions = self._combine_predictions_outer(
                 base_models, modality_name, model_building=True
             )
-            ensemble_prediction_data = append_modality(
+            ensemble_prediction_data = _append_modality(
                 ensemble_prediction_data, combined_predictions, model_building=True
             )
         ensemble_prediction_data = ensemble_prediction_data[0]
@@ -367,12 +367,12 @@
             self.final_models["ensemble models"][ensemble_model_key]
         )
 
-        y_pred = safe_predict_proba(ensemble_model, ensemble_prediction_data)
+        y_pred = _safe_predict_proba(ensemble_model, ensemble_prediction_data)
         return y_pred
 
     @ignore_warnings(category=ConvergenceWarning)
     def _fit_base(self, X, y, base_predictors=None, modality_name=None):
-        X, feature_names = X_to_numpy(X)
+        X, feature_names = _X_to_numpy(X)
 
         self.modality_names.append(modality_name)
         self.feature_names[modality_name] = feature_names
@@ -387,7 +387,7 @@ def _fit_base(self, X, y, base_predictors=None, modality_name=None):
             modality_name=modality_name,
         )
 
-        self.ensemble_training_data = append_modality(
+        self.ensemble_training_data = _append_modality(
             self.ensemble_training_data, ensemble_training_data_modality
         )
@@ -399,12 +399,12 @@ def _fit_base(self, X, y, base_predictors=None, modality_name=None):
             modality_name=modality_name,
         )
 
-        self.ensemble_test_data = append_modality(
+        self.ensemble_test_data = _append_modality(
             self.ensemble_test_data, ensemble_test_data_modality
         )  # append data to dataframe
 
         # create a summary of base predictor performance
-        self.base_summary = base_summary(self.ensemble_test_data, self.metrics)
+        self.base_summary = _base_summary(self.ensemble_test_data, self.metrics)
 
         if self.model_building:
             self._fit_base_final(X=X, y=y, modality_name=modality_name)
@@ -428,7 +428,7 @@ def _fit_base_final(self, X, y, modality_name=None):
             modality_name=modality_name,
         )
 
-        self.ensemble_training_data_final = append_modality(
+        self.ensemble_training_data_final = _append_modality(
             self.ensemble_training_data_final, ensemble_training_data_modality
         )
@@ -562,7 +562,7 @@ def _train_predict_single_base_predictor(
         X_train, X_test = X[train_index], X[test_index]
         y_train, y_test = y[train_index], y[test_index]
 
-        X_sample, y_sample = sample(
+        X_sample, y_sample = _sample(
             X_train,
             y_train,
             strategy=self.sampling_strategy,
@@ -581,7 +581,7 @@
             }
 
         else:
-            y_pred = safe_predict_proba(model, X_test)
+            y_pred = _safe_predict_proba(model, X_test)
 
             results_dict = {
                 "model name": model_name,
@@ -677,7 +677,6 @@ def save(self, path=None):
 
         Parameters
         ----------
-
         path : optional, default=None
             Path to save the EnsembleIntegration class object.
""" @@ -695,7 +694,6 @@ def load(cls, path): Parameters ---------- - path : str Path to load the EnsembleIntegration class object. """ diff --git a/eipy/interpretation.py b/eipy/interpretation.py index 8b6025a..fc65c9c 100644 --- a/eipy/interpretation.py +++ b/eipy/interpretation.py @@ -1,5 +1,5 @@ from sklearn.inspection import permutation_importance -from eipy.utils import X_to_numpy, retrieve_X_y, bar_format, y_to_numpy +from eipy.utils import _X_to_numpy, _retrieve_X_y, bar_format, _y_to_numpy import pandas as pd from tqdm import tqdm import numpy as np @@ -102,10 +102,10 @@ def rank_product_score(self, X_dict, y): ensemble_predictor_keys = self.ensemble_predictor_keys if self.LFR is None: - self.local_feature_rank(X_dict, y_to_numpy(y)) + self._local_feature_rank(X_dict, _y_to_numpy(y)) if self.LMR is None: - self.local_model_rank(ensemble_predictor_keys=ensemble_predictor_keys) + self._local_model_rank(ensemble_predictor_keys=ensemble_predictor_keys) print("Calculating combined rank product score...") @@ -151,7 +151,7 @@ def rank_product_score(self, X_dict, y): return self - def local_feature_rank(self, X_dict, y): + def _local_feature_rank(self, X_dict, y): """ Local Feature Ranks (LFRs) for each base predictor @@ -177,7 +177,7 @@ def local_feature_rank(self, X_dict, y): bar_format=bar_format, ): X = X_dict[modality_name] - X, feature_names = X_to_numpy(X) + X, feature_names = _X_to_numpy(X) # check feature names were seen during training if len(self.EI.feature_names[modality_name]) > 1: @@ -285,7 +285,7 @@ def local_feature_rank(self, X_dict, y): return self - def local_model_rank(self, ensemble_predictor_keys): + def _local_model_rank(self, ensemble_predictor_keys): """ Local Model Ranks (LMRs) @@ -302,7 +302,7 @@ def local_model_rank(self, ensemble_predictor_keys): """ # load ensemble training data from EI training - ensemble_X_train, ensemble_y_train = retrieve_X_y( + ensemble_X_train, ensemble_y_train = _retrieve_X_y( labelled_data=self.EI.ensemble_training_data_final[0] ) diff --git a/eipy/metrics.py b/eipy/metrics.py index 3233277..74e84d5 100644 --- a/eipy/metrics.py +++ b/eipy/metrics.py @@ -1,18 +1,39 @@ import numpy as np import pandas as pd import inspect -from eipy.utils import minority_class +from eipy.utils import _minority_class from sklearn.metrics import roc_auc_score, precision_recall_curve def fmax_score(y_test, y_score, beta=1.0, pos_label=1): - fmax_score, _, _, threshold_fmax = fmax_precision_recall_threshold( + """ + Computes the maximum F-score (the harmonic mean of precision and recall) and the corresponding threshold. + + Parameters + ---------- + y_test : array of shape (n_samples,) + Array of test labels. + y_pred : array of shape (n_samples,) + Array of predicted probabilities on test data. + beta : float + Parameter for weighing precision and recall in F score calculations. + pos_label : bool + Class selection for computing F scores. + + Returns + ------- + fmax_score : float64 + Calculated fmax + threshold_fmax : float64 + Threshold corresponding to returned fmax + """ + fmax_score, _, _, threshold_fmax = _fmax_precision_recall_threshold( y_test, y_score, beta=beta, pos_label=pos_label ) return fmax_score, threshold_fmax -def fmax_precision_recall_threshold(labels, y_score, beta=1.0, pos_label=1): +def _fmax_precision_recall_threshold(labels, y_score, beta=1.0, pos_label=1): """ Radivojac, P. et al. (2013). A Large-Scale Evaluation of Computational Protein Function Prediction. Nature Methods, 10(3), 221-227. 
@@ -44,7 +65,7 @@ def fmax_precision_recall_threshold(labels, y_score, beta=1.0, pos_label=1):
     return fmax_score, precision_fmax, recall_fmax, threshold_fmax
 
 
-def try_metric_with_pos_label(y_true, y_pred, metric, pos_label):
+def _try_metric_with_pos_label(y_true, y_pred, metric, pos_label):
     """
     Compute score for a given metric.
     """
@@ -55,7 +76,7 @@ def try_metric_with_pos_label(y_true, y_pred, metric, pos_label):
     return score
 
 
-def scores(y_true, y_pred, metrics):
+def _scores(y_true, y_pred, metrics):
     """
     Compute all metrics for a single set of predictions. Returns a dictionary
     containing metric keys, each paired to a tuple (score, threshold).
@@ -65,7 +86,7 @@
     if metrics is None:
         metrics = {"fmax (minority)": fmax_score, "auc": roc_auc_score}
 
-    pos_label = minority_class(y_true)  # gives value 1 or 0
+    pos_label = _minority_class(y_true)  # gives value 1 or 0
 
     metric_threshold_dict = {}
@@ -75,14 +96,14 @@
         if "y_pred" in inspect.signature(metric).parameters:
             # calculate metric for target vector with threshold=0.5
             metric_threshold_dict[metric_key] = (
-                try_metric_with_pos_label(
+                _try_metric_with_pos_label(
                     y_true, (np.array(y_pred) >= 0.5).astype(int), metric, pos_label
                 ),
                 0.5,
             )
         # if y_score parameter exists in metric function then y should be probability vector
         elif "y_score" in inspect.signature(metric).parameters:
-            metric_results = try_metric_with_pos_label(
+            metric_results = _try_metric_with_pos_label(
                 y_true, y_pred, metric, pos_label
             )
             if isinstance(
@@ -95,7 +116,7 @@
 
     return metric_threshold_dict
 
 
-def scores_matrix(X, labels, metrics):
+def _scores_matrix(X, labels, metrics):
     """
     Calculate metrics and threshold (if applicable) for each column
     (set of predictions) in matrix X
@@ -104,7 +125,7 @@
     """
     scores_dict = {}
     for column in X.columns:
         column_temp = X[column]
-        metrics_per_column = scores(labels, column_temp, metrics)
+        metrics_per_column = _scores(labels, column_temp, metrics)
         # metric_names = list(metrics.keys())
         for metric_key in metrics_per_column.keys():
             if not (metric_key in scores_dict):
@@ -115,13 +136,13 @@
 
     return scores_dict
 
 
-def create_metric_threshold_dataframes(X, labels, metrics):
+def _create_metric_threshold_dataframes(X, labels, metrics):
     """
     Create a separate dataframe for metrics and thresholds.
     thresholds_df contains NaN if threshold not applicable.
""" - scores_dict = scores_matrix(X, labels, metrics) + scores_dict = _scores_matrix(X, labels, metrics) metrics_df = pd.DataFrame(columns=X.columns) thresholds_df = pd.DataFrame(columns=X.columns) @@ -130,15 +151,15 @@ def create_metric_threshold_dataframes(X, labels, metrics): return metrics_df, thresholds_df -def create_metric_threshold_dict(X, labels, metrics): +def _create_metric_threshold_dict(X, labels, metrics): df_dict = {} - df_dict["metrics"], df_dict["thresholds"] = create_metric_threshold_dataframes( + df_dict["metrics"], df_dict["thresholds"] = _create_metric_threshold_dataframes( X, labels, metrics ) return df_dict -def base_summary(ensemble_test_dataframes, metrics): +def _base_summary(ensemble_test_dataframes, metrics): """ Create a base predictor performance summary by concatenating data across test folds """ @@ -149,13 +170,13 @@ def base_summary(ensemble_test_dataframes, metrics): for df in ensemble_test_dataframes ] ) - return create_metric_threshold_dict(ensemble_test_averaged_samples, labels, metrics) + return _create_metric_threshold_dict(ensemble_test_averaged_samples, labels, metrics) -def ensemble_summary(ensemble_predictions, metrics): +def _ensemble_summary(ensemble_predictions, metrics): X = ensemble_predictions.drop(["labels"], axis=1) labels = ensemble_predictions["labels"] - return create_metric_threshold_dict(X, labels, metrics) + return _create_metric_threshold_dict(X, labels, metrics) # These two functions are an attempt at maximizing/minimizing any metric diff --git a/eipy/utils.py b/eipy/utils.py index f81bbff..31ba545 100755 --- a/eipy/utils.py +++ b/eipy/utils.py @@ -15,7 +15,7 @@ bar_format = "{desc}: |{bar}|{percentage:3.0f}%" -def minority_class(y_true): +def _minority_class(y_true): if np.bincount(y_true)[0] < np.bincount(y_true)[1]: minority_class = 0 else: @@ -23,7 +23,7 @@ def minority_class(y_true): return minority_class -def set_predictor_seeds(base_predictors, random_state): +def _set_predictor_seeds(base_predictors, random_state): for _, v in base_predictors.items(): if type(v) == Pipeline: est_ = list(v.named_steps)[-1] @@ -33,25 +33,25 @@ def set_predictor_seeds(base_predictors, random_state): v.set_params(**{"random_state": random_state}) -def X_is_dict(X): +def _X_is_dict(X): if isinstance(X, dict): return True else: return False -def X_dict_to_numpy(X_dict): +def _X_dict_to_numpy(X_dict): """ Retrieve feature names and convert arrays to numpy. """ X_dict_numpy = {} feature_names = {} for key, X in X_dict.items(): - X_dict_numpy[key], feature_names[key] = X_to_numpy(X) + X_dict_numpy[key], feature_names[key] = _X_to_numpy(X) return X_dict_numpy, feature_names -def X_to_numpy(X): +def _X_to_numpy(X): """ Return X as a numpy array, with feature names if applicable. """ @@ -66,7 +66,7 @@ def X_to_numpy(X): ) -def y_to_numpy(y): +def _y_to_numpy(y): """ Check y is numpy array and convert if not. 
""" @@ -85,13 +85,13 @@ def y_to_numpy(y): or pandas Series.""" ) - if not is_binary_array(_y): + if not _is_binary_array(_y): raise ValueError("y must contain binary values.") return _y -def is_binary_array(arr): +def _is_binary_array(arr): if all(x == 0 or x == 1 or x == 0.0 or x == 1.0 for x in arr): return True else: @@ -110,7 +110,7 @@ def get_n_splits(self, X, y, groups=None): return self.n_splits -def safe_predict_proba(model, X): # uses predict_proba method where possible +def _safe_predict_proba(model, X): # uses predict_proba method where possible if hasattr(model, "predict_proba"): y_pred = model.predict_proba(X)[:, 1] else: @@ -118,12 +118,12 @@ def safe_predict_proba(model, X): # uses predict_proba method where possible return y_pred -def random_integers(n_integers=1, seed=42): +def _random_integers(n_integers=1, seed=42): random.seed(seed) return random.sample(range(0, 10000), n_integers) -def sample(X, y, strategy, random_state): +def _sample(X, y, strategy, random_state): if strategy is None: X_resampled, y_resampled = X, y elif strategy == "undersampling": # define sampler @@ -161,13 +161,13 @@ def sample(X, y, strategy, random_state): return X_resampled, y_resampled -def retrieve_X_y(labelled_data): +def _retrieve_X_y(labelled_data): X = labelled_data.drop(columns=["labels"], level=0) y = np.ravel(labelled_data["labels"]) return X, y -def append_modality(current_data, modality_data, model_building=False): +def _append_modality(current_data, modality_data, model_building=False): if current_data is None: combined_dataframe = modality_data else: