From 798f373146d4dd1a4d1c735f3ad1732ab040be53 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 8 Feb 2022 15:57:53 +0200 Subject: [PATCH] Improve project structure Renamed the main source code path from ml-conversational-analytic-tool to mcat in order to make it a package. Long package names are discouraged and dashes are invalid symbols (see PEP8). Moved tests out of the mcat package and made it a package too. Updated import paths. Updated the auto-generated docs and the README. Signed-off-by: Teodora Sechkova --- README.md | 14 +- .../baseCNN.html | 38 ++--- .../baseLSTM.html | 38 ++--- .../commentAnalysis.html | 34 ++-- .../featureVector.html | 24 +-- .../githubDataExtraction.html | 108 ++++++------- .../github_data.html | 24 +-- .../index.html | 45 +++--- .../preProcessedDataset.html | 42 ++--- .../run.html | 14 +- .../utils.html | 48 +++--- .../runDataExtraction.html | 148 ------------------ mcat/__init__.py | 0 .../baseCNN.py | 0 .../baseLSTM.py | 0 .../commentAnalysis.py | 0 .../featureVector.py | 0 .../githubDataExtraction.py | 0 .../github_data.py | 0 .../preProcessedDataset.py | 0 .../run.py | 0 .../utils.py | 0 tests/__init__.py | 0 .../tests => tests}/test_commentAnalysis.py | 2 +- 24 files changed, 211 insertions(+), 368 deletions(-) rename docs/{ml-conversational-analytic-tool => mcat}/baseCNN.html (89%) rename docs/{ml-conversational-analytic-tool => mcat}/baseLSTM.html (89%) rename docs/{ml-conversational-analytic-tool => mcat}/commentAnalysis.html (88%) rename docs/{ml-conversational-analytic-tool => mcat}/featureVector.html (95%) rename docs/{ml-conversational-analytic-tool => mcat}/githubDataExtraction.html (84%) rename docs/{ml-conversational-analytic-tool => mcat}/github_data.html (91%) rename docs/{ml-conversational-analytic-tool => mcat}/index.html (65%) rename docs/{ml-conversational-analytic-tool => mcat}/preProcessedDataset.html (91%) rename docs/{ml-conversational-analytic-tool => mcat}/run.html (94%) rename docs/{ml-conversational-analytic-tool => mcat}/utils.html (80%) delete mode 100644 docs/ml-conversational-analytic-tool/runDataExtraction.html create mode 100644 mcat/__init__.py rename {ml-conversational-analytic-tool => mcat}/baseCNN.py (100%) rename {ml-conversational-analytic-tool => mcat}/baseLSTM.py (100%) rename {ml-conversational-analytic-tool => mcat}/commentAnalysis.py (100%) rename {ml-conversational-analytic-tool => mcat}/featureVector.py (100%) rename {ml-conversational-analytic-tool => mcat}/githubDataExtraction.py (100%) rename {ml-conversational-analytic-tool => mcat}/github_data.py (100%) rename {ml-conversational-analytic-tool => mcat}/preProcessedDataset.py (100%) rename {ml-conversational-analytic-tool => mcat}/run.py (100%) rename {ml-conversational-analytic-tool => mcat}/utils.py (100%) create mode 100644 tests/__init__.py rename {ml-conversational-analytic-tool/tests => tests}/test_commentAnalysis.py (98%) diff --git a/README.md b/README.md index bfef055..1fb1322 100644 --- a/README.md +++ b/README.md @@ -75,12 +75,10 @@ create isolated Python environment is recommended for this project. #### Testing 6. Run all unit tests ```python - cd ml-conversational-analytic-tool python -m unittest discover -s tests ``` 7. Run an individual unit test ```python - cd ml-conversational-analytic-tool python -m unittest tests/ ``` @@ -107,7 +105,7 @@ export GITACCESS= Run the script by passing in `organization` ```python -python ./ml-conversational-analytic-tool/githubDataExtraction.py +python ./mcat/githubDataExtraction.py ``` - `organization` is the name of the repository owner @@ -119,7 +117,7 @@ python ./ml-conversational-analytic-tool/githubDataExtraction.py `github_data.py` prepares your data for annotation use. Run the script by passing in path to `rawdatafile`. ```python -python ./ml-conversational-analytic-tool/github_data.py --name +python ./mcat/github_data.py --name ``` - `rawdatafile` is location of raw data csv @@ -137,7 +135,7 @@ include sentiment and code blocks. `Words` file contains words important in meas constructiveness. This functionality could be used instead of manual annotation. ```python -python ./ml-conversational-analytic-tool/featureVector.py --words --name +python ./mcat/featureVector.py --words --name ``` - `words` (optional) path to the words file - `name` (optional) name of the output file. @@ -154,7 +152,7 @@ There are two models available for training To train, run the script with required parameters path to `annotated_filename`, `dataset_filename`, `model`, and `outcome`. ```python -python ./ml-conversational-analytic-tool/run.py +python ./mcat/run.py ``` - `annotated_filename` is the location of the annotated dataset file @@ -176,12 +174,12 @@ communication, we welcome your contributions! ## Documentation Auto-generated API documentation can be found in -[docs/ml-conversational-analytic-tool](./docs/ml-conversational-analytic-tool) directory. +[docs/mcat](./docs/mcat) directory. Run the following command to update the API documentation ```python -PYTHONPATH=./ml-conversational-analytic-tool pdoc --html --output-dir docs ml-conversational-analytic-tool +PYTHONPATH=./mcat pdoc --html --output-dir docs mcat ``` ## Blog Posts diff --git a/docs/ml-conversational-analytic-tool/baseCNN.html b/docs/mcat/baseCNN.html similarity index 89% rename from docs/ml-conversational-analytic-tool/baseCNN.html rename to docs/mcat/baseCNN.html index a1b771b..cb943ed 100644 --- a/docs/ml-conversational-analytic-tool/baseCNN.html +++ b/docs/mcat/baseCNN.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool.baseCNN API documentation +mcat.baseCNN API documentation @@ -19,7 +19,7 @@
-

Module ml-conversational-analytic-tool.baseCNN

+

Module mcat.baseCNN

@@ -129,7 +129,7 @@

Module ml-conversational-analytic-tool.baseCNN

Classes

-
+
class BaseCNN
@@ -224,7 +224,7 @@

Classes

Methods

-
+
def explain(self, obs)
@@ -241,7 +241,7 @@

Methods

return output
-
+
def makeModel(self, input_shape)
@@ -265,7 +265,7 @@

Methods

self.model_ready = True
-
+
def makeModel2D(self, input_shape)
@@ -290,7 +290,7 @@

Methods

self.model_ready = True
-
+
def predict(self, obs, labels=False)
@@ -309,7 +309,7 @@

Methods

return predictions
-
+
def saveModel(self, name, version)
@@ -322,7 +322,7 @@

Methods

self.model.save("{}/{}".format(name, version))
-
+
def scoreModel(self, obs, res)
@@ -343,7 +343,7 @@

Methods

return evaluation
-
+
def trainModel(self, obs, res, val_split=0.3, val_set=None, epochs=10, batch_size=32)
@@ -380,21 +380,21 @@

Index

  • Super-module

  • Classes

    diff --git a/docs/ml-conversational-analytic-tool/baseLSTM.html b/docs/mcat/baseLSTM.html similarity index 89% rename from docs/ml-conversational-analytic-tool/baseLSTM.html rename to docs/mcat/baseLSTM.html index e8fc887..c34d4b5 100644 --- a/docs/ml-conversational-analytic-tool/baseLSTM.html +++ b/docs/mcat/baseLSTM.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool.baseLSTM API documentation +mcat.baseLSTM API documentation @@ -19,7 +19,7 @@
    -

    Module ml-conversational-analytic-tool.baseLSTM

    +

    Module mcat.baseLSTM

    @@ -153,7 +153,7 @@

    Module ml-conversational-analytic-tool.baseLSTM

    Classes

    -
    +
    class BaseLSTM
    @@ -272,7 +272,7 @@

    Classes

    Methods

    -
    +
    def explain(self, obs)
    @@ -290,7 +290,7 @@

    Methods

    return imp
    -
    +
    def makeModel(self, input_shape)
    @@ -313,7 +313,7 @@

    Methods

    self.model_ready = True
    -
    +
    def makeModel2D(self, input_shape)
    @@ -355,7 +355,7 @@

    Methods

    self.model_ready = True
    -
    +
    def predict(self, obs, labels=False)
    @@ -371,7 +371,7 @@

    Methods

    return predictions
    -
    +
    def saveModel(self, name, version)
    @@ -384,7 +384,7 @@

    Methods

    self.model.save("{}/{}".format(name, version))
    -
    +
    def scoreModel(self, obs, res)
    @@ -405,7 +405,7 @@

    Methods

    return evaluation
    -
    +
    def trainModel(self, obs, res, val_split=0.3, val_set=None, epochs=10, batch_size=32)
    @@ -441,21 +441,21 @@

    Index

    • Super-module

    • Classes

      diff --git a/docs/ml-conversational-analytic-tool/commentAnalysis.html b/docs/mcat/commentAnalysis.html similarity index 88% rename from docs/ml-conversational-analytic-tool/commentAnalysis.html rename to docs/mcat/commentAnalysis.html index 1593de3..a57652f 100644 --- a/docs/ml-conversational-analytic-tool/commentAnalysis.html +++ b/docs/mcat/commentAnalysis.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool.commentAnalysis API documentation +mcat.commentAnalysis API documentation @@ -19,7 +19,7 @@
      -

      Module ml-conversational-analytic-tool.commentAnalysis

      +

      Module mcat.commentAnalysis

      @@ -135,7 +135,7 @@

      Module ml-conversational-analytic-tool.commentAnalysis

      Classes

      -
      +
      class CommentAnalyzer (words)
      @@ -221,7 +221,7 @@

      Classes

      Methods

      -
      +
      def analyzeComment(self, comment)
      @@ -247,7 +247,7 @@

      Methods

      return result
      -
      +
      def changeWords(self, words)
      @@ -263,7 +263,7 @@

      Methods

      self.word_count = {word: 0 for word in words}
      -
      +
      def countWords(self, comment)
      @@ -288,7 +288,7 @@

      Methods

      return current_word_count
      -
      +
      def getCodeBlockCount(self, comment)
      @@ -312,7 +312,7 @@

      Methods

      return int(count / 2) # Divide by 2 since pairs
      -
      +
      def getSentiment(self, comment)
      @@ -330,7 +330,7 @@

      Methods

      return self.vader_sentiment.polarity_scores(comment)["compound"]
      -
      +
      def preProcess(self, text)
      @@ -367,20 +367,20 @@

      Index

      • Super-module

      • Classes

        diff --git a/docs/ml-conversational-analytic-tool/featureVector.html b/docs/mcat/featureVector.html similarity index 95% rename from docs/ml-conversational-analytic-tool/featureVector.html rename to docs/mcat/featureVector.html index 3783667..75cc7d0 100644 --- a/docs/ml-conversational-analytic-tool/featureVector.html +++ b/docs/mcat/featureVector.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool.featureVector API documentation +mcat.featureVector API documentation @@ -19,7 +19,7 @@
        -

        Module ml-conversational-analytic-tool.featureVector

        +

        Module mcat.featureVector

        @@ -37,7 +37,7 @@

        Module ml-conversational-analytic-tool.featureVectorModule ml-conversational-analytic-tool.featureVector

        Classes

        -
        +
        class Featurizer (retain_features, analysis_features)
        @@ -343,7 +343,7 @@

        Classes

        Methods

        -
        +
        def formFeatures(self)
        @@ -442,7 +442,7 @@

        Methods

        return export_df
        -
        +
        def readRawData(self, filename)
        @@ -466,7 +466,7 @@

        Methods

        lambda comment: utils.string_to_dict(comment))
        -
        +
        def setupCommentAnalyzer(self, filename)
        @@ -505,17 +505,17 @@

        Index

        • Super-module

        • Classes

          diff --git a/docs/ml-conversational-analytic-tool/githubDataExtraction.html b/docs/mcat/githubDataExtraction.html similarity index 84% rename from docs/ml-conversational-analytic-tool/githubDataExtraction.html rename to docs/mcat/githubDataExtraction.html index 93adbb9..3d5bc6b 100644 --- a/docs/ml-conversational-analytic-tool/githubDataExtraction.html +++ b/docs/mcat/githubDataExtraction.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool.githubDataExtraction API documentation +mcat.githubDataExtraction API documentation @@ -19,7 +19,7 @@
          -

          Module ml-conversational-analytic-tool.githubDataExtraction

          +

          Module mcat.githubDataExtraction

          @@ -37,6 +37,8 @@

          Module ml-conversational-analytic-tool.githubDataExtract from github import Github from github.GithubException import RateLimitExceededException +import utils + class GithubDataExtractor: def __init__(self, access_token): @@ -44,7 +46,7 @@

          Module ml-conversational-analytic-tool.githubDataExtract Constructor requires an access token to start a Github session, and specifies instance variables """ self.g_ses = Github(access_token) # Github object is used as a channel to the Github API - self.current_repo = None # Current Opended Repo + self.current_repo = None # Current Opened Repo self.reaction_flag = False self.repo_opened = False # Flag to store state of repo as opened (True) or closed (False) self.repo_name = "" @@ -59,9 +61,9 @@

          Module ml-conversational-analytic-tool.githubDataExtract self.repo_opened = True self.repo_name = repo_name self.organization = organization - print("Opened repo {} - {}".format(repo_name, organization)) + print("Opened repo {}/{}".format(organization, repo_name)) - def getAllPulls(self, name="", reaction_flag=False, export_to_csv=True): + def getAllPulls(self, reaction_flag=False): """ Method to form a dataframe containing pull information. Parameters: name - name of exported csv file, export - if the dataframe should be exported to csv. Returns: Dataframe with pull data @@ -71,15 +73,7 @@

          Module ml-conversational-analytic-tool.githubDataExtract pull_data = [] pull_data.extend(self.getPullsByState('open')) # Access all open pulls pull_data.extend(self.getPullsByState('closed')) # Access all closed pulls - pull_df = pd.DataFrame(pull_data) # Convert list of dictionaries to dataframe - if export_to_csv: # Export to csv if flag is true - if not os.path.exists('exports'): - os.mkdir('exports') - if name == "": # Check if name is provided - pull_df.to_csv("exports/" + self.organization + "_" + self.repo_name + ".csv") - else: - pull_df.to_csv("exports/" + name) - return pull_df + return pd.DataFrame(pull_data) # Return list of dictionaries converted to dataframe print("Please open a Repo") def getPullsByState(self, state): @@ -186,23 +180,31 @@

          Module ml-conversational-analytic-tool.githubDataExtract if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Create csv for all pulls in repo') + parser = argparse.ArgumentParser(description='Create CSV/s for all pulls in repo/s') parser.add_argument('organization', help='Organization the repo belongs to.') - parser.add_argument('reponame', help='Name of repo') - parser.add_argument('-reactions', action='store_true', default=False, help='Flag to extract reactions') - parser.add_argument('--filename', help='Name of file') - # parser.add_argument('accesstoken', help='Github access token') + parser.add_argument('-R', '--repo', help='Name of repo.') + parser.add_argument('--reactions', action='store_true', default=False, help='Flag to extract reactions') + parser.add_argument('-n', "--name", help='Output file name. If not specified, the name is constructed like this: ' + '<organization>_<repo>.csv') args = parser.parse_args() ACCESS_TOKEN = os.environ["GITACCESS"] # Access Github token from environment for security purposes extractor = GithubDataExtractor(ACCESS_TOKEN) # Create object - extractor.openRepo(args.organization, args.reponame) # Open repo - # Extract all pulls and export them to .csv - if args.filename: - extractor.getAllPulls(args.filename, args.reactions) + if args.repo is None: + # Extract data for all repositories in organization + repos = extractor.g_ses.get_organization(args.organization).get_repos() + for repo in repos: + extractor.openRepo(args.organization, repo.name) + df = extractor.getAllPulls(args.reactions) + file_name = utils.construct_file_name(None, args.organization, repo.name) + utils.export_to_cvs(df, file_name) else: - extractor.getAllPulls("", args.reactions) + # Extract data for an individual repository + extractor.openRepo(args.organization, args.repo) + df = extractor.getAllPulls(args.reactions) + file_name = utils.construct_file_name(args.name, args.organization, args.repo) + utils.export_to_cvs(df, file_name)

          @@ -214,7 +216,7 @@

          Module ml-conversational-analytic-tool.githubDataExtract

          Classes

          -
          +
          class GithubDataExtractor (access_token)
          @@ -230,7 +232,7 @@

          Classes

          Constructor requires an access token to start a Github session, and specifies instance variables """ self.g_ses = Github(access_token) # Github object is used as a channel to the Github API - self.current_repo = None # Current Opended Repo + self.current_repo = None # Current Opened Repo self.reaction_flag = False self.repo_opened = False # Flag to store state of repo as opened (True) or closed (False) self.repo_name = "" @@ -245,9 +247,9 @@

          Classes

          self.repo_opened = True self.repo_name = repo_name self.organization = organization - print("Opened repo {} - {}".format(repo_name, organization)) + print("Opened repo {}/{}".format(organization, repo_name)) - def getAllPulls(self, name="", reaction_flag=False, export_to_csv=True): + def getAllPulls(self, reaction_flag=False): """ Method to form a dataframe containing pull information. Parameters: name - name of exported csv file, export - if the dataframe should be exported to csv. Returns: Dataframe with pull data @@ -257,15 +259,7 @@

          Classes

          pull_data = [] pull_data.extend(self.getPullsByState('open')) # Access all open pulls pull_data.extend(self.getPullsByState('closed')) # Access all closed pulls - pull_df = pd.DataFrame(pull_data) # Convert list of dictionaries to dataframe - if export_to_csv: # Export to csv if flag is true - if not os.path.exists('exports'): - os.mkdir('exports') - if name == "": # Check if name is provided - pull_df.to_csv("exports/" + self.organization + "_" + self.repo_name + ".csv") - else: - pull_df.to_csv("exports/" + name) - return pull_df + return pd.DataFrame(pull_data) # Return list of dictionaries converted to dataframe print("Please open a Repo") def getPullsByState(self, state): @@ -372,8 +366,8 @@

          Classes

          Methods

          -
          -def getAllPulls(self, name='', reaction_flag=False, export_to_csv=True) +
          +def getAllPulls(self, reaction_flag=False)

          Method to form a dataframe containing pull information. Parameters: name - name of exported csv file, @@ -382,7 +376,7 @@

          Methods

          Expand source code -
          def getAllPulls(self, name="", reaction_flag=False, export_to_csv=True):
          +
          def getAllPulls(self, reaction_flag=False):
               """
               Method to form a dataframe containing pull information. Parameters: name - name of exported csv file,
               export - if the dataframe should be exported to csv. Returns: Dataframe with pull data
          @@ -392,19 +386,11 @@ 

          Methods

          pull_data = [] pull_data.extend(self.getPullsByState('open')) # Access all open pulls pull_data.extend(self.getPullsByState('closed')) # Access all closed pulls - pull_df = pd.DataFrame(pull_data) # Convert list of dictionaries to dataframe - if export_to_csv: # Export to csv if flag is true - if not os.path.exists('exports'): - os.mkdir('exports') - if name == "": # Check if name is provided - pull_df.to_csv("exports/" + self.organization + "_" + self.repo_name + ".csv") - else: - pull_df.to_csv("exports/" + name) - return pull_df + return pd.DataFrame(pull_data) # Return list of dictionaries converted to dataframe print("Please open a Repo")
          -
          +
          def getPullFeatures(self, pull)
          @@ -449,7 +435,7 @@

          Methods

          return pull_dict
          -
          +
          def getPullsByState(self, state)
          @@ -482,7 +468,7 @@

          Methods

          return pull_data
          -
          +
          def listOfComments(self, comments)
          @@ -539,7 +525,7 @@

          Methods

          return list_comments
          -
          +
          def openRepo(self, organization, repo_name)
          @@ -559,7 +545,7 @@

          Methods

          self.repo_opened = True self.repo_name = repo_name self.organization = organization - print("Opened repo {} - {}".format(repo_name, organization))
          + print("Opened repo {}/{}".format(organization, repo_name))
          @@ -575,19 +561,19 @@

          Index

          • Super-module

          • Classes

            diff --git a/docs/ml-conversational-analytic-tool/github_data.html b/docs/mcat/github_data.html similarity index 91% rename from docs/ml-conversational-analytic-tool/github_data.html rename to docs/mcat/github_data.html index 4aa07b6..8596c57 100644 --- a/docs/ml-conversational-analytic-tool/github_data.html +++ b/docs/mcat/github_data.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool.github_data API documentation +mcat.github_data API documentation @@ -19,7 +19,7 @@
            -

            Module ml-conversational-analytic-tool.github_data

            +

            Module mcat.github_data

            @@ -32,7 +32,7 @@

            Module ml-conversational-analytic-tool.github_dataModule ml-conversational-analytic-tool.github_data

            Classes

            -
            +
            class GitHubData (raw_filename)
            @@ -193,7 +193,7 @@

            Classes

            Methods

            -
            +
            def merge_comments(self, row)
            @@ -221,7 +221,7 @@

            Methods

            return conversation.encode("ascii", "ignore").decode()
            -
            +
            def read_raw_data(self)
            @@ -241,7 +241,7 @@

            Methods

            self.raw_data['Review_Comments'] = self.raw_data['Review_Comments'].apply(lambda comment: utils.string_to_dict(comment))
            -
            +
            def reformat_data(self)
            @@ -287,17 +287,17 @@

            Index

            • Super-module

            • Classes

              diff --git a/docs/ml-conversational-analytic-tool/index.html b/docs/mcat/index.html similarity index 65% rename from docs/ml-conversational-analytic-tool/index.html rename to docs/mcat/index.html index 1baaf69..97ebcf8 100644 --- a/docs/ml-conversational-analytic-tool/index.html +++ b/docs/mcat/index.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool API documentation +mcat API documentation @@ -19,50 +19,46 @@
              -

              Namespace ml-conversational-analytic-tool

              +

              Package mcat

              Sub-modules

              -
              ml-conversational-analytic-tool.baseCNN
              +
              mcat.baseCNN
              -
              ml-conversational-analytic-tool.baseLSTM
              +
              mcat.baseLSTM
              -
              ml-conversational-analytic-tool.commentAnalysis
              +
              mcat.commentAnalysis
              -
              ml-conversational-analytic-tool.featureVector
              +
              mcat.featureVector
              -
              ml-conversational-analytic-tool.githubDataExtraction
              +
              mcat.githubDataExtraction
              -
              ml-conversational-analytic-tool.github_data
              +
              mcat.github_data
              -
              ml-conversational-analytic-tool.preProcessedDataset
              +
              mcat.preProcessedDataset
              -
              ml-conversational-analytic-tool.run
              +
              mcat.run
              -
              ml-conversational-analytic-tool.runDataExtraction
              -
              -
              -
              -
              ml-conversational-analytic-tool.utils
              +
              mcat.utils
              @@ -83,16 +79,15 @@

              Index

              diff --git a/docs/ml-conversational-analytic-tool/preProcessedDataset.html b/docs/mcat/preProcessedDataset.html similarity index 91% rename from docs/ml-conversational-analytic-tool/preProcessedDataset.html rename to docs/mcat/preProcessedDataset.html index c810642..c773b88 100644 --- a/docs/ml-conversational-analytic-tool/preProcessedDataset.html +++ b/docs/mcat/preProcessedDataset.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool.preProcessedDataset API documentation +mcat.preProcessedDataset API documentation @@ -19,7 +19,7 @@
              -

              Module ml-conversational-analytic-tool.preProcessedDataset

              +

              Module mcat.preProcessedDataset

              @@ -291,7 +291,7 @@

              Module ml-conversational-analytic-tool.preProcessedDatas

              Classes

              -
              +
              class PreProcessedDataset (vocab_size=1000, no_tokens=512, max_pull_length=100)
              @@ -548,7 +548,7 @@

              Classes

              Methods

              -
              +
              def encode(self, utterances)
              @@ -580,7 +580,7 @@

              Methods

              return np.array(encoded_utterances)
              -
              +
              def encodeData(self)
              @@ -612,7 +612,7 @@

              Methods

              self.curr_max_length = max([len(x) for x in self.all_encoded_utterances])
              -
              +
              def getRes(self, outcome=None)
              @@ -630,7 +630,7 @@

              Methods

              return pd.DataFrame(data=self.results)
              -
              +
              def getRoleAgnosticMatrix(self, outcome=None, padPull=True)
              @@ -661,7 +661,7 @@

              Methods

              return obs, res
              -
              +
              def getRoleMatrix(self, outcome=None, padPull=True)
              @@ -726,7 +726,7 @@

              Methods

              return obs, res
              -
              +
              def loadAnnotatedData(self, filename)
              @@ -743,7 +743,7 @@

              Methods

              self.annotated_data_open = True
              -
              +
              def loadDataset(self, filename)
              @@ -760,7 +760,7 @@

              Methods

              self.dataset_open = True
              -
              +
              def setupPreProcess(self, annotated_filename, dataset_filename)
              @@ -806,22 +806,22 @@

              Index

              -
              +
              def save_model(model, name, version)
              @@ -212,13 +212,13 @@

              Index

              diff --git a/docs/ml-conversational-analytic-tool/utils.html b/docs/mcat/utils.html similarity index 80% rename from docs/ml-conversational-analytic-tool/utils.html rename to docs/mcat/utils.html index 654e412..6d716f6 100644 --- a/docs/ml-conversational-analytic-tool/utils.html +++ b/docs/mcat/utils.html @@ -4,7 +4,7 @@ -ml-conversational-analytic-tool.utils API documentation +mcat.utils API documentation @@ -19,7 +19,7 @@
              -

              Module ml-conversational-analytic-tool.utils

              +

              Module mcat.utils

              @@ -37,7 +37,7 @@

              Module ml-conversational-analytic-tool.utils

              def export_to_cvs(export_df: pd.DataFrame, name): """ - Export DataFrame into csv file with name constructed based on the given name or default_name + Export DataFrame into csv file with given name """ if not os.path.exists(EXPORTS_DIR): @@ -49,14 +49,19 @@

              Module ml-conversational-analytic-tool.utils

              print("Output file: ", os.path.abspath(file)) -def construct_file_name(name, raw_datafile, suffix): +def construct_file_name(name, component_a, component_b, separator="_"): + """ + Construct output file name if `name` is not provided explicitly. + File output name is constructed based on component_a, component_b and separator input parameters : + <component_a><separator><component_b>.csv + """ if name: _, file_extension = os.path.splitext(name) if not file_extension: name = name + ".csv" return name else: - return Path(raw_datafile).stem + suffix + ".csv" + return "{}{}{}.csv".format(Path(component_a).stem, separator, component_b) def string_to_dict(string): @@ -74,37 +79,44 @@

              Module ml-conversational-analytic-tool.utils

              Functions

              -
              -def construct_file_name(name, raw_datafile, suffix) +
              +def construct_file_name(name, component_a, component_b, separator='_')
              -
              +

              Construct output file name if name is not provided explicitly. +File output name is constructed based on component_a, component_b and separator input parameters : +.csv

              Expand source code -
              def construct_file_name(name, raw_datafile, suffix):
              +
              def construct_file_name(name, component_a, component_b, separator="_"):
              +    """
              +    Construct output file name if `name` is not provided explicitly.
              +    File output name is constructed based on component_a, component_b and separator input parameters :
              +    <component_a><separator><component_b>.csv
              +    """
                   if name:
                       _, file_extension = os.path.splitext(name)
                       if not file_extension:
                           name = name + ".csv"
                       return name
                   else:
              -        return Path(raw_datafile).stem + suffix + ".csv"
              + return "{}{}{}.csv".format(Path(component_a).stem, separator, component_b)
              -
              +
              def export_to_cvs(export_df: pandas.core.frame.DataFrame, name)
              -

              Export DataFrame into csv file with name constructed based on the given name or default_name

              +

              Export DataFrame into csv file with given name

              Expand source code
              def export_to_cvs(export_df: pd.DataFrame, name):
                   """
              -    Export DataFrame into csv file with name constructed based on the given name or default_name
              +    Export DataFrame into csv file with given name
                   """
               
                   if not os.path.exists(EXPORTS_DIR):
              @@ -116,7 +128,7 @@ 

              Functions

              print("Output file: ", os.path.abspath(file))
              -
              +
              def string_to_dict(string)
              @@ -146,14 +158,14 @@

              Index

              diff --git a/docs/ml-conversational-analytic-tool/runDataExtraction.html b/docs/ml-conversational-analytic-tool/runDataExtraction.html deleted file mode 100644 index 8d7d03b..0000000 --- a/docs/ml-conversational-analytic-tool/runDataExtraction.html +++ /dev/null @@ -1,148 +0,0 @@ - - - - - - -ml-conversational-analytic-tool.runDataExtraction API documentation - - - - - - - - - - - -
              -
              -
              -

              Module ml-conversational-analytic-tool.runDataExtraction

              -
              -
              -
              - -Expand source code - -
              # Copyright 2021 VMware, Inc.
              -# SPDX-License-Identifier: Apache-2.0
              -
              -import argparse
              -import os
              -
              -from githubDataExtraction import GithubDataExtractor
              -
              -
              -def getRepos(access_token, organization, reaction):
              -    """
              -    Method to extract data for all repositories in organization
              -    """
              -    extractor = GithubDataExtractor(access_token)  # Create object
              -    repos = extractor.g_ses.get_organization(organization).get_repos()
              -    for repo in repos:
              -        print("Starting: {}".format(repo.name))
              -        extractor.openRepo(organization, repo.name)
              -        extractor.getAllPulls("", reaction)
              -
              -
              -def getRepo(access_token, organization, reponame, reaction):
              -    """
              -    Method to extract data for an individual repository
              -    """
              -    extractor = GithubDataExtractor(access_token)  # Create object 
              -    extractor.openRepo(organization, reponame)
              -    extractor.getAllPulls("", reaction)
              -
              -
              -if __name__ == "__main__":
              -    parser = argparse.ArgumentParser(description='Create csv for all pulls in each repo for the organzation')
              -    parser.add_argument('organization', help='Organization the repo belongs to.')
              -    parser.add_argument('repo', help='Repo name or all if all repos in organization')
              -    parser.add_argument('-reactions', action='store_true', default=False, help='Flag to extract reactions')
              -
              -    args = parser.parse_args()
              -    ACCESS_TOKEN = os.environ["GITACCESS"]  # Access Github token from environment for security purposes
              -    if args.repo == 'all':
              -        getRepos(ACCESS_TOKEN, args.organization, args.reactions)
              -    else:
              -        getRepo(ACCESS_TOKEN, args.organization, args.repo, args.reactions)
              -
              -
              -
              -
              -
              -
              -
              -

              Functions

              -
              -
              -def getRepo(access_token, organization, reponame, reaction) -
              -
              -

              Method to extract data for an individual repository

              -
              - -Expand source code - -
              def getRepo(access_token, organization, reponame, reaction):
              -    """
              -    Method to extract data for an individual repository
              -    """
              -    extractor = GithubDataExtractor(access_token)  # Create object 
              -    extractor.openRepo(organization, reponame)
              -    extractor.getAllPulls("", reaction)
              -
              -
              -
              -def getRepos(access_token, organization, reaction) -
              -
              -

              Method to extract data for all repositories in organization

              -
              - -Expand source code - -
              def getRepos(access_token, organization, reaction):
              -    """
              -    Method to extract data for all repositories in organization
              -    """
              -    extractor = GithubDataExtractor(access_token)  # Create object
              -    repos = extractor.g_ses.get_organization(organization).get_repos()
              -    for repo in repos:
              -        print("Starting: {}".format(repo.name))
              -        extractor.openRepo(organization, repo.name)
              -        extractor.getAllPulls("", reaction)
              -
              -
              -
              -
              -
              -
              -
              - -
              - - - \ No newline at end of file diff --git a/mcat/__init__.py b/mcat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ml-conversational-analytic-tool/baseCNN.py b/mcat/baseCNN.py similarity index 100% rename from ml-conversational-analytic-tool/baseCNN.py rename to mcat/baseCNN.py diff --git a/ml-conversational-analytic-tool/baseLSTM.py b/mcat/baseLSTM.py similarity index 100% rename from ml-conversational-analytic-tool/baseLSTM.py rename to mcat/baseLSTM.py diff --git a/ml-conversational-analytic-tool/commentAnalysis.py b/mcat/commentAnalysis.py similarity index 100% rename from ml-conversational-analytic-tool/commentAnalysis.py rename to mcat/commentAnalysis.py diff --git a/ml-conversational-analytic-tool/featureVector.py b/mcat/featureVector.py similarity index 100% rename from ml-conversational-analytic-tool/featureVector.py rename to mcat/featureVector.py diff --git a/ml-conversational-analytic-tool/githubDataExtraction.py b/mcat/githubDataExtraction.py similarity index 100% rename from ml-conversational-analytic-tool/githubDataExtraction.py rename to mcat/githubDataExtraction.py diff --git a/ml-conversational-analytic-tool/github_data.py b/mcat/github_data.py similarity index 100% rename from ml-conversational-analytic-tool/github_data.py rename to mcat/github_data.py diff --git a/ml-conversational-analytic-tool/preProcessedDataset.py b/mcat/preProcessedDataset.py similarity index 100% rename from ml-conversational-analytic-tool/preProcessedDataset.py rename to mcat/preProcessedDataset.py diff --git a/ml-conversational-analytic-tool/run.py b/mcat/run.py similarity index 100% rename from ml-conversational-analytic-tool/run.py rename to mcat/run.py diff --git a/ml-conversational-analytic-tool/utils.py b/mcat/utils.py similarity index 100% rename from ml-conversational-analytic-tool/utils.py rename to mcat/utils.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ml-conversational-analytic-tool/tests/test_commentAnalysis.py b/tests/test_commentAnalysis.py similarity index 98% rename from ml-conversational-analytic-tool/tests/test_commentAnalysis.py rename to tests/test_commentAnalysis.py index a285a9a..94dc4c4 100644 --- a/ml-conversational-analytic-tool/tests/test_commentAnalysis.py +++ b/tests/test_commentAnalysis.py @@ -1,7 +1,7 @@ import unittest import unittest.mock -import commentAnalysis +from mcat import commentAnalysis class TestCommentAnalysis(unittest.TestCase):