diff --git a/.env.sample b/.env.sample index 770c34c..e3f55df 100644 --- a/.env.sample +++ b/.env.sample @@ -1,5 +1,4 @@ -ES_HOST=http://localhost -ES_PORT=9200 +ES_URL=http://localhost:9200 ES_TIMEOUT=120 # Note, when running as a script from within the scripts/ folder, diff --git a/README.md b/README.md index 76dfe2c..9fdc026 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Currently only able to run the app in a docker image. Ingest is run manually. # Environment Setup 1. Download the large english spacy model from [here](https://spacy.io/models/en), by scrolling down to `en_core_web_lg`, click on release details which will take you to the github page, scroll down to assets and download the tar.gz file, unzip and put folder in `data/` directory 2. `pip3 install -r requirements.txt` (create a virtualenv first if you so choose) -3. Fill in `ES_HOST`, `ES_PORT`, and `NLP_FILE_PATH` in the `.env` file with the appropriate settings +3. Fill in `ES_URL` and `NLP_FILE_PATH` in the `.env` file with the appropriate settings 4. In VSCode, if you want to debug ingestion locally, create a launch.json file (by going into the debugging panel on the left, and clicking "create a launch.json file"). Then paste the following into your launch.json file: ``` { @@ -48,7 +48,7 @@ Currently only able to run the app in a docker image. Ingest is run manually. 2. `python3 ingestion.py -fs -i <indra_id> -u <es_url>` # Run Ingestion/App (Dumpster Fire: 10.65.18.69): -1. Sync local code to dumpster-fire code because gitlab isn't accessible from dumpster-fire: `rsync -auv wm-curation-recommendation-service/ centos@10.65.18.69:~/wm-curation-recommendation --exclude=.venv/ --exclude=data/ --exclude=.git/ --exclude=.vscode/ --exclude=__pycache__/ --exclude=experiments/ --exclude=scripts/resources/ --exclude='**/*.pkl' --exclude='**/*.json'` +1. 
Sync local code to dumpster-fire code because gitlab isn't accessible from dumpster-fire: `rsync -auv wm-curation-recommendation-service/ centos@10.65.18.69:~/wm-curation-recommendation --exclude=.venv/ --exclude=data/ --exclude=.git/ --exclude=.vscode/ --exclude=__pycache__/ --exclude=experiments/ --exclude=scripts/resources/ --exclude='**/*.pkl' --exclude='**/*.json'`. If `wm-curation-recommendation-service` is not the name of your folder containing the code, replace it with your root folder's name. 2. Ssh to dumpster fire and run app using instructions above 3. In order to run the app on the server such that it doesn't end when you close your ssh session, you need to run your commands in a tmux session. Run `tmux new -s curation-service`, then `tmux a -t curation-service`. This creates a session that will run even when you exit your ssh session. From within this tmux session, you can run the app using the instructions above. To exit: `ctrl + b, + d` # Quick Docker Tips @@ -123,8 +123,7 @@ curl -H "Content-type:application/json" -XPOST http://<curation_server>:<port>/r { "remove_factors": true, "remove_statements": true, - "es_host": <destination_es>, - "es_port": 9200 + "es_url": <destination_es>:9200 } ' ``` @@ -141,8 +140,7 @@ http://<curation_server>:<port>/recommendation/task/<task_id> ``` curl -H "Content-type: application/json" -XPOST http:<curation_server>:<port>/recommendation/-delta-ingest/<indra_index> -d' { - "es_host": <destination_es>, - "es_port": 9200, + "es_url": <destination_es>:9200, "statement_ids": [new statements from project], "project_id": <projectId> } diff --git a/scripts/ingestion.py b/scripts/ingestion.py index 7b33ef9..c994fad 100644 --- a/scripts/ingestion.py +++ b/scripts/ingestion.py @@ -45,19 +45,14 @@ start_time = time.time() # Create ES connection from args - es_args = args.url.rsplit(':', 1) + es_args = args.url es_user = args.username es_pw = args.password - # needed if we have auth - es_scheme = "https" if es_user is None: 
es_user = "" - es_scheme = "http" if es_pw is None: es_pw = "" - if es_scheme is None: - es_scheme = "http" - es = Elastic(es_args[0].strip(), es_args[1].strip(), http_auth=(es_user, es_pw), scheme=es_scheme, timeout=60) + es = Elastic(es_args.strip(), http_auth=(es_user, es_pw), timeout=60) try: print(f'Generating recommendations for index: {args.index}') diff --git a/src/elastic/elastic.py b/src/elastic/elastic.py index 9cf6c1e..f5883aa 100644 --- a/src/elastic/elastic.py +++ b/src/elastic/elastic.py @@ -20,19 +20,18 @@ class Elastic: _port = None @staticmethod - def client(host, port, **kwargs): + def client(host, **kwargs): """ Return a new ES client """ - return Elastic(host, port, **kwargs) + return Elastic(host, **kwargs) - def __init__(self, host, port, **kwargs): + def __init__(self, host, **kwargs): """ Initialize the ElasticSearch Client """ self._host = host - self._port = port - self.client = Elasticsearch(host, port=port, verify_certs=False, **kwargs) + self.client = Elasticsearch(host, verify_certs=False, **kwargs) def get_host(self): return self._host diff --git a/src/web/celery/tasks.py b/src/web/celery/tasks.py index d77ea34..ccd5a97 100644 --- a/src/web/celery/tasks.py +++ b/src/web/celery/tasks.py @@ -17,8 +17,7 @@ def compute_recommendations(self, remove_factors, remove_statements, remove_concepts, - es_host, - es_port): + es_url): message, state = ( 'Creating Recommendations', 'PROGRESS' @@ -29,8 +28,7 @@ def compute_recommendations(self, progress(self, state, message) # Ingest - es = Elastic(es_host, es_port, http_auth=(Config.ES_USERNAME, Config.ES_PASSWORD), - scheme=Config.SCHEME, timeout=60) + es = Elastic(es_url, http_auth=(Config.ES_USERNAME, Config.ES_PASSWORD), timeout=60) ingestor = Ingestor( es=es, kb_index=kb_index, diff --git a/src/web/configuration.py b/src/web/configuration.py index 601d9c7..a25cefa 100644 --- a/src/web/configuration.py +++ b/src/web/configuration.py @@ -13,14 +13,9 @@ class Config: ES_USERNAME = 
os.getenv('ES_USERNAME', '') ES_PASSWORD = os.getenv('ES_PASSWORD', '') - SCHEME = 'http' - if ES_USERNAME != '': - SCHEME = 'https' ES = Elastic( - os.getenv('ES_HOST', ''), - os.getenv('ES_PORT', '9200'), - http_auth=(ES_USERNAME, ES_PASSWORD), - scheme=SCHEME + os.getenv('ES_URL', ''), + http_auth=(ES_USERNAME, ES_PASSWORD) ) DEBUG = True diff --git a/src/web/controllers.py b/src/web/controllers.py index e00b441..b450fcd 100644 --- a/src/web/controllers.py +++ b/src/web/controllers.py @@ -45,11 +45,10 @@ def compute_recommendations(knowledge_base_id): remove_factors = body and bool(body.get('remove_factors')) remove_statements = body and bool(body.get('remove_statements')) remove_concepts = body and bool(body.get('remove_concepts')) - es_host = body.get('es_host') - es_port = body.get('es_port') + es_url = body.get('es_url') - if es_host is None or es_port is None: - raise BadRequest(description="es_host and es_port are required arguments.") + if es_url is None: + raise BadRequest(description="es_url is a required argument.") # Run the Long running ingestion task = tasks.compute_recommendations.delay( @@ -59,8 +58,7 @@ def compute_recommendations(knowledge_base_id): remove_factors=remove_factors, remove_statements=remove_statements, remove_concepts=remove_concepts, - es_host=es_host, - es_port=es_port + es_url=es_url ) return jsonify({ 'task_id': task.id @@ -71,13 +69,12 @@ def compute_recommendations(knowledge_base_id): def compute_delta_recommendations(knowledge_base_id): # Get the params body = request.get_json() - es_host = body.get('es_host') - es_port = body.get('es_port') + es_url = body.get('es_url') project_index = body.get('project_index') statement_ids = body.get('statement_ids') - if es_host is None or es_port is None: - raise BadRequest(description='es_host and es_port are required arguments.') + if es_url is None: + raise BadRequest(description='es_url is a required argument.') if len(statement_ids) == 0: raise BadRequest(description='statement_ids 
must not be empty.') @@ -90,8 +87,7 @@ def compute_delta_recommendations(knowledge_base_id): remove_factors=False, remove_statements=False, remove_concepts=False, - es_host=es_host, - es_port=es_port + es_url=es_url ) return jsonify({ 'task_id': task.id