Skip to content

Commit

Permalink
Merge branch 'bugs' into 'main'
Browse files Browse the repository at this point in the history
changed es_host/port to es_url, updated documentation

See merge request WM/wm-curation-recommendation!34
  • Loading branch information
Venkat Korapaty committed Feb 2, 2022
2 parents 2ad989a + 0e538ef commit ebd725a
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 43 deletions.
3 changes: 1 addition & 2 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
ES_HOST=http://localhost
ES_PORT=9200
ES_URL=http://localhost:9200
ES_TIMEOUT=120

# Note, when running as a script from within the scripts/ folder,
Expand Down
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Currently only able to run the app in a docker image. Ingest is run manually.
# Environment Setup
1. Download the large english spacy model from [here](https://spacy.io/models/en), by scrolling down to `en_core_web_lg`, click on release details which will take you to the github page, scroll down to assets and download the tar.gz file, unzip and put folder in `data/` directory
2. `pip3 install -r requirements.txt` (create a virtualenv first if you so choose)
3. Fill in `ES_HOST`, `ES_PORT`, and `NLP_FILE_PATH` in the `.env` file with the appropriate settings
3. Fill in `ES_URL` and `NLP_FILE_PATH` in the `.env` file with the appropriate settings
4. In VSCode, if you want to debug ingestion locally, create a launch.json file (by going into the debugging panel on the left, and clicking "create a launch.json file"). Then paste the following into your launch.json file:
```
{
Expand Down Expand Up @@ -48,7 +48,7 @@ Currently only able to run the app in a docker image. Ingest is run manually.
2. `python3 ingestion.py -fs -i <indra_id> -u <es_url>`

# Run Ingestion/App (Dumpster Fire: 10.65.18.69):
1. Sync local code to dumpster-fire code because gitlab isn't accessible from dumpster-fire: `rsync -auv wm-curation-recommendation-service/ [email protected]:~/wm-curation-recommendation --exclude=.venv/ --exclude=data/ --exclude=.git/ --exclude=.vscode/ --exclude=__pycache__/ --exclude=experiments/ --exclude=scripts/resources/ --exclude='**/*.pkl' --exclude='**/*.json'`
1. Sync local code to dumpster-fire code because gitlab isn't accessible from dumpster-fire: `rsync -auv wm-curation-recommendation-service/ [email protected]:~/wm-curation-recommendation --exclude=.venv/ --exclude=data/ --exclude=.git/ --exclude=.vscode/ --exclude=__pycache__/ --exclude=experiments/ --exclude=scripts/resources/ --exclude='**/*.pkl' --exclude='**/*.json'`. If `wm-curation-recommendation-service` is not the name of your folder containing the code, replace it with your root folder's name.
2. SSH to dumpster-fire and run the app using the instructions above
3. In order to run the app on the server such that it doesn't end when you close your ssh session, you need to run your commands in a tmux session. Run `tmux new -s curation-service`, then `tmux a -t curation-service`. This creates a session that will run even when you exit your ssh session. From within this tmux session, you can run the app using the instructions above. To exit: `ctrl + b, + d`
# Quick Docker Tips
Expand Down Expand Up @@ -123,8 +123,7 @@ curl -H "Content-type:application/json" -XPOST http://<curation_server>:<port>/r
{
"remove_factors": true,
"remove_statements": true,
"es_host": <destination_es>,
"es_port": 9200
"es_url": <destination_es>:9200
}
'
```
Expand All @@ -141,8 +140,7 @@ http://<curation_server>:<port>/recommendation/task/<task_id>
```
curl -H "Content-type: application/json" -XPOST http://<curation_server>:<port>/recommendation/-delta-ingest/<indra_index> -d'
{
"es_host": <destination_es>,
"es_port": 9200,
"es_url": <destination_es>:9200,
"statement_ids": [new statements from project],
"project_id": <projectId>
}
Expand Down
9 changes: 2 additions & 7 deletions scripts/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,14 @@
start_time = time.time()

# Create ES connection from args
es_args = args.url.rsplit(':', 1)
es_args = args.url
es_user = args.username
es_pw = args.password
# needed if we have auth
es_scheme = "https"
if es_user is None:
es_user = ""
es_scheme = "http"
if es_pw is None:
es_pw = ""
if es_scheme is None:
es_scheme = "http"
es = Elastic(es_args[0].strip(), es_args[1].strip(), http_auth=(es_user, es_pw), scheme=es_scheme, timeout=60)
es = Elastic(es_args.strip(), http_auth=(es_user, es_pw), timeout=60)

try:
print(f'Generating recommendations for index: {args.index}')
Expand Down
9 changes: 4 additions & 5 deletions src/elastic/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,18 @@ class Elastic:
_port = None

@staticmethod
def client(host, port, **kwargs):
def client(host, **kwargs):
"""
Return a new ES client
"""
return Elastic(host, port, **kwargs)
return Elastic(host, **kwargs)

def __init__(self, host, port, **kwargs):
def __init__(self, host, **kwargs):
"""
Initialize the ElasticSearch Client
"""
self._host = host
self._port = port
self.client = Elasticsearch(host, port=port, verify_certs=False, **kwargs)
self.client = Elasticsearch(host, verify_certs=False, **kwargs)

def get_host(self):
return self._host
Expand Down
6 changes: 2 additions & 4 deletions src/web/celery/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ def compute_recommendations(self,
remove_factors,
remove_statements,
remove_concepts,
es_host,
es_port):
es_url):
message, state = (
'Creating Recommendations',
'PROGRESS'
Expand All @@ -29,8 +28,7 @@ def compute_recommendations(self,
progress(self, state, message)

# Ingest
es = Elastic(es_host, es_port, http_auth=(Config.ES_USERNAME, Config.ES_PASSWORD),
scheme=Config.SCHEME, timeout=60)
es = Elastic(es_url, http_auth=(Config.ES_USERNAME, Config.ES_PASSWORD), timeout=60)
ingestor = Ingestor(
es=es,
kb_index=kb_index,
Expand Down
9 changes: 2 additions & 7 deletions src/web/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,9 @@ class Config:
ES_USERNAME = os.getenv('ES_USERNAME', '')
ES_PASSWORD = os.getenv('ES_PASSWORD', '')

SCHEME = 'http'
if ES_USERNAME != '':
SCHEME = 'https'
ES = Elastic(
os.getenv('ES_HOST', ''),
os.getenv('ES_PORT', '9200'),
http_auth=(ES_USERNAME, ES_PASSWORD),
scheme=SCHEME
os.getenv('ES_URL', ''),
http_auth=(ES_USERNAME, ES_PASSWORD)
)

DEBUG = True
Expand Down
20 changes: 8 additions & 12 deletions src/web/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,10 @@ def compute_recommendations(knowledge_base_id):
remove_factors = body and bool(body.get('remove_factors'))
remove_statements = body and bool(body.get('remove_statements'))
remove_concepts = body and bool(body.get('remove_concepts'))
es_host = body.get('es_host')
es_port = body.get('es_port')
es_url = body.get('es_url')

if es_host is None or es_port is None:
raise BadRequest(description="es_host and es_port are required arguments.")
if es_url is None:
raise BadRequest(description="es_url is a required argument.")

# Run the Long running ingestion
task = tasks.compute_recommendations.delay(
Expand All @@ -59,8 +58,7 @@ def compute_recommendations(knowledge_base_id):
remove_factors=remove_factors,
remove_statements=remove_statements,
remove_concepts=remove_concepts,
es_host=es_host,
es_port=es_port
es_url=es_url
)
return jsonify({
'task_id': task.id
Expand All @@ -71,13 +69,12 @@ def compute_recommendations(knowledge_base_id):
def compute_delta_recommendations(knowledge_base_id):
# Get the params
body = request.get_json()
es_host = body.get('es_host')
es_port = body.get('es_port')
es_url = body.get('es_url')
project_index = body.get('project_index')
statement_ids = body.get('statement_ids')

if es_host is None or es_port is None:
raise BadRequest(description='es_host and es_port are required arguments.')
if es_url is None:
raise BadRequest(description='es_url is a required argument.')

if len(statement_ids) == 0:
raise BadRequest(description='statement_ids must not be empty.')
Expand All @@ -90,8 +87,7 @@ def compute_delta_recommendations(knowledge_base_id):
remove_factors=False,
remove_statements=False,
remove_concepts=False,
es_host=es_host,
es_port=es_port
es_url=es_url
)
return jsonify({
'task_id': task.id
Expand Down

0 comments on commit ebd725a

Please sign in to comment.