Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ui similar #37

Merged
merged 7 commits into from
Oct 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include flatgov/elasticsearch/*.json
include flatgov/elasticsearch/*.conf
include flatgov/samples/*.txt
recursive-include package *
1 change: 1 addition & 0 deletions _version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.1.1"
File renamed without changes.
16 changes: 7 additions & 9 deletions scripts/billdata.py → flatgovtools/billdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@
from functools import reduce

try:
from . import constants
from . import utils
from flatgovtools import constants, utils
except:
import constants
import utils
from . import constants, utils

logging.basicConfig(filename='billdata.log', filemode='w', level='INFO')
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -43,7 +41,7 @@ def getBillFromDirname(dirName: str) -> str:
if m and m.groups():
return ''.join(list(m.groups()))
else:
return None
return ''


def getTopBillLevel(dirName: str):
Expand Down Expand Up @@ -165,14 +163,14 @@ def loadDataJSON(billNumber: str, congressDataDir = constants.PATH_TO_CONGRESSDA
[congress, billType, numberOfBill, billVersion] = billNumberMatch.groups()
billTypeNumber = billType + numberOfBill
else:
logger.warning('Bill number is not of the correct form (e.g. 116hr200): ' + billNumber)
logger.warning('Bill number is not of the correct form (e.g. 116hr200): {0}'.format(billNumber))
return

dataJSONPath = os.path.join(congressDataDir, congress, 'bills', billType, billTypeNumber, 'data.json')
if os.path.isfile(dataJSONPath):
return loadJSON(dataJSONPath)
else:
logger.warning('No data.json found for: ' + billNumber + ' at ' + dataJSONPath)
logger.warning('No data.json found for: {0} at {1}'.format(billNumber, dataJSONPath))
return

def loadBillsMeta(billMetaPath = constants.PATH_TO_BILLS_META, zip = True):
Expand All @@ -182,13 +180,13 @@ def loadBillsMeta(billMetaPath = constants.PATH_TO_BILLS_META, zip = True):
with gzip.open(billMetaPath + '.gz', 'rt', encoding='utf-8') as zipfile:
billsMeta = json.load(zipfile)
except:
raise Exception('No file at' + billMetaPath + '.gz')
raise Exception('No file at {0}.gz'.format(billMetaPath))
else:
try:
with open(billMetaPath, 'r') as f:
billsMeta = json.load(f)
except:
raise Exception('No file at' + billMetaPath + '.gz')
raise Exception('No file at {0}.gz'.format(billMetaPath))

return billsMeta

Expand Down
48 changes: 43 additions & 5 deletions scripts/constants.py → flatgovtools/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,27 @@
import os
from copy import deepcopy
from re import S
import pkgutil
import json

# Locations of the bundled sample texts and the Elasticsearch index mapping.
# They are resolved relative to this module by pkgutil.get_data, and relative
# to the current working directory by the open() fallback below.
PATH_SEC_602 = os.path.join('samples', '116hr5150-sec602.txt')
PATH_MAL = os.path.join('samples', 'maralago.txt')
PATH_BILLSECTIONS_JSON = os.path.join('elasticsearch', 'billsections_mapping.json')

try:
    # pkgutil.get_data returns bytes, so decode explicitly: str(bytes) would
    # yield the repr ("b'...'") instead of the text. It can also return None,
    # in which case .decode raises and we drop into the fallback below.
    SAMPLE_TEXT_602 = pkgutil.get_data(__name__, PATH_SEC_602).decode('utf-8')
    SAMPLE_TEXT_MAL = pkgutil.get_data(__name__, PATH_MAL).decode('utf-8')
    BILLSECTION_MAPPING = json.loads(pkgutil.get_data(__name__, PATH_BILLSECTIONS_JSON).decode('utf-8'))
except Exception as err:
    # Best-effort fallback: read the same resources as plain files, using the
    # same encoding as the package branch so both paths give identical text.
    print(err)
    with open(PATH_SEC_602, 'r', encoding='utf-8') as f:
        SAMPLE_TEXT_602 = f.read()

    with open(PATH_MAL, 'r', encoding='utf-8') as f:
        SAMPLE_TEXT_MAL = f.read()

    with open(PATH_BILLSECTIONS_JSON, 'r', encoding='utf-8') as f:
        BILLSECTION_MAPPING = json.load(f)

PATH_TO_BILLS_META = os.path.join('..', 'billsMeta.json')
PATH_TO_CONGRESSDATA_DIR = os.path.join('..', '..', 'congress', 'data')
Expand Down Expand Up @@ -103,9 +124,15 @@
The Commissioner of Food and Drugs and the Secretary of Agriculture shall establish guidance for food labelers on how to determine quality dates and safety dates for food products.
"""

def getSampleText(text_path: str = '../samples/maralago.txt'):
with open(text_path, 'r') as f:
return f.read()
def getQueryText(text_path: str=''):
    """Return the text to use as a similarity query.

    Args:
        text_path: Path of a text file to read. When empty (the default),
            the module-level SAMPLE_TEXT_MAL sample is returned instead.

    Returns:
        The contents of the file at text_path ('' for an empty file), or
        SAMPLE_TEXT_MAL when no path is given.

    Raises:
        OSError: If text_path is given but the file cannot be opened.
    """
    if not text_path:
        return SAMPLE_TEXT_MAL
    with open(text_path, 'r') as f:
        # read() always returns a str, so no None/empty fixup is needed.
        return f.read()

# more like this query (working)
SAMPLE_QUERY_NESTED_MLT = {
Expand Down Expand Up @@ -133,6 +160,17 @@ def getSampleText(text_path: str = '../samples/maralago.txt'):
}

SAMPLE_QUERY_NESTED_MLT_MARALAGO = deepcopy(SAMPLE_QUERY_NESTED_MLT)
SAMPLE_QUERY_NESTED_MLT_MARALAGO['query']['nested']['query']['more_like_this']['like'] = getSampleText()
SAMPLE_QUERY_NESTED_MLT_MARALAGO['query']['nested']['query']['more_like_this']['like'] = getQueryText()
SAMPLE_QUERY_NESTED_MLT_116hr5150sec602 = deepcopy(SAMPLE_QUERY_NESTED_MLT)
SAMPLE_QUERY_NESTED_MLT_116hr5150sec602['query']['nested']['query']['more_like_this']['like'] = getSampleText('../samples/116hr5150-sec602.txt')
SAMPLE_QUERY_NESTED_MLT_116hr5150sec602['query']['nested']['query']['more_like_this']['like'] = SAMPLE_TEXT_602

def makeMLTQuery(queryText: str, queryTextPath: str=''):
    """Build a nested more-like-this query around the given text.

    Args:
        queryText: Text to search for. Takes precedence over queryTextPath.
        queryTextPath: Optional path of a file to read the text from; only
            consulted when queryText is empty.

    Returns:
        A deep copy of SAMPLE_QUERY_NESTED_MLT whose 'like' clause is set to
        the query text, so the shared template is never mutated.

    Raises:
        Exception: If the text cannot be read from queryTextPath; the
            original error is attached as the cause.
    """
    if queryTextPath and not queryText:
        try:
            queryText = getQueryText(queryTextPath)
        except Exception as err:
            raise Exception('Error getting text from path: {0}'.format(err)) from err

    newQuery = deepcopy(SAMPLE_QUERY_NESTED_MLT)
    newQuery['query']['nested']['query']['more_like_this']['like'] = queryText
    return newQuery
File renamed without changes.
59 changes: 50 additions & 9 deletions scripts/elastic_load.py → flatgovtools/elastic_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@
from collections import OrderedDict

try:
from . import constants
from flatgovtools import constants
except:
import constants
from . import constants

bill_file = "BILLS-116hr1500rh.xml"
bill_file2 = "BILLS-116hr299ih.xml"
PATH_BILL = os.path.join(constants.PATH_TO_CONGRESSDATA_XML_DIR, bill_file)
PATH_BILLSECTIONS_JSON = os.path.join('..', 'elasticsearch', 'billsections_mapping.json')

def getXMLDirByCongress(congress: str ='116', docType: str = 'dtd') -> str:
    """Return the path <PATH_TO_DATA_DIR>/<congress>/<docType>."""
    path_parts = (constants.PATH_TO_DATA_DIR, congress, docType)
    return os.path.join(*path_parts)
Expand All @@ -23,8 +22,6 @@ def getMapping(map_path):
with open(map_path, 'r') as f:
return json.load(f)

BILLSECTION_MAPPING = getMapping(PATH_BILLSECTIONS_JSON)

def getText(item):
if item is None:
return ''
Expand All @@ -36,7 +33,8 @@ def getText(item):
except:
return ''

def createIndex(index: str='billsections', body: dict=BILLSECTION_MAPPING, delete=False):

def createIndex(index: str='billsections', body: dict=constants.BILLSECTION_MAPPING, delete=False):
if delete:
try:
es.indices.delete(index=index)
Expand Down Expand Up @@ -126,19 +124,62 @@ def refreshIndices(index: str="billsections"):
def runQuery(index: str='billsections', query: dict=constants.SAMPLE_QUERY_NESTED_MLT_MARALAGO) -> dict:
    """Run query against index through the module-level es client.

    Returns whatever es.search returns for the request.
    """
    response = es.search(index=index, body=query)
    return response

def moreLikeThis(queryText: str, index: str='billsections'):
    """Search index with a more-like-this query built from queryText.

    The query body comes from constants.makeMLTQuery; the result is whatever
    runQuery returns for it.
    """
    return runQuery(index=index, query=constants.makeMLTQuery(queryText))

def printResults(res):
    """Print the total hit count, then the _source of every hit in res."""
    hit_count = res['hits']['total']['value']
    print("Got %d Hits:" % hit_count)
    for entry in res['hits']['hits']:
        print(entry["_source"])

def getHits(res):
    """Return the list stored under res['hits']['hits'].

    Args:
        res: A mapping shaped like a search response (or one of its nested
            result objects), or None.

    Returns:
        The inner 'hits' list, or [] when res is None/empty or either level
        of 'hits' is missing, so callers can always iterate the result.
    """
    if not res:
        return []
    outer = res.get('hits') or {}
    return outer.get('hits') or []

def getResultBillnumbers(res):
return [hit.get('_source').get('billnumber') for hit in res['hits']['hits']]
return [hit.get('_source').get('billnumber') for hit in getHits(res)]

def getInnerResults(res):
return [hit['inner_hits'] for hit in res['hits']['hits']]
return [hit.get('inner_hits') for hit in getHits(res)]

def getSimilarSections(res):
    """Flatten a nested search response into one summary record per bill.

    For every top-level hit, the first entry of its inner 'sections' hits is
    combined with the bill-level metadata from the hit's '_source'.

    Args:
        res: Search response mapping with 'hits' -> 'hits', where each hit
            carries '_source' and 'inner_hits' -> 'sections' -> 'hits' -> 'hits'.

    Returns:
        A list of dicts with the keys score, billnumber, congress, session,
        legisnum, title, section_num, section_header, section_xml and
        section_text. Hits without any inner section hit are skipped. An
        empty list is returned if the response cannot be processed.
    """
    similarSections = []
    try:
        hits = (res.get('hits') or {}).get('hits') or []
        for hit in hits:
            sections = (hit.get('inner_hits') or {}).get('sections') or {}
            sectionHits = (sections.get('hits') or {}).get('hits') or []
            if not sectionHits:
                # Skip just this bill; one hit without inner sections should
                # not throw away every other result.
                continue
            firstSection = sectionHits[0]
            sectionSource = firstSection.get('_source') or {}
            billSource = hit.get('_source') or {}

            title = ''
            dublinCores = billSource.get('dc') or []
            dublinCore = dublinCores[0] if dublinCores else ''
            # Non-greedy so the capture stops at the first tag after the
            # title instead of running to the last '<' on the line.
            titleMatch = re.search(r'<dc:title>(.*?)<', dublinCore)
            if titleMatch:
                title = titleMatch[1].strip()

            similarSections.append({
                "score": firstSection.get('_score', ''),
                "billnumber": billSource.get('billnumber', ''),
                # billSource already is the hit's '_source'; read congress
                # from it directly, like the neighbouring fields.
                "congress": billSource.get('congress', ''),
                "session": billSource.get('session', ''),
                "legisnum": billSource.get('legisnum', ''),
                "title": title,
                "section_num": sectionSource.get('section_num', ''),
                "section_header": sectionSource.get('section_header', ''),
                "section_xml": sectionSource.get('section_xml', ''),
                "section_text": sectionSource.get('section_text', ''),
            })
    except Exception as err:
        # Deliberate best-effort boundary: callers expect a list, never an
        # exception, when the response is malformed.
        print(err)
        return []
    return similarSections

if __name__ == "__main__":
createIndex(delete=True)
createIndex(delete=False)
indexBills()
refreshIndices()
res = runQuery()
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
import re
import logging
import argparse
from billdata import loadBillsMeta, saveBillsMeta
try:
from . import constants
from flatgovtools.billdata import loadBillsMeta, saveBillsMeta
from flatgovtools import constants
except:
import constants
from .billdata import loadBillsMeta, saveBillsMeta
from . import constants


logging.basicConfig(filename='process_bill_meta.log',
filemode='w', level='INFO')
Expand Down
14 changes: 6 additions & 8 deletions scripts/relatedBills.py → flatgovtools/relatedBills.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,14 @@
import argparse
import logging
import re
import json
import gzip
try:
from .constants import BILL_NUMBER_REGEX_COMPILED, PATH_TO_RELATEDBILLS_DIR, PATH_TO_NOYEAR_TITLES_INDEX
from flatgovtools.constants import PATH_TO_RELATEDBILLS_DIR, PATH_TO_NOYEAR_TITLES_INDEX
from flatgovtools.utils import loadTitlesIndex, loadRelatedBillJSON, dumpRelatedBillJSON
from flatgovtools.billdata import deep_get, billIdToBillNumber, loadJSON, loadDataJSON, loadBillsMeta
except:
from .constants import PATH_TO_RELATEDBILLS_DIR, PATH_TO_NOYEAR_TITLES_INDEX
from .utils import loadTitlesIndex, loadRelatedBillJSON, dumpRelatedBillJSON
from .billdata import deep_get, billIdToBillNumber, loadJSON, loadDataJSON, loadBillsMeta
except:
from constants import BILL_NUMBER_REGEX_COMPILED, PATH_TO_RELATEDBILLS_DIR, PATH_TO_NOYEAR_TITLES_INDEX
from utils import loadTitlesIndex, loadRelatedBillJSON, dumpRelatedBillJSON
from billdata import deep_get, billIdToBillNumber, loadJSON, loadDataJSON, loadBillsMeta

OF_YEAR_REGEX = re.compile(r'\sof\s[0-9]+$')

Expand All @@ -38,7 +36,7 @@ def addSimilarTitles(noYearTitlesIndex: dict, billsRelated = {}):
for bill_outer in relatedBills:
for bill_inner in relatedBills:
similarTitle = relatedTitles.get(bill_inner)
if len(similarTitle) == 1:
if similarTitle and len(similarTitle) == 1:
similarTitle = similarTitle[0]
else:
continue
Expand Down
File renamed without changes.
File renamed without changes.
5 changes: 1 addition & 4 deletions scripts/utils.py → flatgovtools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@
import logging
import gzip
import json
try:
from . import constants
except:
import constants
from flatgovtools import constants

logging.basicConfig(filename='utils.log',
filemode='w', level='INFO')
Expand Down
1 change: 1 addition & 0 deletions server_py/flatgov/bills/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

urlpatterns = [
path('', views.index, name='index'),
path(r'similar', views.similar_bills_view),
path('<str:bill>', views.bill_view)

]
39 changes: 36 additions & 3 deletions server_py/flatgov/bills/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from functools import reduce
import json
from typing import Dict

from flatgovtools.elastic_load import getSimilarSections, moreLikeThis, getResultBillnumbers, getInnerResults

def deep_get(dictionary: Dict, *keys):
"""
Expand Down Expand Up @@ -86,6 +86,33 @@ def makeName(commaName):
return ''
return ' '.join(reversed(commaName.split(',')))

def similar_bills_view(request):
    """Render the similar-bills page for the query text held in the session.

    Reads 'queryText' from request.session, runs a more-like-this search
    for it and passes the matches to the 'bills/bill-similar.html' template
    under the 'billQuery' context key.
    """
    # NOTE(review): 'queryText' is presumably stored in the session by the
    # view that handles the search form before redirecting here -- confirm.
    queryText = request.session.get('queryText') or ''

    res = moreLikeThis(queryText=queryText)
    similarBillNumbers = getResultBillnumbers(res)
    similarSections = getSimilarSections(res)

    noResults = not similarSections
    bestMatch = {} if noResults else similarSections[0]

    context = {
        "billQuery": {
            "queryText": queryText,
            "bestMatch": bestMatch,
            "similarBillNumbers": similarBillNumbers,
            # Serialized here so the template receives a JSON string.
            "similarSections": json.dumps(similarSections),
            "noResults": noResults
        }
    }
    return render(request, 'bills/bill-similar.html', context)

def bill_view(request, bill):
context = {'billCongressTypeNumber': bill, 'bill': {}}

Expand All @@ -105,6 +132,8 @@ def bill_view(request, bill):
relatedBillData = json.load(f)

relatedBills = deep_get(relatedBillData, 'related')
if not relatedBills:
relatedBills = {}

context['bill']['meta'] = bill_meta
bill_summary = deep_get(bill_meta, 'summary', 'text')
Expand All @@ -117,7 +146,7 @@ def bill_view(request, bill):

relatedTable = []
for bctn in bctns:
relatedTableItem = relatedBills.get(bctn, '')
relatedTableItem = relatedBills.get(bctn, {})
relatedTableItem['billCongressTypeNumber'] = bctn
# TODO handle the same bill number (maybe put it at the top?)
if bill == bctn:
Expand All @@ -144,7 +173,11 @@ def bill_view(request, bill):
context['bill']['related_table'] = json.dumps(relatedTable)

context['bill']['type_abbrev'] = makeTypeAbbrev(bill_type)
sponsor_name = cleanSponsorName(deep_get(bill_meta, 'sponsor', 'name'))
meta_sponsor_name = deep_get(bill_meta, 'sponsor', 'name')
if meta_sponsor_name:
sponsor_name = cleanSponsorName(meta_sponsor_name)
else:
sponsor_name = ''
title = deep_get(bill_meta, 'sponsor', 'title')
if not title:
title = ''
Expand Down
Binary file modified server_py/flatgov/db.sqlite3
Binary file not shown.
6 changes: 6 additions & 0 deletions server_py/flatgov/home/forms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django import forms

class QueryForm(forms.Form):
    """Form with a single unlabeled textarea for the bill text to search.

    This is a plain forms.Form (not a ModelForm): it is not bound to a model.
    """

    # NOTE(review): django's BaseForm.__init__ appears to assign self.auto_id
    # from its own auto_id argument, which would shadow this class attribute
    # on every instance -- confirm, and pass auto_id=False when constructing
    # the form if id-less widgets are actually wanted.
    auto_id = False
    queryText = forms.CharField(label="", widget=forms.Textarea(attrs={"rows":8, "cols":60, "placeholder":"Enter text from a bill section you want to search."}))
Loading