diff --git a/invenio/ext/legacy/__init__.py b/invenio/ext/legacy/__init__.py
index f0f84b8159..0f0bbed481 100644
--- a/invenio/ext/legacy/__init__.py
+++ b/invenio/ext/legacy/__init__.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
+##
## This file is part of Invenio.
-## Copyright (C) 2011, 2012, 2013, 2014 CERN.
+## Copyright (C) 2011, 2012, 2013, 2014, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -23,12 +24,12 @@
import os
import sys
-## Import the remote debugger as a first thing, if allowed
-#FIXME enable remote_debugger when invenio.config is ready
-#try:
-# from invenio.utils import remote_debugger
-#except:
-# remote_debugger = None
+# Import the remote debugger as a first thing, if allowed
+# FIXME enable remote_debugger when invenio.config is ready
+# try:
+# from invenio.utils import remote_debugger
+# except:
+# remote_debugger = None
from werkzeug.exceptions import HTTPException
from werkzeug.wrappers import BaseResponse
@@ -57,10 +58,8 @@ def cli_cmd_reset(sender, yes_i_know=False, drop=True, **kwargs):
# cli_cmd_reset_fieldnames(conf)
for cmd in ["%s/bin/webaccessadmin -u admin -c -a -D" % CFG_PREFIX,
- "%s/bin/webcoll -u admin" % CFG_PREFIX,
- "%s/bin/webcoll 1" % CFG_PREFIX,
"%s/bin/bibsort -u admin --load-config" % CFG_PREFIX,
- "%s/bin/bibsort 2" % CFG_PREFIX, ]:
+ "%s/bin/bibsort 1" % CFG_PREFIX, ]:
if os.system(cmd):
print("ERROR: failed execution of", cmd)
sys.exit(1)
diff --git a/invenio/ext/sqlalchemy/utils.py b/invenio/ext/sqlalchemy/utils.py
index b341007314..9556330fbe 100644
--- a/invenio/ext/sqlalchemy/utils.py
+++ b/invenio/ext/sqlalchemy/utils.py
@@ -41,6 +41,11 @@ def save(self):
from sqlalchemy.exc import OperationalError
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.orm import class_mapper, properties
+from sqlalchemy.orm.collections import (
+ InstrumentedList,
+ attribute_mapped_collection,
+ collection,
+)
first_cap_re = re.compile('(.)([A-Z][a-z]+)')
all_cap_re = re.compile('([a-z0-9])([A-Z])')
@@ -258,3 +263,102 @@ def test_sqla_utf8_chain():
table.drop(bind=db.engine)
print(" [OK]")
+
+
+class IntbitsetPickle(object):
+
+    """Pickle-style ``dumps``/``loads`` pair for ``intbitset`` values.
+
+    ``None`` is dumped as an empty ``intbitset`` and unreadable input is
+    loaded as one.
+    """
+
+    def dumps(self, obj, protocol=None):
+        """Dump intbitset to byte stream.
+
+        :param obj: object exposing ``fastdump()``, or ``None``.
+        :param protocol: ignored; presumably accepted only to mirror the
+            signature of ``pickle.dumps`` (confirm against callers).
+        :returns: ``obj.fastdump()``, or the dump of an empty ``intbitset``
+            when ``obj`` is ``None``.
+        """
+        if obj is not None:
+            return obj.fastdump()
+        return intbitset([]).fastdump()
+
+    def loads(self, obj):
+        """Load byte stream to intbitset.
+
+        :param obj: value accepted by the ``intbitset`` constructor.
+        :returns: ``intbitset(obj)``, or an empty ``intbitset`` when the
+            constructor rejects ``obj``.
+        """
+        try:
+            return intbitset(obj)
+        except Exception:
+            # Best effort: bad data yields an empty set.  ``Exception``
+            # (not a bare ``except``) lets KeyboardInterrupt and SystemExit
+            # propagate.
+            return intbitset()
+
+
+def IntbitsetCmp(x, y):
+    """Tell whether two intbitsets are equal, treating ``None`` as unequal.
+
+    :returns: ``False`` if either argument is ``None`` (including when both
+        are), otherwise the result of ``x == y``.
+    """
+    if x is not None and y is not None:
+        return x == y
+    return False
+
+
+class OrderedList(InstrumentedList):
+
+    """Instrumented list whose order is carried by each item's ``score``.
+
+    Items must expose a writable, comparable ``score`` attribute.  The
+    methods here only assign scores; new items are always stored at the end
+    of the underlying list.
+    """
+
+    def append(self, item):
+        """Add ``item`` after all others by giving it the highest score.
+
+        The score is one more than the current maximum, or ``1`` when the
+        list is empty.
+        """
+        if self:
+            item.score = max(obj.score for obj in self) + 1
+        else:
+            item.score = 1
+        InstrumentedList.append(self, item)
+
+    def set(self, item, index=0):
+        """Add ``item`` at position ``index`` of the score ordering.
+
+        :param item: object to add; its ``score`` is overwritten.
+        :param index: target position among the items sorted by score.
+            Values past the end place ``item`` last; negative values place
+            it first.  On an empty list the score is set to ``index``.
+        """
+        if self:
+            s = sorted(self, key=lambda obj: obj.score)
+            if index >= len(s):
+                item.score = s[-1].score + 1
+            elif index < 0:
+                item.score = s[0].score
+                index = 0
+            else:
+                item.score = s[index].score + 1
+
+            # Push every item from ``index`` onwards above the new item,
+            # keeping their relative order.
+            for i, it in enumerate(s[index:]):
+                it.score = item.score + i + 1
+                # if s[i+1].score more then break
+        else:
+            item.score = index
+        InstrumentedList.append(self, item)
+
+    def pop(self, item):
+        """Remove ``item`` from the list and return it.
+
+        Unlike ``list.pop`` this takes the object itself, not an index.
+
+        :returns: the removed object, or ``None`` if ``item`` is not present.
+        """
+        # Look the item up in the list itself.  A position found in a
+        # score-sorted copy is only valid when the list happens to be stored
+        # in score order, which ``set`` does not guarantee.
+        for position, candidate in enumerate(self):
+            if candidate == item:
+                return InstrumentedList.pop(self, position)
+        return None
+
+
+def attribute_multi_dict_collection(creator, key_attr, val_attr):
+    """Define new attribute based mapping.
+
+    Build a dict-like collection class in which every key maps to a list of
+    objects rather than to a single one.
+
+    :param creator: callable ``creator(key, value)`` returning the object to
+        store when an item is assigned with ``collection[key] = value``.
+    :param key_attr: callable returning the key under which an object is
+        grouped.
+    :param val_attr: callable returning the value exposed for an object on
+        item lookup.
+    :returns: the collection class (not an instance).
+    """
+    class MultiMappedCollection(dict):
+
+        """Mapping of key to the objects sharing that key.
+
+        All state is kept in ``self._data``; none of the methods defined
+        here write to the inherited ``dict`` storage.
+        """
+
+        def __init__(self, data=None):
+            self._data = data or {}
+
+        @collection.appender
+        def _append(self, obj):
+            """Store ``obj`` in the list kept for ``key_attr(obj)``."""
+            self._data.setdefault(key_attr(obj), []).append(obj)
+
+        def __setitem__(self, key, value):
+            """Add (never replace) the object ``creator(key, value)``."""
+            self._append(creator(key, value))
+
+        def __getitem__(self, key):
+            """Return a tuple of ``val_attr(obj)`` for objects under ``key``."""
+            return tuple(val_attr(obj) for obj in self._data[key])
+
+        @collection.remover
+        def _remove(self, obj):
+            """Drop ``obj`` from the list kept for ``key_attr(obj)``."""
+            self._data[key_attr(obj)].remove(obj)
+
+        @collection.iterator
+        def _iterator(self):
+            """Yield every stored object, key by key."""
+            # ``values()`` exists on both Python 2 and 3, unlike
+            # ``itervalues()``.
+            for objs in self._data.values():
+                for obj in objs:
+                    yield obj
+
+        def __repr__(self):
+            return '%s(%r)' % (type(self).__name__, self._data)
+
+    return MultiMappedCollection
diff --git a/invenio/ext/template/context_processor.py b/invenio/ext/template/context_processor.py
index 12ead827af..b7866c5325 100644
--- a/invenio/ext/template/context_processor.py
+++ b/invenio/ext/template/context_processor.py
@@ -52,7 +52,7 @@ class template_args(object):
def setup_app(app):
- @template_args('search.index', app=app)
+ @template_args('collections.index', app=app)
def foo():
return dict(foo='bar')
@@ -61,7 +61,7 @@ def foo():
.. code-block:: python
- from invenio.modules.search.views.search import index
+ from invenio.modules.collections.views.collections import index
@template_args(index)
def bar():
diff --git a/invenio/legacy/bibcirculation/webinterface.py b/invenio/legacy/bibcirculation/webinterface.py
index 169aa8da17..05af7dda88 100644
--- a/invenio/legacy/bibcirculation/webinterface.py
+++ b/invenio/legacy/bibcirculation/webinterface.py
@@ -72,7 +72,7 @@
CFG_BIBCIRCULATION_ACQ_STATUS_NEW, \
AMZ_ACQUISITION_IDENTIFIER_TAG
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
get_colID = lambda name: Collection.query.filter_by(name=name).value('id')
diff --git a/invenio/legacy/bibdocfile/cli.py b/invenio/legacy/bibdocfile/cli.py
index 3506c1485c..f6c50165bd 100644
--- a/invenio/legacy/bibdocfile/cli.py
+++ b/invenio/legacy/bibdocfile/cli.py
@@ -433,10 +433,11 @@ def print_table(title, table):
for row in table:
print("\t".join(str(elem) for elem in row))
- for collection, reclist in run_sql("SELECT name, reclist FROM collection ORDER BY name"):
+ from invenio.modules.collections.cache import get_collection_reclist
+ for collection, in run_sql("SELECT name FROM collection ORDER BY name"):
print("-" * 79)
print("Statistic for: %s " % collection)
- reclist = intbitset(reclist)
+ reclist = get_collection_reclist(collection)
if reclist:
sqlreclist = "(" + ','.join(str(elem) for elem in reclist) + ')'
print_table("Formats", run_sql("SELECT COUNT(format) as c, format FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY format ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql
diff --git a/invenio/legacy/bibdocfile/webinterface.py b/invenio/legacy/bibdocfile/webinterface.py
index ba3bbbebc3..286841039d 100644
--- a/invenio/legacy/bibdocfile/webinterface.py
+++ b/invenio/legacy/bibdocfile/webinterface.py
@@ -52,14 +52,14 @@
from invenio.base.i18n import gettext_set_language
from invenio.legacy.search_engine import \
guess_primary_collection_of_a_record, record_exists, \
- create_navtrail_links, check_user_can_view_record, \
- is_user_owner_of_record
+ create_navtrail_links, check_user_can_view_record
+from invenio.modules.records.access import is_user_owner_of_record
from invenio.legacy.bibdocfile.api import BibRecDocs, normalize_format, file_strip_ext, \
stream_restricted_icon, BibDoc, InvenioBibDocFileError, \
get_subformat_from_format
from invenio.ext.logging import register_exception
from invenio.legacy.websearch.adminlib import get_detailed_page_tabs, get_detailed_page_tabs_counts
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
import invenio.legacy.template
bibdocfile_templates = invenio.legacy.template.load('bibdocfile')
webstyle_templates = invenio.legacy.template.load('webstyle')
diff --git a/invenio/legacy/bibedit/utils.py b/invenio/legacy/bibedit/utils.py
index 43fd63dcdf..00b5c5a5c3 100644
--- a/invenio/legacy/bibedit/utils.py
+++ b/invenio/legacy/bibedit/utils.py
@@ -87,7 +87,7 @@
from invenio.base.globals import cfg
from invenio.legacy.bibcatalog.api import BIBCATALOG_SYSTEM
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
try:
from cPickle import loads
diff --git a/invenio/legacy/bibexport/sitemap.py b/invenio/legacy/bibexport/sitemap.py
index 0130530472..8275eb1aa7 100644
--- a/invenio/legacy/bibexport/sitemap.py
+++ b/invenio/legacy/bibexport/sitemap.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
-## Copyright (C) 2008, 2010, 2011, 2014 CERN.
+## Copyright (C) 2008, 2010, 2011, 2014, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -39,7 +39,6 @@
from invenio.config import CFG_SITE_URL, CFG_WEBDIR, CFG_ETCDIR, \
CFG_SITE_RECORD, CFG_SITE_LANGS, CFG_TMPSHAREDDIR
from intbitset import intbitset
-from invenio.legacy.websearch.webcoll import Collection
from invenio.legacy.bibsched.bibtask import write_message, task_update_progress, task_sleep_now_if_required
from invenio.utils.text import encode_for_xml
from invenio.utils.url import get_canonical_and_alternates_urls
@@ -115,19 +114,19 @@ def get_collection_last_modification(collection):
return max(minimum_timestamp, last_mod)
output = []
- for coll_name in base_collections:
- mother_collection = Collection(coll_name)
- if not mother_collection.restricted_p():
- last_mod = get_collection_last_modification(mother_collection)
- output.append((coll_name, last_mod))
- for descendant in mother_collection.get_descendants(type='r'):
- if not descendant.restricted_p():
- last_mod = get_collection_last_modification(descendant)
- output.append((descendant.name, last_mod))
- for descendant in mother_collection.get_descendants(type='v'):
- if not descendant.restricted_p():
- last_mod = get_collection_last_modification(descendant)
- output.append((descendant.name, last_mod))
+ # for coll_name in base_collections:
+ # mother_collection = Collection(coll_name)
+ # if not mother_collection.restricted_p():
+ # last_mod = get_collection_last_modification(mother_collection)
+ # output.append((coll_name, last_mod))
+ # for descendant in mother_collection.get_descendants(type='r'):
+ # if not descendant.restricted_p():
+ # last_mod = get_collection_last_modification(descendant)
+ # output.append((descendant.name, last_mod))
+ # for descendant in mother_collection.get_descendants(type='v'):
+ # if not descendant.restricted_p():
+ # last_mod = get_collection_last_modification(descendant)
+ # output.append((descendant.name, last_mod))
return output
def filter_fulltexts(recids, fulltext_type=None):
diff --git a/invenio/legacy/bibindex/engine.py b/invenio/legacy/bibindex/engine.py
index 2933f06142..a1397afebd 100644
--- a/invenio/legacy/bibindex/engine.py
+++ b/invenio/legacy/bibindex/engine.py
@@ -1435,7 +1435,7 @@ def add_recID_range(self, recID1, recID2):
wlist[recID])
marc, nonmarc = self.find_nonmarc_records(recID1, recID2)
- if marc:
+ if marc and len(self.tags):
collector = TermCollector(self.tokenizer,
self.tokenizer_type,
self.table_type,
@@ -1443,14 +1443,15 @@ def add_recID_range(self, recID1, recID2):
[recID1, recID2])
collector.set_special_tags(self.special_tags)
wlist = collector.collect(marc, wlist)
- if nonmarc:
+ if nonmarc or (not len(self.tags) and len(self.nonmarc_tags)):
collector = NonmarcTermCollector(self.tokenizer,
self.tokenizer_type,
self.table_type,
self.nonmarc_tags,
[recID1, recID2])
collector.set_special_tags(self.special_tags)
- wlist = collector.collect(nonmarc, wlist)
+ toindex = nonmarc if len(self.tags) else marc
+ wlist = collector.collect(toindex, wlist)
# lookup index-time synonyms:
synonym_kbrs = get_all_synonym_knowledge_bases()
diff --git a/invenio/legacy/bibindex/engine_utils.py b/invenio/legacy/bibindex/engine_utils.py
index eb435b544c..b631d20ee3 100644
--- a/invenio/legacy/bibindex/engine_utils.py
+++ b/invenio/legacy/bibindex/engine_utils.py
@@ -35,6 +35,7 @@
CFG_BIBINDEX_CHARS_PUNCTUATION, \
CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS
from invenio.legacy.bibindex.engine_config import CFG_BIBINDEX_COLUMN_VALUE_SEPARATOR
+from invenio.utils.memoise import memoize
latex_formula_re = re.compile(r'\$.*?\$|\\\[.*?\\\]')
@@ -288,6 +289,7 @@ def get_index_name_from_index_id(index_id):
return ''
+@memoize
def get_field_tags(field, tagtype="marc"):
"""Returns a list of tags for the field code 'field'. Works
for both MARC and nonMARC tags.
@@ -378,6 +380,7 @@ def get_nonmarc_tag_indexes(nonmarc_tag, virtual=True):
return ()
+@memoize
def get_index_tags(indexname, virtual=True, tagtype="marc"):
"""Returns the list of tags that are indexed inside INDEXNAME.
Returns empty list in case there are no tags indexed in this index.
diff --git a/invenio/legacy/bibknowledge/adminlib.py b/invenio/legacy/bibknowledge/adminlib.py
index e0200863d8..8d37639281 100644
--- a/invenio/legacy/bibknowledge/adminlib.py
+++ b/invenio/legacy/bibknowledge/adminlib.py
@@ -107,7 +107,7 @@ def perform_request_knowledge_base_show(kb_id, ln=CFG_SITE_LANG, sortby="to",
dyn_config = None
collections = None
if kb_type == 'd':
- from invenio.modules.search.models import Collection
+ from invenio.modules.collections.models import Collection
collections = [
c[0] for c in Collection.query.order_by('name').values('name')
]
diff --git a/invenio/legacy/search_engine/__init__.py b/invenio/legacy/search_engine/__init__.py
index 57c2f24219..c75e065dc3 100644
--- a/invenio/legacy/search_engine/__init__.py
+++ b/invenio/legacy/search_engine/__init__.py
@@ -161,17 +161,17 @@
"rt_portalbox" : "Prt",
"search_services": "SER"};
-from invenio.modules.search.cache import collection_reclist_cache
-from invenio.modules.search.cache import collection_restricted_p
-from invenio.modules.search.cache import restricted_collection_cache
+from invenio.modules.collections.cache import collection_reclist_cache
+from invenio.modules.collections.cache import collection_restricted_p
+from invenio.modules.collections.cache import restricted_collection_cache
from invenio.modules.search.utils import get_permitted_restricted_collections
-from invenio.modules.search.cache import get_all_restricted_recids
+from invenio.modules.collections.cache import get_all_restricted_recids
from invenio.modules.records.access import check_user_can_view_record
-from invenio.modules.search.cache import get_collection_reclist
-from invenio.modules.search.cache import get_coll_i18nname
+from invenio.modules.collections.cache import get_collection_reclist
+from invenio.modules.collections.cache import get_coll_i18nname
from invenio.modules.search.cache import get_field_i18nname
from invenio.modules.indexer.models import IdxINDEX
@@ -223,7 +223,7 @@ def get_coll_ancestors(coll):
return coll_ancestors
-from invenio.modules.search.cache import get_collection_allchildren
+from invenio.modules.collections.cache import get_collection_allchildren
def browse_pattern_phrases(req, colls, p, f, rg, ln=CFG_SITE_LANG):
diff --git a/invenio/legacy/webalert/alert_engine.py b/invenio/legacy/webalert/alert_engine.py
index 9ab02538ce..52928204c8 100644
--- a/invenio/legacy/webalert/alert_engine.py
+++ b/invenio/legacy/webalert/alert_engine.py
@@ -54,7 +54,7 @@
CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS
from invenio.legacy.websearch_external_collections.getter import HTTPAsyncPageGetter, async_download
from invenio.legacy.websearch_external_collections.utils import get_collection_id
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
import invenio.legacy.template
websearch_templates = invenio.legacy.template.load('websearch')
diff --git a/invenio/legacy/webcomment/webinterface.py b/invenio/legacy/webcomment/webinterface.py
index abd8adb95a..5830676aee 100644
--- a/invenio/legacy/webcomment/webinterface.py
+++ b/invenio/legacy/webcomment/webinterface.py
@@ -91,7 +91,7 @@
stream_file, \
decompose_file, \
propose_next_docname
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
class WebInterfaceCommentsPages(WebInterfaceDirectory):
"""Defines the set of /comments pages."""
diff --git a/invenio/legacy/weblinkback/webinterface.py b/invenio/legacy/weblinkback/webinterface.py
index b6176c8b80..afb366258d 100644
--- a/invenio/legacy/weblinkback/webinterface.py
+++ b/invenio/legacy/weblinkback/webinterface.py
@@ -45,7 +45,7 @@
from invenio.legacy.webpage import pageheaderonly, pagefooteronly
from invenio.legacy.websearch.adminlib import get_detailed_page_tabs
from invenio.modules.access.engine import acc_authorize_action
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
import invenio.legacy.template
webstyle_templates = invenio.legacy.template.load('webstyle')
diff --git a/invenio/legacy/websearch/scripts/webcoll.py b/invenio/legacy/websearch/scripts/webcoll.py
deleted file mode 100644
index ebb90ce7ef..0000000000
--- a/invenio/legacy/websearch/scripts/webcoll.py
+++ /dev/null
@@ -1,60 +0,0 @@
-## This file is part of Invenio.
-## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
-##
-## Invenio is free software; you can redistribute it and/or
-## modify it under the terms of the GNU General Public License as
-## published by the Free Software Foundation; either version 2 of the
-## License, or (at your option) any later version.
-##
-## Invenio is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Invenio; if not, write to the Free Software Foundation, Inc.,
-## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-from invenio.base.factory import with_app_context
-
-
-@with_app_context()
-def main():
- """Main that construct all the bibtask."""
- from invenio.legacy.bibsched.bibtask import task_init
- from invenio.legacy.websearch.webcoll import (
- task_submit_elaborate_specific_parameter, task_submit_check_options,
- task_run_core, __revision__)
-
- task_init(authorization_action="runwebcoll",
- authorization_msg="WebColl Task Submission",
- description="""Description:
- webcoll updates the collection cache (record universe for a
- given collection plus web page elements) based on invenio.conf and DB
- configuration parameters. If the collection name is passed as an argument,
- only this collection's cache will be updated. If the recursive option is
- set as well, the collection's descendants will also be updated.\n""",
- help_specific_usage=" -c, --collection\t Update cache for the given "
- "collection only. [all]\n"
- " -r, --recursive\t Update cache for the given collection and all its\n"
- "\t\t\t descendants (to be used in combination with -c). [no]\n"
- " -q, --quick\t\t Skip webpage cache update for those collections whose\n"
- "\t\t\t reclist was not changed. Note: if you use this option, it is advised\n"
- "\t\t\t to schedule, e.g. a nightly 'webcoll --force'. [no]\n"
- " -f, --force\t\t Force update even if cache is up to date. [no]\n"
- " -p, --part\t\t Update only certain cache parts (1=reclist,"
- " 2=webpage). [both]\n"
- " -l, --language\t Update pages in only certain language"
- " (e.g. fr,it,...). [all]\n",
- version=__revision__,
- specific_params=("c:rqfp:l:", [
- "collection=",
- "recursive",
- "quick",
- "force",
- "part=",
- "language="
- ]),
- task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
- task_submit_check_options_fnc=task_submit_check_options,
- task_run_fnc=task_run_core)
diff --git a/invenio/legacy/websearch/webcoll.py b/invenio/legacy/websearch/webcoll.py
deleted file mode 100644
index 75fc599458..0000000000
--- a/invenio/legacy/websearch/webcoll.py
+++ /dev/null
@@ -1,1223 +0,0 @@
-# -*- coding: utf-8 -*-
-## This file is part of Invenio.
-## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 CERN.
-##
-## Invenio is free software; you can redistribute it and/or
-## modify it under the terms of the GNU General Public License as
-## published by the Free Software Foundation; either version 2 of the
-## License, or (at your option) any later version.
-##
-## Invenio is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Invenio; if not, write to the Free Software Foundation, Inc.,
-## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-from __future__ import print_function
-
-"""Create Invenio collection cache."""
-
-__revision__ = "$Id$"
-
-import calendar
-import copy
-import datetime
-import sys
-import cgi
-import re
-import os
-import string
-import time
-from six.moves import cPickle
-
-from invenio.config import \
- CFG_CERN_SITE, \
- CFG_WEBSEARCH_INSTANT_BROWSE, \
- CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, \
- CFG_WEBSEARCH_I18N_LATEST_ADDITIONS, \
- CFG_CACHEDIR, \
- CFG_SITE_LANG, \
- CFG_SITE_NAME, \
- CFG_SITE_LANGS, \
- CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \
- CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \
- CFG_SCOAP3_SITE, \
- CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES
-from invenio.base.i18n import gettext_set_language
-from invenio.modules.sorter.engine import sort_records
-from invenio.modules.records.recordext.functions.get_creation_date import get_creation_date
-from invenio.legacy.search_engine import get_field_i18nname, collection_restricted_p, EM_REPOSITORY
-from invenio.legacy.dbquery import run_sql, Error, get_table_update_time
-from invenio.legacy.bibrank.record_sorter import get_bibrank_methods
-from invenio.utils.date import convert_datestruct_to_dategui, strftime
-from invenio.modules.search.api import SearchEngine
-from invenio.modules.formatter import format_record
-from invenio.utils.shell import mymkdir
-from intbitset import intbitset
-from invenio.legacy.websearch_external_collections import \
- external_collection_load_states, \
- dico_collection_external_searches, \
- external_collection_sort_engine_by_name
-from invenio.legacy.bibsched.bibtask import task_init, task_get_option, task_set_option, \
- write_message, task_has_option, task_update_progress, task_set_task_param, \
- task_sleep_now_if_required
-import invenio.legacy.template
-websearch_templates = invenio.legacy.template.load('websearch')
-
-from invenio.legacy.websearch_external_collections.searcher import external_collections_dictionary
-from invenio.legacy.websearch_external_collections.config import CFG_EXTERNAL_COLLECTION_TIMEOUT
-from invenio.legacy.websearch_external_collections.config import CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS
-
-from invenio.base.signals import webcoll_after_webpage_cache_update, \
- webcoll_after_reclist_cache_update
-
-## global vars
-COLLECTION_HOUSE = {} # will hold collections we treat in this run of the program; a dict of {collname2, collobject1}, ...
-
-# CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE -- cache timestamp
-# tolerance (in seconds), to account for the fact that an admin might
-# accidentally happen to edit the collection definitions at exactly
-# the same second when some webcoll process was about to be started.
-# In order to be safe, let's put an exaggerated timestamp tolerance
-# value such as 20 seconds:
-CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE = 20
-
-# CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE -- location of the cache
-# timestamp file:
-CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE = "%s/collections/last_updated" % CFG_CACHEDIR
-
-# CFG_CACHE_LAST_FAST_UPDATED_TIMESTAMP_FILE -- location of the cache
-# timestamp file usef when running webcoll in the fast-mode.
-CFG_CACHE_LAST_FAST_UPDATED_TIMESTAMP_FILE = "%s/collections/last_fast_updated" % CFG_CACHEDIR
-
-
-def get_collection(colname):
- """Return collection object from the collection house for given colname.
- If does not exist, then create it."""
- if colname not in COLLECTION_HOUSE:
- colobject = Collection(colname)
- COLLECTION_HOUSE[colname] = colobject
- return COLLECTION_HOUSE[colname]
-
-## auxiliary functions:
-def is_selected(var, fld):
- "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes."
- if var == fld:
- return ' selected="selected"'
- else:
- return ""
-
-def get_field(recID, tag):
- "Gets list of field 'tag' for the record with 'recID' system number."
-
- out = []
- digit = tag[0:2]
-
- bx = "bib%sx" % digit
- bibx = "bibrec_bib%sx" % digit
- query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \
- % (bx, bibx, recID, tag)
- res = run_sql(query)
- for row in res:
- out.append(row[0])
- return out
-
-def check_nbrecs_for_all_external_collections():
- """Check if any of the external collections have changed their total number of records, aka nbrecs.
- Return True if any of the total numbers of records have changed and False if they're all the same."""
- res = run_sql("SELECT name FROM collection WHERE dbquery LIKE 'hostedcollection:%';")
- for row in res:
- coll_name = row[0]
- if (get_collection(coll_name)).check_nbrecs_for_external_collection():
- return True
- return False
-
-class Collection:
- "Holds the information on collections (id,name,dbquery)."
-
- def __init__(self, name=""):
- "Creates collection instance by querying the DB configuration database about 'name'."
- self.calculate_reclist_run_already = 0 # to speed things up without much refactoring
- self.update_reclist_run_already = 0 # to speed things up without much refactoring
- self.reclist_updated_since_start = 0 # to check if webpage cache need rebuilding
- self.reclist_with_nonpublic_subcolls = intbitset()
- # temporary counters for the number of records in hosted collections
- self.nbrecs_tmp = None # number of records in a hosted collection
- self.nbrecs_from_hosted_collections = 0 # total number of records from
- # descendant hosted collections
- if not name:
- self.name = CFG_SITE_NAME # by default we are working on the home page
- self.id = 1
- self.dbquery = None
- self.nbrecs = None
- self.reclist = intbitset()
- self.old_reclist = intbitset()
- self.reclist_updated_since_start = 1
- else:
- self.name = name
- try:
- res = run_sql("""SELECT id,name,dbquery,nbrecs,reclist FROM collection
- WHERE name=%s""", (name,))
- if res:
- self.id = res[0][0]
- self.name = res[0][1]
- self.dbquery = res[0][2]
- self.nbrecs = res[0][3]
- try:
- self.reclist = intbitset(res[0][4])
- except:
- self.reclist = intbitset()
- self.reclist_updated_since_start = 1
- else: # collection does not exist!
- self.id = None
- self.dbquery = None
- self.nbrecs = None
- self.reclist = intbitset()
- self.reclist_updated_since_start = 1
- self.old_reclist = intbitset(self.reclist)
- except Error as e:
- print("Error %d: %s" % (e.args[0], e.args[1]))
- sys.exit(1)
-
- def get_example_search_queries(self):
- """Returns list of sample search queries for this collection.
- """
- res = run_sql("""SELECT example.body FROM example
- LEFT JOIN collection_example on example.id=collection_example.id_example
- WHERE collection_example.id_collection=%s ORDER BY collection_example.score""", (self.id,))
- return [query[0] for query in res]
-
- def get_name(self, ln=CFG_SITE_LANG, name_type="ln", prolog="", epilog="", prolog_suffix=" ", epilog_suffix=""):
- """Return nicely formatted collection name for language LN.
- The NAME_TYPE may be 'ln' (=long name), 'sn' (=short name), etc."""
- out = prolog
- i18name = ""
- res = run_sql("SELECT value FROM collectionname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, name_type))
- try:
- i18name += res[0][0]
- except IndexError:
- pass
- if i18name:
- out += i18name
- else:
- out += self.name
- out += epilog
- return out
-
- def get_collectionbox_name(self, ln=CFG_SITE_LANG, box_type="r"):
- """
- Return collection-specific labelling of 'Focus on' (regular
- collection), 'Narrow by' (virtual collection) and 'Latest
- addition' boxes.
-
- If translation for given language does not exist, use label
- for CFG_SITE_LANG. If no custom label is defined for
- CFG_SITE_LANG, return default label for the box.
-
- @param ln: the language of the label
- @param box_type: can be 'r' (=Narrow by), 'v' (=Focus on), 'l' (=Latest additions)
- """
- i18name = ""
- res = run_sql("SELECT value FROM collectionboxname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, box_type))
- try:
- i18name = res[0][0]
- except IndexError:
- res = run_sql("SELECT value FROM collectionboxname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, CFG_SITE_LANG, box_type))
- try:
- i18name = res[0][0]
- except IndexError:
- pass
-
- if not i18name:
- # load the right message language
- _ = gettext_set_language(ln)
- if box_type == "v":
- i18name = _('Focus on:')
- elif box_type == "r":
- if CFG_SCOAP3_SITE:
- i18name = _('Narrow by publisher/journal:')
- else:
- i18name = _('Narrow by collection:')
- elif box_type == "l":
- i18name = _('Latest additions:')
-
- return i18name
-
- def get_ancestors(self):
- "Returns list of ancestors of the current collection."
- ancestors = []
- ancestors_ids = intbitset()
- id_son = self.id
- while 1:
- query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\
- "WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son)
- res = run_sql(query, None, 1)
- if res:
- col_ancestor = get_collection(res[0][1])
- # looking for loops
- if self.id in ancestors_ids:
- write_message("Loop found in collection %s" % self.name, stream=sys.stderr)
- raise OverflowError("Loop found in collection %s" % self.name)
- else:
- ancestors.append(col_ancestor)
- ancestors_ids.add(col_ancestor.id)
- id_son = res[0][0]
- else:
- break
- ancestors.reverse()
- return ancestors
-
- def restricted_p(self):
- """Predicate to test if the collection is restricted or not. Return the contect of the
- `restrited' column of the collection table (typically Apache group). Otherwise return
- None if the collection is public."""
-
- if collection_restricted_p(self.name):
- return 1
- return None
-
- def get_sons(self, type='r'):
- "Returns list of direct sons of type 'type' for the current collection."
- sons = []
- id_dad = self.id
- query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\
- "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score ASC, c.name ASC" % (int(id_dad), type)
- res = run_sql(query)
- for row in res:
- sons.append(get_collection(row[1]))
- return sons
-
- def get_descendants(self, type='r'):
- "Returns list of all descendants of type 'type' for the current collection."
- descendants = []
- descendant_ids = intbitset()
- id_dad = self.id
- query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\
- "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score ASC" % (int(id_dad), type)
- res = run_sql(query)
- for row in res:
- col_desc = get_collection(row[1])
- # looking for loops
- if self.id in descendant_ids:
- write_message("Loop found in collection %s" % self.name, stream=sys.stderr)
- raise OverflowError("Loop found in collection %s" % self.name)
- else:
- descendants.append(col_desc)
- descendant_ids.add(col_desc.id)
- tmp_descendants = col_desc.get_descendants()
- for descendant in tmp_descendants:
- descendant_ids.add(descendant.id)
- descendants += tmp_descendants
- return descendants
-
- def write_cache_file(self, filename='', filebody={}):
- "Write a file inside collection cache."
- # open file:
- dirname = "%s/collections" % (CFG_CACHEDIR)
- mymkdir(dirname)
- fullfilename = dirname + "/%s.html" % filename
- try:
- os.umask(0o022)
- f = open(fullfilename, "wb")
- except IOError as v:
- try:
- (code, message) = v
- except:
- code = 0
- message = v
- print("I/O Error: " + str(message) + " (" + str(code) + ")")
- sys.exit(1)
- # print user info:
- write_message("... creating %s" % fullfilename, verbose=6)
- # print page body:
- cPickle.dump(filebody, f, cPickle.HIGHEST_PROTOCOL)
- # close file:
- f.close()
-
- def update_webpage_cache(self, lang):
- """Create collection page header, navtrail, body (including left and right stripes) and footer, and
- call write_cache_file() afterwards to update the collection webpage cache."""
-
- return {} ## webpage cache update is not really needed in
- ## Invenio-on-Flask, so let's return quickly here
- ## for great speed-up benefit
- ## precalculate latest additions for non-aggregate
- ## collections (the info is ln and as independent)
- if self.dbquery:
- if CFG_WEBSEARCH_I18N_LATEST_ADDITIONS:
- self.create_latest_additions_info(ln=lang)
- else:
- self.create_latest_additions_info()
-
- # load the right message language
- _ = gettext_set_language(lang)
-
- # create dictionary with data
- cache = {"te_portalbox" : self.create_portalbox(lang, 'te'),
- "np_portalbox" : self.create_portalbox(lang, 'np'),
- "ne_portalbox" : self.create_portalbox(lang, 'ne'),
- "tp_portalbox" : self.create_portalbox(lang, "tp"),
- "lt_portalbox" : self.create_portalbox(lang, "lt"),
- "rt_portalbox" : self.create_portalbox(lang, "rt"),
- "last_updated" : convert_datestruct_to_dategui(time.localtime(),
- ln=lang)}
- for aas in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES: # do light, simple and advanced search pages:
- cache["navtrail_%s" % aas] = self.create_navtrail_links(aas, lang)
- cache["searchfor_%s" % aas] = self.create_searchfor(aas, lang)
- cache["narrowsearch_%s" % aas] = self.create_narrowsearch(aas, lang, 'r')
- cache["focuson_%s" % aas] = self.create_narrowsearch(aas, lang, "v")+ \
- self.create_external_collections_box(lang)
- cache["instantbrowse_%s" % aas] = self.create_instant_browse(aas=aas, ln=lang)
- # write cache file
- self.write_cache_file("%s-ln=%s"%(self.name, lang), cache)
-
- return cache
-
- def create_navtrail_links(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG):
- """Creates navigation trail links, i.e. links to collection
- ancestors (except Home collection). If aas==1, then links to
- Advanced Search interfaces; otherwise Simple Search.
- """
-
- dads = []
- for dad in self.get_ancestors():
- if dad.name != CFG_SITE_NAME: # exclude Home collection
- dads.append((dad.name, dad.get_name(ln)))
-
- return websearch_templates.tmpl_navtrail_links(
- aas=aas, ln=ln, dads=dads)
-
-
- def create_portalbox(self, lang=CFG_SITE_LANG, position="rt"):
- """Creates portalboxes of language CFG_SITE_LANG of the position POSITION by consulting DB configuration database.
- The position may be: 'lt'='left top', 'rt'='right top', etc."""
- out = ""
- query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\
- " WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.ln='%s' AND cp.position='%s' "\
- " ORDER BY cp.score DESC" % (self.id, lang, position)
- res = run_sql(query)
- for row in res:
- title, body = row[0], row[1]
- if title:
- out += websearch_templates.tmpl_portalbox(title = title,
- body = body)
- else:
- # no title specified, so print body ``as is'' only:
- out += body
- return out
-
- def create_narrowsearch(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG, type="r"):
- """Creates list of collection descendants of type 'type' under title 'title'.
- If aas==1, then links to Advanced Search interfaces; otherwise Simple Search.
- Suitable for 'Narrow search' and 'Focus on' boxes."""
-
- # get list of sons and analyse it
- sons = self.get_sons(type)
-
- if not sons:
- return ''
-
- # get descendents
- descendants = self.get_descendants(type)
-
- grandsons = []
- if CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS:
- # load grandsons for each son
- for son in sons:
- grandsons.append(son.get_sons())
-
- # return ""
- return websearch_templates.tmpl_narrowsearch(
- aas = aas,
- ln = ln,
- type = type,
- father = self,
- has_grandchildren = len(descendants)>len(sons),
- sons = sons,
- display_grandsons = CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS,
- grandsons = grandsons
- )
-
- def create_external_collections_box(self, ln=CFG_SITE_LANG):
- external_collection_load_states()
- if self.id not in dico_collection_external_searches:
- return ""
-
- engines_list = external_collection_sort_engine_by_name(dico_collection_external_searches[self.id])
-
- return websearch_templates.tmpl_searchalso(ln, engines_list, self.id)
-
- def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG):
- """
- Create info about latest additions that will be used for
- create_instant_browse() later.
- """
- self.latest_additions_info = []
- if self.nbrecs and self.reclist:
- # firstly, get last 'rg' records:
- recIDs = list(self.reclist)
- of = 'hb'
- # CERN hack begins: tweak latest additions for selected collections:
- if CFG_CERN_SITE:
- # alter recIDs list for some CERN collections:
- this_year = time.strftime("%Y", time.localtime())
- if self.name in ['CERN Yellow Reports','Videos']:
- last_year = str(int(this_year) - 1)
- # detect recIDs only from this and past year:
- recIDs = list(self.reclist & SearchEngine(
- 'year:%s or year:%s' % (this_year, last_year)
- ).search())
- # apply special filters:
- if self.name in ['Videos']:
- # select only videos with movies:
- recIDs = list(intbitset(recIDs) & SearchEngine(
- 'collection:"PUBLVIDEOMOVIE" -"Virtual Visit"'
- ).search())
- of = 'hvp'
- if self.name in ['General Talks', 'Academic Training Lectures', 'Summer Student Lectures']:
- #select only the lectures with material
- recIDs = list(self.reclist & SearchEngine(
- '856:MediaArchive'
- ).search())
- # sort some CERN collections specially:
- if self.name in ['Videos',
- 'Video Clips',
- 'Video Movies',
- 'Video News',
- 'Video Rushes',
- 'Webcast',
- 'ATLAS Videos',
- 'Restricted Video Movies',
- 'Restricted Video Rushes',
- 'LHC First Beam Videos',
- 'CERN openlab Videos']:
- recIDs = sort_records(recIDs, '269__c', 'a')
- elif self.name in ['LHCb Talks']:
- recIDs = sort_records(recIDs, 'reportnumber', 'a')
- elif self.name in ['CERN Yellow Reports']:
- recIDs = sort_records(recIDs, '084__a', 'a')
- elif self.name in ['CERN Courier Issues',
- 'CERN Courier Articles',
- 'CERN Bulletin Issues',
- 'CERN Bulletin Articles']:
- recIDs = sort_records(recIDs, '773__y', 'a')
- # CERN hack ends.
-
- total = len(recIDs)
- to_display = min(rg, total)
-
- for idx in range(total-1, total-to_display-1, -1):
- recid = recIDs[idx]
- creation_date = get_creation_date(recid) or datetime.now()
- self.latest_additions_info.append({'id': recid,
- 'format': format_record(recid, of, ln=ln),
- 'date': datetime.strptime(creation_date, "%Y-%m-%d %H:%i")})
- return
-
- def create_instant_browse(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG):
- "Searches database and produces list of last 'rg' records."
-
- if self.restricted_p():
- return websearch_templates.tmpl_box_restricted_content(ln = ln)
-
- if str(self.dbquery).startswith("hostedcollection:"):
- return websearch_templates.tmpl_box_hosted_collection(ln = ln)
-
- if rg == 0:
- # do not show latest additions box
- return ""
-
- # CERN hack: do not display latest additions for some CERN collections:
- if CFG_CERN_SITE and self.name in ['Periodicals', 'Electronic Journals',
- 'Press Office Photo Selection',
- 'Press Office Video Selection']:
- return ""
-
- try:
- self.latest_additions_info
- latest_additions_info_p = True
- except:
- latest_additions_info_p = False
-
- if latest_additions_info_p:
- passIDs = []
- for idx in range(0, min(len(self.latest_additions_info), rg)):
- # CERN hack: display the records in a grid layout, so do not show the related links
- if CFG_CERN_SITE and self.name in ['Videos']:
- passIDs.append({'id': self.latest_additions_info[idx]['id'],
- 'body': self.latest_additions_info[idx]['format'],
- 'date': self.latest_additions_info[idx]['date']})
- else:
- passIDs.append({'id': self.latest_additions_info[idx]['id'],
- 'body': self.latest_additions_info[idx]['format'] + \
- websearch_templates.tmpl_record_links(recid=self.latest_additions_info[idx]['id'],
- rm='citation',
- ln=ln),
- 'date': self.latest_additions_info[idx]['date']})
-
- if self.nbrecs > rg:
- url = websearch_templates.build_search_url(
- cc=self.name, jrec=rg+1, ln=ln, aas=aas)
- else:
- url = ""
- # CERN hack: display the records in a grid layout
- if CFG_CERN_SITE and self.name in ['Videos']:
- return websearch_templates.tmpl_instant_browse(
- aas=aas, ln=ln, recids=passIDs, more_link=url, grid_layout=True, father=self)
-
- return websearch_templates.tmpl_instant_browse(
- aas=aas, ln=ln, recids=passIDs, more_link=url, father=self)
-
- return websearch_templates.tmpl_box_no_records(ln=ln)
-
- def create_searchoptions(self):
- "Produces 'Search options' portal box."
- box = ""
- query = """SELECT DISTINCT(cff.id_field),f.code,f.name FROM collection_field_fieldvalue AS cff, field AS f
- WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id
- ORDER BY cff.score DESC""" % self.id
- res = run_sql(query)
- if res:
- for row in res:
- field_id = row[0]
- field_code = row[1]
- field_name = row[2]
- query_bis = """SELECT fv.value,fv.name FROM fieldvalue AS fv, collection_field_fieldvalue AS cff
- WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue
- ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.name ASC""" % (self.id, field_id)
- res_bis = run_sql(query_bis)
- if res_bis:
- values = [{'value' : '', 'text' : 'any' + ' ' + field_name}] # FIXME: internationalisation of "any"
- for row_bis in res_bis:
- values.append({'value' : cgi.escape(row_bis[0], 1), 'text' : row_bis[1]})
-
- box += websearch_templates.tmpl_select(
- fieldname = field_code,
- values = values
- )
- return box
-
- def create_sortoptions(self, ln=CFG_SITE_LANG):
- """Produces 'Sort options' portal box."""
-
-
- # load the right message language
- _ = gettext_set_language(ln)
-
- box = ""
- query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff
- WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id
- ORDER BY cff.score DESC, f.name ASC""" % self.id
- values = [{'value' : '', 'text': "- %s -" % _("latest first")}]
- res = run_sql(query)
- if res:
- for row in res:
- values.append({'value' : row[0], 'text': get_field_i18nname(row[1], ln)})
- else:
- for tmp in ('title', 'author', 'report number', 'year'):
- values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)})
-
- box = websearch_templates.tmpl_select(
- fieldname = 'sf',
- css_class = 'address',
- values = values
- )
- box += websearch_templates.tmpl_select(
- fieldname = 'so',
- css_class = 'address',
- values = [
- {'value' : 'a' , 'text' : _("asc.")},
- {'value' : 'd' , 'text' : _("desc.")}
- ]
- )
- return box
-
- def create_rankoptions(self, ln=CFG_SITE_LANG):
- "Produces 'Rank options' portal box."
-
- # load the right message language
- _ = gettext_set_language(ln)
-
- values = [{'value' : '', 'text': "- %s %s -" % (string.lower(_("OR")), _("rank by"))}]
- for (code, name) in get_bibrank_methods(self.id, ln):
- values.append({'value' : code, 'text': name})
- box = websearch_templates.tmpl_select(
- fieldname = 'rm',
- css_class = 'address',
- values = values
- )
- return box
-
- def create_displayoptions(self, ln=CFG_SITE_LANG):
- "Produces 'Display options' portal box."
-
- # load the right message language
- _ = gettext_set_language(ln)
-
- values = []
- for i in ['10', '25', '50', '100', '250', '500']:
- values.append({'value' : i, 'text' : i + ' ' + _("results")})
-
- box = websearch_templates.tmpl_select(
- fieldname = 'rg',
- selected = str(CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS),
- css_class = 'address',
- values = values
- )
-
- if self.get_sons():
- box += websearch_templates.tmpl_select(
- fieldname = 'sc',
- css_class = 'address',
- values = [
- {'value' : '1' , 'text' : CFG_SCOAP3_SITE and _("split by publisher/journal") or _("split by collection")},
- {'value' : '0' , 'text' : _("single list")}
- ]
- )
- return box
-
- def create_formatoptions(self, ln=CFG_SITE_LANG):
- "Produces 'Output format options' portal box."
-
- # load the right message language
- _ = gettext_set_language(ln)
-
- box = ""
- values = []
- query = """SELECT f.code,f.name FROM format AS f, collection_format AS cf
- WHERE cf.id_collection=%d AND cf.id_format=f.id AND f.visibility='1'
- ORDER BY cf.score DESC, f.name ASC""" % self.id
- res = run_sql(query)
- if res:
- for row in res:
- values.append({'value' : row[0], 'text': row[1]})
- else:
- values.append({'value' : 'hb', 'text' : "HTML %s" % _("brief")})
- box = websearch_templates.tmpl_select(
- fieldname = 'of',
- css_class = 'address',
- values = values
- )
- return box
-
- def create_searchwithin_selection_box(self, fieldname='f', value='', ln='en'):
- """Produces 'search within' selection box for the current collection."""
-
-
- # get values
- query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff
- WHERE cff.type='sew' AND cff.id_collection=%d AND cff.id_field=f.id
- ORDER BY cff.score DESC, f.name ASC""" % self.id
- res = run_sql(query)
- values = [{'value' : '', 'text' : get_field_i18nname("any field", ln)}]
- if res:
- for row in res:
- values.append({'value' : row[0], 'text' : get_field_i18nname(row[1], ln)})
- else:
- if CFG_CERN_SITE:
- for tmp in ['title', 'author', 'abstract', 'report number', 'year']:
- values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)})
- else:
- for tmp in ['title', 'author', 'abstract', 'keyword', 'report number', 'journal', 'year', 'fulltext', 'reference']:
- values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)})
-
- return websearch_templates.tmpl_searchwithin_select(
- fieldname = fieldname,
- ln = ln,
- selected = value,
- values = values
- )
- def create_searchexample(self):
- "Produces search example(s) for the current collection."
- out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id
- return out
-
- def create_searchfor(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG):
- "Produces either Simple or Advanced 'Search for' box for the current collection."
- if aas == 2:
- return self.create_searchfor_addtosearch(ln)
- elif aas == 1:
- return self.create_searchfor_advanced(ln)
- elif aas == 0:
- return self.create_searchfor_simple(ln)
- else:
- return self.create_searchfor_light(ln)
-
- def create_searchfor_addtosearch(self, ln=CFG_SITE_LANG):
- "Produces add-to-search 'Search for' box for the current collection."
-
- return websearch_templates.tmpl_searchfor_addtosearch(
- ln=ln,
- collection_id=self.name,
- record_count=self.nbrecs,
- searchwithin= self.create_searchwithin_selection_box(fieldname='f1', ln=ln),
- )
-
- def create_searchfor_light(self, ln=CFG_SITE_LANG):
- "Produces light 'Search for' box for the current collection."
-
- return websearch_templates.tmpl_searchfor_light(
- ln=ln,
- collection_id = self.name,
- collection_name=self.get_name(ln=ln),
- record_count=self.nbrecs,
- example_search_queries=self.get_example_search_queries(),
- )
-
- def create_searchfor_simple(self, ln=CFG_SITE_LANG):
- "Produces simple 'Search for' box for the current collection."
-
- return websearch_templates.tmpl_searchfor_simple(
- ln=ln,
- collection_id = self.name,
- collection_name=self.get_name(ln=ln),
- record_count=self.nbrecs,
- middle_option = self.create_searchwithin_selection_box(ln=ln),
- )
-
- def create_searchfor_advanced(self, ln=CFG_SITE_LANG):
- "Produces advanced 'Search for' box for the current collection."
-
- return websearch_templates.tmpl_searchfor_advanced(
- ln = ln,
- collection_id = self.name,
- collection_name=self.get_name(ln=ln),
- record_count=self.nbrecs,
-
- middle_option_1 = self.create_searchwithin_selection_box('f1', ln=ln),
- middle_option_2 = self.create_searchwithin_selection_box('f2', ln=ln),
- middle_option_3 = self.create_searchwithin_selection_box('f3', ln=ln),
-
- searchoptions = self.create_searchoptions(),
- sortoptions = self.create_sortoptions(ln),
- rankoptions = self.create_rankoptions(ln),
- displayoptions = self.create_displayoptions(ln),
- formatoptions = self.create_formatoptions(ln)
- )
-
- def calculate_reclist(self):
- """
- Calculate, set and return the (reclist,
- reclist_with_nonpublic_subcolls,
- nbrecs_from_hosted_collections)
- tuple for the given collection."""
-
- if str(self.dbquery).startswith("hostedcollection:"):
- # we don't normally use this function to calculate the reclist
- # for hosted collections. In case we do, recursively for a regular
- # ancestor collection, then quickly return the object attributes.
- return (self.reclist,
- self.reclist_with_nonpublic_subcolls,
- self.nbrecs)
-
- if self.calculate_reclist_run_already:
- # do we really have to recalculate? If not,
- # then return the object attributes
- return (self.reclist,
- self.reclist_with_nonpublic_subcolls,
- self.nbrecs_from_hosted_collections)
-
- write_message("... calculating reclist of %s" % self.name, verbose=6)
-
- reclist = intbitset() # will hold results for public sons only; good for storing into DB
- reclist_with_nonpublic_subcolls = intbitset() # will hold results for both public and nonpublic sons; good for deducing total
- # number of documents
- nbrecs_from_hosted_collections = 0 # will hold the total number of records from descendant hosted collections
-
- if not self.dbquery:
- # A - collection does not have dbquery, so query recursively all its sons
- # that are either non-restricted or that have the same restriction rules
- for coll in self.get_sons():
- coll_reclist,\
- coll_reclist_with_nonpublic_subcolls,\
- coll_nbrecs_from_hosted_collection = coll.calculate_reclist()
-
- if ((coll.restricted_p() is None) or
- (coll.restricted_p() == self.restricted_p())):
- # add this reclist ``for real'' only if it is public
- reclist.union_update(coll_reclist)
- reclist_with_nonpublic_subcolls.union_update(coll_reclist_with_nonpublic_subcolls)
-
- # increment the total number of records from descendant hosted collections
- nbrecs_from_hosted_collections += coll_nbrecs_from_hosted_collection
-
- else:
- # B - collection does have dbquery, so compute it:
- # (note: explicitly remove DELETED records)
- if CFG_CERN_SITE:
- reclist = SearchEngine(
- self.dbquery + ' -980__:"DELETED" -980__:"DUMMY"'
- ).search()
- else:
- reclist = SearchEngine(
- self.dbquery + ' -980__:"DELETED"'
- ).search()
- reclist_with_nonpublic_subcolls = copy.deepcopy(reclist)
-
- # store the results:
- self.nbrecs_from_hosted_collections = nbrecs_from_hosted_collections
- self.nbrecs = len(reclist_with_nonpublic_subcolls) + \
- nbrecs_from_hosted_collections
- self.reclist = reclist
- self.reclist_with_nonpublic_subcolls = reclist_with_nonpublic_subcolls
- # last but not least, update the speed-up flag:
- self.calculate_reclist_run_already = 1
- # return the two sets, as well as
- # the total number of records from descendant hosted collections:
- return (self.reclist,
- self.reclist_with_nonpublic_subcolls,
- self.nbrecs_from_hosted_collections)
-
- def calculate_nbrecs_for_external_collection(self, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT):
- """Calculate the total number of records, aka nbrecs, for given external collection."""
- #if self.calculate_reclist_run_already:
- # do we have to recalculate?
- #return self.nbrecs
- #write_message("... calculating nbrecs of external collection %s" % self.name, verbose=6)
- if self.name in external_collections_dictionary:
- engine = external_collections_dictionary[self.name]
- if engine.parser:
- self.nbrecs_tmp = engine.parser.parse_nbrecs(timeout)
- if self.nbrecs_tmp >= 0: return self.nbrecs_tmp
- # the parse_nbrecs() function returns negative values for some specific cases
- # maybe we can handle these specific cases, some warnings or something
- # for now the total number of records remains silently the same
- else: return self.nbrecs
- else: write_message("External collection %s does not have a parser!" % self.name, verbose=6)
- else: write_message("External collection %s not found!" % self.name, verbose=6)
- return 0
- # last but not least, update the speed-up flag:
- #self.calculate_reclist_run_already = 1
-
- def check_nbrecs_for_external_collection(self):
- """Check if the external collections has changed its total number of records, aka nbrecs.
- Rerurns True if the total number of records has changed and False if it's the same"""
-
- write_message("*** self.nbrecs = %s / self.cal...ion = %s ***" % (str(self.nbrecs), str(self.calculate_nbrecs_for_external_collection())), verbose=6)
- write_message("*** self.nbrecs != self.cal...ion = %s ***" % (str(self.nbrecs != self.calculate_nbrecs_for_external_collection()),), verbose=6)
- return self.nbrecs != self.calculate_nbrecs_for_external_collection(CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS)
-
- def set_nbrecs_for_external_collection(self):
- """Set this external collection's total number of records, aka nbrecs"""
-
- if self.calculate_reclist_run_already:
- # do we have to recalculate?
- return
- write_message("... calculating nbrecs of external collection %s" % self.name, verbose=6)
- if self.nbrecs_tmp:
- self.nbrecs = self.nbrecs_tmp
- else:
- self.nbrecs = self.calculate_nbrecs_for_external_collection(CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS)
- # last but not least, update the speed-up flag:
- self.calculate_reclist_run_already = 1
-
- def get_added_records(self):
- """Return new records added since last run."""
- return self.reclist - self.old_reclist
-
- def update_reclist(self):
- "Update the record universe for given collection; nbrecs, reclist of the collection table."
- if self.update_reclist_run_already:
- # do we have to reupdate?
- return 0
- write_message("... updating reclist of %s (%s recs)" % (self.name, self.nbrecs), verbose=6)
- sys.stdout.flush()
- try:
- ## In principle we could skip this update if old_reclist==reclist
- ## however we just update it here in case of race-conditions.
- run_sql("UPDATE collection SET nbrecs=%s, reclist=%s WHERE id=%s",
- (self.nbrecs, self.reclist.fastdump(), self.id))
- if self.old_reclist != self.reclist:
- self.reclist_updated_since_start = 1
- else:
- write_message("... no changes in reclist detected", verbose=6)
- except Error as e:
- print("Database Query Error %d: %s." % (e.args[0], e.args[1]))
- sys.exit(1)
- # last but not least, update the speed-up flag:
- self.update_reclist_run_already = 1
- return 0
-
-def perform_display_collection(colID, colname, aas, ln, em, show_help_boxes):
- """Returns the data needed to display a collection page
- The arguments are as follows:
- colID - id of the collection to display
- colname - name of the collection to display
- aas - 0 if simple search, 1 if advanced search
- ln - language of the page
- em - code to display just part of the page
- show_help_boxes - whether to show the help boxes or not"""
- # check and update cache if necessary
- cachedfile = open("%s/collections/%s-ln=%s.html" %
- (CFG_CACHEDIR, colname, ln), "rb")
- try:
- data = cPickle.load(cachedfile)
- except ValueError:
- data = get_collection(colname).update_webpage_cache(ln)
- cachedfile.close()
- # check em value to return just part of the page
- if em != "":
- if EM_REPOSITORY["search_box"] not in em:
- data["searchfor_%s" % aas] = ""
- if EM_REPOSITORY["see_also_box"] not in em:
- data["focuson_%s" % aas] = ""
- if EM_REPOSITORY["all_portalboxes"] not in em:
- if EM_REPOSITORY["te_portalbox"] not in em:
- data["te_portalbox"] = ""
- if EM_REPOSITORY["np_portalbox"] not in em:
- data["np_portalbox"] = ""
- if EM_REPOSITORY["ne_portalbox"] not in em:
- data["ne_portalbox"] = ""
- if EM_REPOSITORY["tp_portalbox"] not in em:
- data["tp_portalbox"] = ""
- if EM_REPOSITORY["lt_portalbox"] not in em:
- data["lt_portalbox"] = ""
- if EM_REPOSITORY["rt_portalbox"] not in em:
- data["rt_portalbox"] = ""
- c_body = websearch_templates.tmpl_webcoll_body(ln, colID, data.get("te_portalbox", ""),
- data.get("searchfor_%s"%aas,''), data.get("np_portalbox", ''), data.get("narrowsearch_%s"%aas, ''),
- data.get("focuson_%s"%aas, ''), data.get("instantbrowse_%s"%aas, ''), data.get("ne_portalbox", ''),
- em=="" or EM_REPOSITORY["body"] in em)
- if show_help_boxes <= 0:
- data["rt_portalbox"] = ""
- return (c_body, data.get("navtrail_%s"%aas, ''), data.get("lt_portalbox", ''), data.get("rt_portalbox", ''),
- data.get("tp_portalbox", ''), data.get("te_portalbox", ''), data.get("last_updated", ''))
-
-def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"):
- """Returns a date string according to the format string.
- It can handle normal date strings and shifts with respect
- to now."""
- date = time.time()
- shift_re = re.compile("([-\+]{0,1})([\d]+)([dhms])")
- factors = {"d":24*3600, "h":3600, "m":60, "s":1}
- m = shift_re.match(var)
- if m:
- sign = m.groups()[0] == "-" and -1 or 1
- factor = factors[m.groups()[2]]
- value = float(m.groups()[1])
- date = time.localtime(date + sign * factor * value)
- date = strftime(format_string, date)
- else:
- date = time.strptime(var, format_string)
- date = strftime(format_string, date)
- return date
-
-def get_current_time_timestamp():
- """Return timestamp corresponding to the current time."""
- return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-
-def compare_timestamps_with_tolerance(timestamp1,
- timestamp2,
- tolerance=0):
- """Compare two timestamps TIMESTAMP1 and TIMESTAMP2, of the form
- '2005-03-31 17:37:26'. Optionally receives a TOLERANCE argument
- (in seconds). Return -1 if TIMESTAMP1 is less than TIMESTAMP2
- minus TOLERANCE, 0 if they are equal within TOLERANCE limit,
- and 1 if TIMESTAMP1 is greater than TIMESTAMP2 plus TOLERANCE.
- """
- # remove any trailing .00 in timestamps:
- timestamp1 = re.sub(r'\.[0-9]+$', '', timestamp1)
- timestamp2 = re.sub(r'\.[0-9]+$', '', timestamp2)
- # first convert timestamps to Unix epoch seconds:
- timestamp1_seconds = calendar.timegm(time.strptime(timestamp1, "%Y-%m-%d %H:%M:%S"))
- timestamp2_seconds = calendar.timegm(time.strptime(timestamp2, "%Y-%m-%d %H:%M:%S"))
- # now compare them:
- if timestamp1_seconds < timestamp2_seconds - tolerance:
- return -1
- elif timestamp1_seconds > timestamp2_seconds + tolerance:
- return 1
- else:
- return 0
-
-def get_database_last_updated_timestamp():
- """Return last updated timestamp for collection-related and
- record-related database tables.
- """
- database_tables_timestamps = []
- database_tables_timestamps.append(get_table_update_time('bibrec'))
- ## In INSPIRE bibfmt is on innodb and there is not such configuration
- bibfmt_last_update = run_sql("SELECT max(last_updated) FROM bibfmt")
- if bibfmt_last_update and bibfmt_last_update[0][0]:
- database_tables_timestamps.append(str(bibfmt_last_update[0][0]))
- try:
- database_tables_timestamps.append(get_table_update_time('idxWORD%'))
- except ValueError:
- # There are no indexes in the database. That's OK.
- pass
- database_tables_timestamps.append(get_table_update_time('collection%'))
- database_tables_timestamps.append(get_table_update_time('portalbox'))
- database_tables_timestamps.append(get_table_update_time('field%'))
- database_tables_timestamps.append(get_table_update_time('format%'))
- database_tables_timestamps.append(get_table_update_time('rnkMETHODNAME'))
- database_tables_timestamps.append(get_table_update_time('accROLE_accACTION_accARGUMENT', run_on_slave=True))
- return max(database_tables_timestamps)
-
-def get_cache_last_updated_timestamp():
- """Return last updated cache timestamp."""
- try:
- f = open(CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE, "r")
- except:
- return "1970-01-01 00:00:00"
- timestamp = f.read()
- f.close()
-
- # Remove trailing newlines and whitespace.
- timestamp = timestamp.strip()
- return timestamp or "1970-01-01 00:00:00"
-
-def set_cache_last_updated_timestamp(timestamp):
- """Set last updated cache timestamp to TIMESTAMP."""
- try:
- with open(CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE, "w") as f:
- f.write(timestamp)
- except:
- # FIXME: do something here
- pass
- return timestamp
-
-def task_submit_elaborate_specific_parameter(key, value, opts, args):
- """ Given the string key it checks it's meaning, eventually using the value.
- Usually it fills some key in the options dict.
- It must return True if it has elaborated the key, False, if it doesn't
- know that key.
- eg:
- if key in ['-n', '--number']:
- self.options['number'] = value
- return True
- return False
- """
- if key in ("-c", "--collection"):
- task_set_option("collection", value)
- elif key in ("-r", "--recursive"):
- task_set_option("recursive", 1)
- elif key in ("-f", "--force"):
- task_set_option("force", 1)
- elif key in ("-q", "--quick"):
- task_set_option("quick", 1)
- elif key in ("-p", "--part"):
- task_set_option("part", int(value))
- elif key in ("-l", "--language"):
- languages = task_get_option("language", [])
- languages += value.split(',')
- for ln in languages:
- if ln not in CFG_SITE_LANGS:
- print('ERROR: "%s" is not a recognized language code' % ln)
- return False
- task_set_option("language", languages)
- else:
- return False
- return True
-
-def task_submit_check_options():
- if task_has_option('collection'):
- coll = get_collection(task_get_option("collection"))
- if coll.id is None:
- print('ERROR: Collection "%s" does not exist' % coll.name)
- return False
- return True
-
-def task_run_core():
- """ Reimplement to add the body of the task."""
-##
-## ------->--->time--->------>
-## (-1) | ( 0) | ( 1)
-## | | |
-## [T.db] | [T.fc] | [T.db]
-## | | |
-## |<-tol|tol->|
-##
-## the above is the compare_timestamps_with_tolerance result "diagram"
-## [T.db] stands fore the database timestamp and [T.fc] for the file cache timestamp
-## ( -1, 0, 1) stand for the returned value
-## tol stands for the tolerance in seconds
-##
-## When a record has been added or deleted from one of the collections the T.db becomes greater that the T.fc
-## and when webcoll runs it is fully ran. It recalculates the reclists and nbrecs, and since it updates the
-## collections db table it also updates the T.db. The T.fc is set as the moment the task started running thus
-## slightly before the T.db (practically the time distance between the start of the task and the last call of
-## update_reclist). Therefore when webcoll runs again, and even if no database changes have taken place in the
-## meanwhile, it fully runs (because compare_timestamps_with_tolerance returns 0). This time though, and if
-## no databases changes have taken place, the T.db remains the same while T.fc is updated and as a result if
-## webcoll runs again it will not be fully ran
-##
- task_run_start_timestamp = get_current_time_timestamp()
- colls = []
- params = {}
- task_set_task_param("post_process_params", params)
- # decide whether we need to run or not, by comparing last updated timestamps:
- write_message("Database timestamp is %s." % get_database_last_updated_timestamp(), verbose=3)
- write_message("Collection cache timestamp is %s." % get_cache_last_updated_timestamp(), verbose=3)
- if task_has_option("part"):
- write_message("Running cache update part %s only." % task_get_option("part"), verbose=3)
- if check_nbrecs_for_all_external_collections() or task_has_option("force") or \
- compare_timestamps_with_tolerance(get_database_last_updated_timestamp(),
- get_cache_last_updated_timestamp(),
- CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE) >= 0:
- ## either forced update was requested or cache is not up to date, so recreate it:
- # firstly, decide which collections to do:
- if task_has_option("collection"):
- coll = get_collection(task_get_option("collection"))
- colls.append(coll)
- if task_has_option("recursive"):
- r_type_descendants = coll.get_descendants(type='r')
- colls += r_type_descendants
- v_type_descendants = coll.get_descendants(type='v')
- colls += v_type_descendants
- else:
- res = run_sql("SELECT name FROM collection ORDER BY id")
- for row in res:
- colls.append(get_collection(row[0]))
- # secondly, update collection reclist cache:
- if task_get_option('part', 1) == 1:
- all_recids_added = intbitset()
- i = 0
- for coll in colls:
- i += 1
- write_message("%s / reclist cache update" % coll.name)
- if str(coll.dbquery).startswith("hostedcollection:"):
- coll.set_nbrecs_for_external_collection()
- else:
- coll.calculate_reclist()
- coll.update_reclist()
- all_recids_added.update(coll.get_added_records())
- task_update_progress("Part 1/2: done %d/%d" % (i, len(colls)))
- task_sleep_now_if_required(can_stop_too=True)
- webcoll_after_reclist_cache_update.send('webcoll', collections=colls)
- params.update({'recids': list(all_recids_added)})
- # thirdly, update collection webpage cache:
- if task_get_option("part", 2) == 2:
- # Updates cache only for chosen languages or for all available ones if none was chosen
- languages = task_get_option("language", CFG_SITE_LANGS)
- write_message("Cache update for the following languages: %s" % str(languages), verbose=3)
- i = 0
- for coll in colls:
- i += 1
- if coll.reclist_updated_since_start or task_has_option("collection") or task_get_option("force") or not task_get_option("quick"):
- write_message("%s / webpage cache update" % coll.name)
- for lang in languages:
- coll.update_webpage_cache(lang)
- webcoll_after_webpage_cache_update.send(coll.name, collection=coll, lang=lang)
- else:
- write_message("%s / webpage cache seems not to need an update and --quick was used" % coll.name, verbose=2)
- task_update_progress("Part 2/2: done %d/%d" % (i, len(colls)))
- task_sleep_now_if_required(can_stop_too=True)
-
- # finally update the cache last updated timestamp:
- # (but only when all collections were updated, not when only
- # some of them were forced-updated as per admin's demand)
- if not task_has_option("collection"):
- set_cache_last_updated_timestamp(task_run_start_timestamp)
- write_message("Collection cache timestamp is set to %s." % get_cache_last_updated_timestamp(), verbose=3)
- task_set_task_param("post_process_params", params)
- else:
- ## cache up to date, we don't have to run
- write_message("Collection cache is up to date, no need to run.")
- ## we are done:
- return True
-
-### okay, here we go:
-if __name__ == '__main__':
- main()
diff --git a/invenio/legacy/websearch/webinterface.py b/invenio/legacy/websearch/webinterface.py
index b85b9e704c..41c1214aa5 100644
--- a/invenio/legacy/websearch/webinterface.py
+++ b/invenio/legacy/websearch/webinterface.py
@@ -78,16 +78,15 @@
perform_request_search, \
restricted_collection_cache, \
EM_REPOSITORY
-from invenio.modules.search.models import Collection
-from invenio.legacy.websearch.webcoll import perform_display_collection
+from invenio.modules.collections.models import Collection
from invenio.legacy.bibrecord import get_fieldvalues, \
get_fieldvalues_alephseq_like
from invenio.modules.access.engine import acc_authorize_action
from invenio.modules.access.local_config import VIEWRESTRCOLL
from invenio.modules.access.mailcookie import mail_cookie_create_authorize_action
+from invenio.modules.collections.cache import get_collection_reclist
from invenio.modules.formatter import format_records
from invenio.modules.formatter.engine import get_output_formats
-from invenio.legacy.websearch.webcoll import get_collection
from intbitset import intbitset
from invenio.legacy.bibupload.engine import find_record_from_sysno
from invenio.legacy.bibrank.citation_searcher import get_cited_by_list
@@ -447,7 +446,7 @@ def __call__(self, req, form):
for collname in restricted_collection_cache.cache:
(auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname)
if auth_code and user_info['email'] == 'guest':
- coll_recids = get_collection(collname).reclist
+ coll_recids = get_collection_reclist(collname)
if coll_recids & recids:
cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname})
target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
@@ -827,82 +826,8 @@ def display_collection(req, c, aas, verbose, ln, em=""):
req=req,
navmenuid='search')
- if normalised_name != c:
- redirect_to_url(req, normalised_name, apache.HTTP_MOVED_PERMANENTLY)
-
- # start display:
- req.content_type = "text/html"
- req.send_http_header()
-
- c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
- c_last_updated = perform_display_collection(colID, c, aas, ln, em,
- user_preferences.get('websearch_helpbox', 1))
-
- if em == "" or EM_REPOSITORY["body"] in em:
- try:
- title = get_coll_i18nname(c, ln)
- except:
- title = ""
- else:
- title = ""
- show_title_p = True
- body_css_classes = []
- if c == CFG_SITE_NAME:
- # Do not display title on home collection
- show_title_p = False
- body_css_classes.append('home')
-
- if len(collection_reclist_cache.cache.keys()) == 1:
- # if there is only one collection defined, do not print its
- # title on the page as it would be displayed repetitively.
- show_title_p = False
-
- if aas == -1:
- show_title_p = False
-
- if CFG_INSPIRE_SITE == 1:
- # INSPIRE should never show title, but instead use css to
- # style collections
- show_title_p = False
- body_css_classes.append(nmtoken_from_string(c))
-
- # RSS:
- rssurl = CFG_SITE_URL + '/rss'
- rssurl_params = []
- if c != CFG_SITE_NAME:
- rssurl_params.append('cc=' + quote(c))
- if ln != CFG_SITE_LANG and \
- c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
- rssurl_params.append('ln=' + ln)
-
- if rssurl_params:
- rssurl += '?' + '&'.join(rssurl_params)
-
- if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
- metaheaderadd = get_mathjax_header(req.is_https())
- else:
- metaheaderadd = ''
-
- return page(title=title,
- body=c_body,
- navtrail=c_navtrail,
- description="%s - %s" % (CFG_SITE_NAME, c),
- keywords="%s, %s" % (CFG_SITE_NAME, c),
- metaheaderadd=metaheaderadd,
- uid=uid,
- language=ln,
- req=req,
- cdspageboxlefttopadd=c_portalbox_lt,
- cdspageboxrighttopadd=c_portalbox_rt,
- titleprologue=c_portalbox_tp,
- titleepilogue=c_portalbox_te,
- lastupdated=c_last_updated,
- navmenuid='search',
- rssurl=rssurl,
- body_css_classes=body_css_classes,
- show_title_p=show_title_p,
- show_header=em == "" or EM_REPOSITORY["header"] in em,
- show_footer=em == "" or EM_REPOSITORY["footer"] in em)
+ from flask import redirect, url_for
+ return redirect(url_for('collections.collection', name=collection.name))
def resolve_doi(req, doi, ln=CFG_SITE_LANG, verbose=0):
diff --git a/invenio/legacy/webstat/engine.py b/invenio/legacy/webstat/engine.py
index 474584ccf6..89c3685cf8 100644
--- a/invenio/legacy/webstat/engine.py
+++ b/invenio/legacy/webstat/engine.py
@@ -51,7 +51,7 @@
book_information_from_MARC
from invenio.legacy.bibcirculation.db_layer import get_id_bibrec, \
get_borrower_data
-from invenio.legacy.websearch.webcoll import CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE
+CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE = None
from invenio.utils.date import convert_datetext_to_datestruct, convert_datestruct_to_dategui
from invenio.legacy.bibsched.bibtask import get_modified_records_since
diff --git a/invenio/legacy/webstat/templates.py b/invenio/legacy/webstat/templates.py
index f459d49e7e..ce646f7c38 100644
--- a/invenio/legacy/webstat/templates.py
+++ b/invenio/legacy/webstat/templates.py
@@ -312,7 +312,7 @@ def tmpl_collection_stats_main_list(self, ln=CFG_SITE_LANG):
"""
out = """
Collections stats
"""
- from invenio.modules.search.models import Collection
+ from invenio.modules.collections.models import Collection
for collection in Collection.query.filter_by(
name=CFG_SITE_NAME).one().collection_children_r:
coll = collection.name
diff --git a/invenio/legacy/webstyle/templates.py b/invenio/legacy/webstyle/templates.py
index 58a684a8ac..c1ed12c7f9 100644
--- a/invenio/legacy/webstyle/templates.py
+++ b/invenio/legacy/webstyle/templates.py
@@ -488,8 +488,8 @@ def detailed_record_container_top(self, recid, tabs, ln=CFG_SITE_LANG,
@param referencenum: show (this) number of references in the references tab
@param discussionnum: show (this) number of comments/reviews in the discussion tab
"""
- from invenio.modules.search.cache import get_all_restricted_recids
- from invenio.modules.search.cache import is_record_in_any_collection
+ from invenio.modules.collections.cache import get_all_restricted_recids
+ from invenio.modules.collections.cache import is_record_in_any_collection
# load the right message language
_ = gettext_set_language(ln)
diff --git a/invenio/legacy/websubmit/webinterface.py b/invenio/legacy/websubmit/webinterface.py
index c26fbb6276..abb5535334 100644
--- a/invenio/legacy/websubmit/webinterface.py
+++ b/invenio/legacy/websubmit/webinterface.py
@@ -47,7 +47,7 @@
from invenio.legacy.webpage import warning_page
from invenio.legacy.webuser import getUid, page_not_authorized, collect_user_info, \
isGuestUser
-from invenio.legacy.search_engine import is_user_owner_of_record
+from invenio.modules.records.access import is_user_owner_of_record
from invenio.ext.legacy.handler import wash_urlargd, WebInterfaceDirectory
from invenio.utils.url import make_canonical_urlargd, redirect_to_url
from invenio.base.i18n import gettext_set_language
diff --git a/invenio/modules/baskets/models.py b/invenio/modules/baskets/models.py
index 71f318a7f7..d285ff0247 100644
--- a/invenio/modules/baskets/models.py
+++ b/invenio/modules/baskets/models.py
@@ -27,7 +27,7 @@
# Create your models here.
from invenio.modules.accounts.models import User, Usergroup
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
class BskBASKET(db.Model):
"""Represents a BskBASKET record."""
diff --git a/invenio/modules/classifier/models.py b/invenio/modules/classifier/models.py
index 8633ec478d..110c33acf7 100644
--- a/invenio/modules/classifier/models.py
+++ b/invenio/modules/classifier/models.py
@@ -26,7 +26,7 @@
# Create your models here.
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
class ClsMETHOD(db.Model):
"""Represents a ClsMETHOD record."""
diff --git a/invenio/modules/collections/__init__.py b/invenio/modules/collections/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/invenio/modules/collections/cache.py b/invenio/modules/collections/cache.py
new file mode 100644
index 0000000000..ee09363d33
--- /dev/null
+++ b/invenio/modules/collections/cache.py
@@ -0,0 +1,235 @@
+# -*- coding: utf-8 -*-
+##
+## This file is part of Invenio.
+## Copyright (C) 2012, 2014, 2015 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+"""Implementation of collections caching."""
+
+from intbitset import intbitset
+
+from invenio.base.globals import cfg
+from invenio.legacy.miscutil.data_cacher import DataCacher, DataCacherProxy
+from invenio.modules.search.engine import search_unit_in_idxphrases
+from invenio.utils.memoise import memoize
+
+from .models import Collection, Collectionname
+
+
+class CollectionAllChildrenDataCacher(DataCacher):
+
+    """Cache mapping each collection name to the names of its descendants.
+
+    The cached list for a collection is built from ``descendants_ids``.
+    """
+
+    def __init__(self):
+        """Initialize the cache with its filler and timestamp callbacks."""
+        def cache_filler():
+            # Build an id -> name index once, then translate the descendant
+            # ids of every collection into names.
+            every_collection = Collection.query.all()
+            name_by_id = {}
+            for coll in every_collection:
+                name_by_id[coll.id] = coll.name
+
+            children = {}
+            for coll in every_collection:
+                children[coll.name] = [
+                    name_by_id.get(coll_id)
+                    for coll_id in coll.descendants_ids
+                ]
+            return children
+
+        def timestamp_verifier():
+            # The cache depends on both the collections and their tree.
+            from invenio.legacy.dbquery import get_table_update_time
+            watched_tables = ('collection', 'collection_collection')
+            return max(get_table_update_time(table)
+                       for table in watched_tables)
+
+        DataCacher.__init__(self, cache_filler, timestamp_verifier)
+
+collection_allchildren_cache = DataCacherProxy(CollectionAllChildrenDataCacher)
+
+
+def get_collection_allchildren(coll, recreate_cache_if_needed=True):
+    """Return the cached list of descendant names for collection ``coll``.
+
+    :param coll: collection name used as cache key
+    :param recreate_cache_if_needed: when true, let the cache refresh
+        itself before the lookup
+    :return: the cached list, or an empty list for an unknown collection
+    """
+    if recreate_cache_if_needed:
+        collection_allchildren_cache.recreate_cache_if_needed()
+    try:
+        return collection_allchildren_cache.cache[coll]
+    except KeyError:
+        # collection does not exist
+        return []
+
+
+class CollectionRecListDataCacher(DataCacher):
+
+    """Cache of record-id hitsets keyed by collection name.
+
+    Not meant to be used directly; call get_collection_reclist() instead.
+    """
+
+    def __init__(self):
+        """Set up the cache filler and the timestamp verifier."""
+        def cache_filler():
+            every_collection = Collection.query.all()
+            # Reset the ``cache`` attribute of the memoized helpers
+            # (presumably their memo store) since they derive from the
+            # reclists rebuilt below.
+            get_all_recids.cache = dict()
+            get_collection_nbrecs.cache = dict()
+            reclists = {}
+            for coll in every_collection:
+                reclists[coll.name] = search_unit_in_idxphrases(
+                    coll.name, 'collection', 'e')
+            return reclists
+
+        def timestamp_verifier():
+            from invenio.legacy.dbquery import get_table_update_time
+            last_update = get_table_update_time('collection')
+            return last_update
+
+        DataCacher.__init__(self, cache_filler, timestamp_verifier)
+
+
+collection_reclist_cache = DataCacherProxy(CollectionRecListDataCacher)
+
+
+def get_collection_reclist(coll, recreate_cache_if_needed=True):
+    """Return the hitset of record ids belonging to collection ``coll``.
+
+    An unknown collection yields an empty hitset.  When the cached entry
+    is empty, the collection is looked up in the database and, if found,
+    its hitset is recomputed and stored back into the cache.
+    """
+    if recreate_cache_if_needed:
+        collection_reclist_cache.recreate_cache_if_needed()
+    if coll in collection_reclist_cache.cache:
+        if not collection_reclist_cache.cache[coll]:
+            found = Collection.query.filter_by(name=coll).first()
+            if found:
+                collection_reclist_cache.cache[coll] = \
+                    search_unit_in_idxphrases(found.name, 'collection', 'e')
+        return collection_reclist_cache.cache[coll] or intbitset()
+    return intbitset()
+
+
+@memoize
+def get_collection_nbrecs(coll):
+    """Count the records in the hitset of collection ``coll``."""
+    reclist = get_collection_reclist(coll)
+    return len(reclist)
+
+
+class RestrictedCollectionDataCacher(DataCacher):
+
+    """Cache of the collection names referenced by VIEWRESTRCOLL rules.
+
+    The cached value is the list of ``collection`` argument values attached
+    to authorizations of the VIEWRESTRCOLL action.
+    """
+
+    def __init__(self):
+        """Set up the cache filler and the timestamp verifier."""
+        def cache_filler():
+            from invenio.modules.access.control import acc_get_action_id
+            from invenio.modules.access.local_config import VIEWRESTRCOLL
+            from invenio.modules.access.models import (
+                AccAuthorization, AccARGUMENT
+            )
+            VIEWRESTRCOLL_ID = acc_get_action_id(VIEWRESTRCOLL)
+
+            restricted = [auth[0] for auth in AccAuthorization.query.join(
+                AccAuthorization.argument
+            ).filter(
+                AccARGUMENT.keyword == 'collection',
+                AccAuthorization.id_accACTION == VIEWRESTRCOLL_ID
+            ).values(AccARGUMENT.value)]
+
+            # Reset the ``cache`` attribute of the memoized helper that
+            # derives from this list.  It must run before the return
+            # statement, otherwise it is never executed on a refill.
+            setattr(get_all_restricted_recids, 'cache', dict())
+            return restricted
+
+        def timestamp_verifier():
+            from invenio.legacy.dbquery import get_table_update_time
+            return max(get_table_update_time('accROLE_accACTION_accARGUMENT'),
+                       get_table_update_time('accARGUMENT'))
+
+        DataCacher.__init__(self, cache_filler, timestamp_verifier)
+
+
+restricted_collection_cache = DataCacherProxy(RestrictedCollectionDataCacher)
+
+
+def collection_restricted_p(collection, recreate_cache_if_needed=True):
+    """Tell whether ``collection`` is listed in the restricted cache.
+
+    :param collection: collection name to look up
+    :param recreate_cache_if_needed: when true, let the cache refresh
+        itself before the lookup
+    """
+    if recreate_cache_if_needed:
+        restricted_collection_cache.recreate_cache_if_needed()
+    restricted_names = restricted_collection_cache.cache
+    return collection in restricted_names
+
+
+@memoize
+def get_all_restricted_recids():
+    """Return the union of the hitsets of all restricted collections.
+
+    That is, the ids of the records that belong to at least one
+    collection listed in ``restricted_collection_cache``.
+    """
+    restricted_recids = intbitset()
+    for restricted_name in restricted_collection_cache.cache:
+        restricted_recids |= get_collection_reclist(restricted_name)
+    return restricted_recids
+
+
+@memoize
+def get_all_recids():
+    """Return the union of the hitsets of every cached collection."""
+    all_recids = intbitset()
+    for coll_name in collection_reclist_cache.cache:
+        all_recids |= get_collection_reclist(coll_name)
+    return all_recids
+
+
+def is_record_in_any_collection(recID, recreate_cache_if_needed=True):
+    """Tell whether record ``recID`` belongs to at least one collection.
+
+    This is a good, although not perfect, hint that webcoll has already
+    run after the record entered the system.
+    """
+    if recreate_cache_if_needed:
+        collection_reclist_cache.recreate_cache_if_needed()
+    known_recids = get_all_recids()
+    return recID in known_recids
+
+
+class CollectionI18nNameDataCacher(DataCacher):
+
+    """Cache of translated collection names.
+
+    The cache maps collection name -> language -> translated name.  It is
+    not meant to be used directly; call get_coll_i18nname() instead.
+    """
+
+    def __init__(self):
+        """Set up the cache filler and the timestamp verifier."""
+        def cache_filler():
+            rows = Collection.query.join(
+                Collection.collection_names
+            ).filter(
+                Collectionname.type == 'ln'
+            ).values(Collection.name, 'ln', 'value')
+
+            names = {}
+            for coll_name, lang, label in rows:
+                if not label:
+                    # skip empty translations
+                    continue
+                names.setdefault(coll_name, {})[lang] = label
+            return names
+
+        def timestamp_verifier():
+            from invenio.legacy.dbquery import get_table_update_time
+            last_update = get_table_update_time('collectionname')
+            return last_update
+
+        DataCacher.__init__(self, cache_filler, timestamp_verifier)
+
+collection_i18nname_cache = DataCacherProxy(CollectionI18nNameDataCacher)
+
+
+def get_coll_i18nname(c, ln=None, verify_cache_timestamp=True):
+    """Return the name of collection ``c`` translated into language ``ln``.
+
+    The lookup goes through collection_i18nname_cache.  By default the
+    cache is first asked to refresh itself, which involves checking the
+    DB table update time; calling this function many times in a row can
+    therefore get slow even though collection names rarely change.
+
+    Pass ``verify_cache_timestamp=False`` to assume the cache is already
+    up to date, e.g. when generating collection lists for the search
+    results page.
+
+    :param c: collection name
+    :param ln: language code; a false value selects CFG_SITE_LANG
+    :param verify_cache_timestamp: refresh the cache before the lookup
+    :return: the translated name, or ``c`` itself when no translation
+        is cached for that collection and language
+    """
+    if not ln:
+        ln = cfg['CFG_SITE_LANG']
+    if verify_cache_timestamp:
+        collection_i18nname_cache.recreate_cache_if_needed()
+    try:
+        return collection_i18nname_cache.cache[c][ln]
+    except KeyError:
+        # translation in LN does not exist
+        return c
diff --git a/invenio/modules/collections/decorators.py b/invenio/modules/collections/decorators.py
new file mode 100644
index 0000000000..a8733853ca
--- /dev/null
+++ b/invenio/modules/collections/decorators.py
@@ -0,0 +1,57 @@
+import functools
+
+from flask import g, abort, flash, redirect, request, url_for
+from flask.ext.login import current_user
+
+from invenio.base.i18n import _
+
+from .models import Collection
+
+
+
+
+def check_collection(method=None, name_getter=None, default_collection=False):
+    """Decorate a view so that it receives a checked collection.
+
+    Can be applied bare (``@check_collection``) or with keyword options
+    (``@check_collection(name_getter=..., default_collection=True)``).
+
+    The wrapped ``method`` is called with the resolved ``Collection`` as
+    its first positional argument; the same object is stored on
+    ``g.collection``.
+
+    :param method: the view function to wrap
+    :param name_getter: callable returning the collection name; defaults
+        to reading the 'cc' (or a single 'c') request argument
+    :param default_collection: when true and no name is found, use the
+        collection with id 1 instead of answering 404
+    """
+    if method is None:
+        # Called with options only: return a decorator awaiting ``method``.
+        return functools.partial(check_collection, name_getter=name_getter,
+                                 default_collection=default_collection)
+
+    def collection_name_from_request():
+        """Return collection name from request arguments 'cc' or 'c'."""
+        values = request.values
+        cc = values.get('cc')
+        if cc is not None:
+            return cc
+        if len(values.getlist('c')) == 1:
+            return values.get('c')
+        return None
+
+    name_getter = name_getter or collection_name_from_request
+
+    @functools.wraps(method)
+    def decorated(*args, **kwargs):
+        uid = current_user.get_id()
+        name = name_getter()
+        if name:
+            collection = Collection.query.filter(
+                Collection.name == name).first_or_404()
+        elif default_collection:
+            collection = Collection.query.get_or_404(1)
+        else:
+            return abort(404)
+        g.collection = collection
+
+        if collection.is_restricted:
+            from invenio.modules.access.engine import acc_authorize_action
+            from invenio.modules.access.local_config import VIEWRESTRCOLL
+            auth_code, auth_msg = acc_authorize_action(
+                uid,
+                VIEWRESTRCOLL,
+                collection=collection.name
+            )
+            if auth_code:
+                # A non-zero code means access was not granted.
+                flash(_('This collection is restricted.'), 'error')
+                if current_user.is_guest:
+                    return redirect(url_for('webaccount.login',
+                                            referer=request.url))
+                return abort(401)
+
+        return method(collection, *args, **kwargs)
+    return decorated
diff --git a/invenio/modules/search/admin_forms.py b/invenio/modules/collections/forms.py
similarity index 90%
rename from invenio/modules/search/admin_forms.py
rename to invenio/modules/collections/forms.py
index 0094508988..75d8a703d4 100644
--- a/invenio/modules/search/admin_forms.py
+++ b/invenio/modules/collections/forms.py
@@ -17,14 +17,14 @@
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-"""WebMessage Forms."""
+"""Collection form implementation."""
+
+from wtforms import TextField, HiddenField, SelectField, StringField
from invenio.base.i18n import _
-from invenio.modules.search.models import get_pbx_pos
+from invenio.modules.collections.models import get_pbx_pos
from invenio.utils.forms import InvenioBaseForm
-from wtforms import HiddenField, SelectField, StringField
-
class CollectionForm(InvenioBaseForm):
diff --git a/invenio/modules/collections/models.py b/invenio/modules/collections/models.py
new file mode 100644
index 0000000000..fe74aba9eb
--- /dev/null
+++ b/invenio/modules/collections/models.py
@@ -0,0 +1,732 @@
+# -*- coding: utf-8 -*-
+##
+## This file is part of Invenio.
+## Copyright (C) 2011, 2012, 2013, 2014, 2015 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+"""Database models for collections."""
+
+# General imports.
+import re
+
+from flask import g, url_for
+from intbitset import intbitset
+from operator import itemgetter
+from sqlalchemy.ext.associationproxy import association_proxy
+from sqlalchemy.ext.orderinglist import ordering_list
+from sqlalchemy.orm.collections import attribute_mapped_collection
+from werkzeug.utils import cached_property
+
+from invenio.base.globals import cfg
+from invenio.base.i18n import _, gettext_set_language
+from invenio.ext.sqlalchemy import db
+from invenio.ext.sqlalchemy.utils import (
+ attribute_multi_dict_collection,
+ IntbitsetPickle,
+ IntbitsetCmp,
+)
+
+# Create your models here.
+
+from invenio.modules.formatter.models import Format
+from invenio.modules.search.models import Field, Fieldvalue
+
+external_collection_mapper = attribute_multi_dict_collection(
+ creator=lambda k, v: CollectionExternalcollection(type=k,
+ externalcollection=v),
+ key_attr=lambda obj: obj.type,
+ val_attr=lambda obj: obj.externalcollection)
+
+
+class Collection(db.Model):
+
+ """Represent a Collection record."""
+
+    def __repr__(self):
+        """Return a debugging representation with id, name and dbquery.
+
+        ``!r`` is used for the text attributes so that non-ASCII values
+        cannot make the byte-string formatting fail.
+        """
+        return ('Collection <id: {0.id}, name: {0.name!r}, '
+                'dbquery: {0.dbquery!r}>').format(self)
+
+    def __unicode__(self):
+        """Return ``"<id>. <name>"``, marking the collection with id 1.
+
+        The suffix template is a unicode literal so that a translated
+        'default' label containing non-ASCII characters formats cleanly.
+        """
+        suffix = u' ({0})'.format(_('default')) if self.id == 1 else u''
+        return u"{0.id}. {0.name}{1}".format(self, suffix)
+
+ def __str__(self):
+ """Return the unicode representation encoded as UTF-8 bytes."""
+ return unicode(self).encode('utf-8')
+
+ __tablename__ = 'collection'
+ id = db.Column(db.MediumInteger(9, unsigned=True),
+ primary_key=True)
+ name = db.Column(db.String(255), unique=True, index=True,
+ nullable=False)
+ dbquery = db.Column(db.Text(20), nullable=True,
+ index=True)
+
+ @property
+ def nbrecs(self):
+ """Number of records in the collection."""
+ from .cache import get_collection_nbrecs
+ return get_collection_nbrecs(self.name)
+
+ @property
+ def reclist(self):
+ """Return hit set with record identifiers."""
+ from .cache import get_collection_reclist
+ return get_collection_reclist(self.name)
+
+    @property
+    def is_hosted(self):
+        """Tell whether ``dbquery`` starts with ``'hostedcollection:'``."""
+        if not self.dbquery:
+            return False
+        return self.dbquery.startswith('hostedcollection:')
+
+ _names = db.relationship(lambda: Collectionname,
+ backref='collection',
+ collection_class=attribute_mapped_collection(
+ 'ln_type'),
+ cascade="all, delete, delete-orphan")
+
+ names = association_proxy(
+ '_names', 'value',
+ creator=lambda k, v: Collectionname(ln_type=k, value=v)
+ )
+ _boxes = db.relationship(lambda: Collectionboxname,
+ backref='collection',
+ collection_class=attribute_mapped_collection(
+ 'ln_type'),
+ cascade="all, delete, delete-orphan")
+
+ boxes = association_proxy(
+ '_boxes', 'value',
+ creator=lambda k, v: Collectionboxname(ln_type=k, value=v)
+ )
+
+ _formatoptions = association_proxy('formats', 'format')
+
+    # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
+    @property
+    def formatoptions(self):
+        """Return the output formats of the collection as dictionaries.
+
+        When no format is attached to the collection, a single
+        'HTML brief' entry is returned.
+        """
+        if not len(self._formatoptions):
+            fallback = {'code': u'hb',
+                        'name': _("HTML %(format)s", format=_("brief")),
+                        'content_type': u'text/html'}
+            return [fallback]
+        return [dict(fmt) for fmt in self._formatoptions]
+
+ _examples_example = association_proxy('_examples', 'example')
+
+ @property
+ # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
+ def examples(self):
+ return list(self._examples_example)
+
+ @property
+ def name_ln(self):
+ """Return get_coll_i18nname() of this collection for ``g.ln``.
+
+ When ``g`` has no ``ln`` attribute, CFG_SITE_LANG is used instead.
+ """
+ from invenio.legacy.search_engine import get_coll_i18nname
+ return get_coll_i18nname(self.name,
+ getattr(g, 'ln', cfg['CFG_SITE_LANG']))
+ # Another possible implementation with cache memoize
+ # @cache.memoize
+ # try:
+ # return db.object_session(self).query(Collectionname).\
+ # with_parent(self).filter(db.and_(Collectionname.ln==g.ln,
+ # Collectionname.type=='ln')).first().value
+ # except:
+ # return self.name
+
+ @property
+ # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
+ def portalboxes_ln(self):
+ return db.object_session(self).query(CollectionPortalbox).\
+ with_parent(self).\
+ options(db.joinedload_all(CollectionPortalbox.portalbox)).\
+ filter(CollectionPortalbox.ln == g.ln).\
+ order_by(db.desc(CollectionPortalbox.score)).all()
+
+    @property
+    def most_specific_dad(self):
+        """Return the parent collection with the fewest records.
+
+        ``None`` is returned when the collection has no parent.
+        """
+        parents = db.object_session(self).query(Collection).join(
+            Collection.sons
+        ).filter(CollectionCollection.id_son == self.id).all()
+        if not parents:
+            return None
+        # On ties, the first parent with the lowest count is kept.
+        return min(parents, key=lambda c: c.nbrecs)
+
+ @property
+ # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
+ def is_restricted(self):
+ from invenio.legacy.search_engine import collection_restricted_p
+ return collection_restricted_p(self.name)
+
+    @property
+    def type(self):
+        """Return ``'r'`` when ``dbquery`` starts with ``<digits>:``.
+
+        Any other query, or no query at all, yields ``'v'``.
+        """
+        # Raw string: ``\d`` is a regex escape, not a string escape.
+        if self.dbquery is not None and \
+                re.match(r"\d+:.*", self.dbquery.lower()):
+            return 'r'
+        return 'v'
+
+ _collection_children = db.relationship(
+ lambda: CollectionCollection,
+ collection_class=ordering_list('score'),
+ primaryjoin=lambda: Collection.id == CollectionCollection.id_dad,
+ foreign_keys=lambda: CollectionCollection.id_dad,
+ order_by=lambda: db.asc(CollectionCollection.score)
+ )
+ _collection_children_r = db.relationship(
+ lambda: CollectionCollection,
+ collection_class=ordering_list('score'),
+ primaryjoin=lambda: db.and_(
+ Collection.id == CollectionCollection.id_dad,
+ CollectionCollection.type == 'r'),
+ foreign_keys=lambda: CollectionCollection.id_dad,
+ order_by=lambda: db.asc(CollectionCollection.score)
+ )
+ _collection_children_v = db.relationship(
+ lambda: CollectionCollection,
+ collection_class=ordering_list('score'),
+ primaryjoin=lambda: db.and_(
+ Collection.id == CollectionCollection.id_dad,
+ CollectionCollection.type == 'v'),
+ foreign_keys=lambda: CollectionCollection.id_dad,
+ order_by=lambda: db.asc(CollectionCollection.score)
+ )
+ collection_parents = db.relationship(
+ lambda: CollectionCollection,
+ collection_class=ordering_list('score'),
+ primaryjoin=lambda: Collection.id == CollectionCollection.id_son,
+ foreign_keys=lambda: CollectionCollection.id_son,
+ order_by=lambda: db.asc(CollectionCollection.score)
+ )
+ collection_children = association_proxy('_collection_children', 'son')
+ collection_children_r = association_proxy(
+ '_collection_children_r', 'son',
+ creator=lambda son: CollectionCollection(id_son=son.id, type='r')
+ )
+ collection_children_v = association_proxy(
+ '_collection_children_v', 'son',
+ creator=lambda son: CollectionCollection(id_son=son.id, type='v')
+ )
+
+ _externalcollections = db.relationship(
+ lambda: CollectionExternalcollection,
+ cascade="all, delete, delete-orphan"
+ )
+
+ def _externalcollections_type(type):
+ return association_proxy(
+ '_externalcollections_' + str(type),
+ 'externalcollection',
+ creator=lambda ext: CollectionExternalcollection(
+ externalcollection=ext, type=type))
+
+ externalcollections_0 = _externalcollections_type(0)
+ externalcollections_1 = _externalcollections_type(1)
+ externalcollections_2 = _externalcollections_type(2)
+
+ externalcollections = db.relationship(
+ lambda: CollectionExternalcollection,
+ collection_class=external_collection_mapper,
+ cascade="all, delete, delete-orphan"
+ )
+
+ # Search options
+ _make_field_fieldvalue = lambda type: db.relationship(
+ lambda: CollectionFieldFieldvalue,
+ primaryjoin=lambda: db.and_(
+ Collection.id == CollectionFieldFieldvalue.id_collection,
+ CollectionFieldFieldvalue.type == type),
+ order_by=lambda: CollectionFieldFieldvalue.score)
+
+ _search_within = _make_field_fieldvalue('sew')
+ _search_options = _make_field_fieldvalue('seo')
+
+    @property
+    # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
+    def search_within(self):
+        """Return the ``(code, label)`` choices of the search-within box.
+
+        The fields attached to the collection are used; when there are
+        none, the fields named in CFG_WEBSEARCH_SEARCH_WITHIN are used.
+        Choices are sorted by label and preceded by an 'any field' entry.
+        """
+        choices = [(opt.field.code, opt.field.name_ln)
+                   for opt in self._search_within]
+        if not choices:
+            wanted = cfg['CFG_WEBSEARCH_SEARCH_WITHIN']
+            fields = Field.query.filter(Field.name.in_(wanted)).all()
+            choices = [(field.name.replace(' ', ''), field.name_ln)
+                       for field in fields]
+        choices.sort(key=itemgetter(1))
+        return [('', _('any field'))] + choices
+
+ @property
+ # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
+ def search_options(self):
+ return self._search_options
+
+ @cached_property
+ def ancestors(self):
+ """Return the set made of this collection and all its ancestors."""
+ output = set([self])
+ for c in self.dads:
+ output |= c.dad.ancestors
+ return output
+
+    @cached_property
+    def ancestors_ids(self):
+        """Return the ids of this collection and of all its ancestors.
+
+        :return: an ``intbitset`` of collection ids
+        :raises RuntimeError: when a parent lists this collection among
+            its own ancestors, i.e. the tree contains a cycle
+        """
+        output = intbitset([self.id])
+        for c in self.dads:
+            ancestors = c.dad.ancestors_ids
+            if self.id in ancestors:
+                # A bare ``raise`` has no active exception to re-raise
+                # here, so raise an explicit error instead.
+                raise RuntimeError(
+                    'Collection {0} is its own ancestor.'.format(self.id))
+            output |= ancestors
+        return output
+
+    @cached_property
+    def descendants_ids(self):
+        """Return the ids of this collection and of all its descendants.
+
+        :return: an ``intbitset`` of collection ids
+        :raises RuntimeError: when a child lists this collection among
+            its own descendants, i.e. the tree contains a cycle
+        """
+        output = intbitset([self.id])
+        for c in self.sons:
+            descendants = c.son.descendants_ids
+            if self.id in descendants:
+                # A bare ``raise`` has no active exception to re-raise
+                # here, so raise an explicit error instead.
+                raise RuntimeError(
+                    'Collection {0} is its own descendant.'.format(self.id))
+            output |= descendants
+        return output
+
+ # Gets the list of localized names as an array
+ collection_names = db.relationship(
+ lambda: Collectionname,
+ primaryjoin=lambda: Collection.id == Collectionname.id_collection,
+ foreign_keys=lambda: Collectionname.id_collection
+ )
+
+    def translation(self, lang):
+        """Return the long name ('ln' type) of the collection in ``lang``.
+
+        :param lang: language code of the wanted translation
+        :return: the translated name, or ``""`` when there is none or the
+            lookup fails
+        """
+        try:
+            name = db.object_session(self).query(Collectionname).\
+                with_parent(self).filter(db.and_(
+                    Collectionname.ln == lang,
+                    Collectionname.type == 'ln'
+                )).first()
+        except Exception:
+            # Best effort: a failed lookup is reported as "no translation".
+            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
+            # SystemExit propagate.
+            return ""
+        return name.value if name is not None else ""
+
+ @property
+ def sort_methods(self):
+ """Get sort methods for collection.
+
+ If no sort methods are defined for a collection, the sort methods of
+ the root collection (id 1) are returned. If no methods are defined for
+ the root collection either, all possible sort methods are returned.
+
+ Note: Both sorting methods and ranking methods are now defined via
+ the sorter.
+ """
+ from invenio.modules.sorter.models import BsrMETHOD, \
+ Collection_bsrMETHOD
+
+ get_method = lambda obj: obj.bsrMETHOD
+
+ # Try this collection first, then the collection with id 1.
+ for coll_id in (self.id, 1):
+ methods = Collection_bsrMETHOD.query.filter_by(
+ id_collection=coll_id
+ ).order_by(
+ Collection_bsrMETHOD.score
+ ).options(
+ db.joinedload(Collection_bsrMETHOD.bsrMETHOD)
+ ).all()
+
+ if len(methods) > 0:
+ return map(get_method, methods)
+
+ # Nothing configured: fall back to every method, ordered by name.
+ return BsrMETHOD.query.order_by(BsrMETHOD.name).all()
+
+    def get_collectionbox_name(self, ln=None, box_type="r"):
+        """Return the label of one of the collection's boxes.
+
+        Default labels, as listed in ``Collectionboxname.TYPES``:
+
+        - 'v': 'Focus on:'
+        - 'r': 'Narrow by collection:'
+        - 'l': 'Latest additions:'
+
+        A custom label stored for ``ln`` is preferred.  If there is none,
+        the custom label stored for CFG_SITE_LANG is used.  If there is
+        none either, the default label for ``box_type`` is returned,
+        translated into ``ln``.
+
+        :param ln: the language of the label; defaults to ``g.ln``
+        :param box_type: can be 'r' (=Narrow by), 'v' (=Focus on),
+            'l' (=Latest additions)
+        :return: the label text
+        """
+        if ln is None:
+            ln = g.ln
+        query = db.object_session(self).query(
+            Collectionboxname).with_parent(self)
+
+        # Requested language first, then the site default language.
+        languages = [ln]
+        if cfg['CFG_SITE_LANG'] != ln:
+            languages.append(cfg['CFG_SITE_LANG'])
+
+        collectionboxname = None
+        for language in languages:
+            try:
+                collectionboxname = query.filter(db.and_(
+                    Collectionboxname.ln == language,
+                    Collectionboxname.type == box_type,
+                )).one()
+            except Exception:
+                # ``one()`` raises when no custom label exists for this
+                # language; try the next candidate.
+                continue
+            break
+
+        if collectionboxname is None:
+            # load the right message language
+            _ = gettext_set_language(ln)
+            return _(Collectionboxname.TYPES.get(box_type, ''))
+        return collectionboxname.value
+
+ portal_boxes_ln = db.relationship(
+ lambda: CollectionPortalbox,
+ collection_class=ordering_list('score'),
+ primaryjoin=lambda:
+ Collection.id == CollectionPortalbox.id_collection,
+ foreign_keys=lambda: CollectionPortalbox.id_collection,
+ order_by=lambda: db.asc(CollectionPortalbox.score))
+
+    def breadcrumbs(self, builder=None, ln=None):
+        """Return breadcrumbs for collection.
+
+        The list starts at the top-most ``most_specific_dad`` ancestor and
+        ends with this collection.
+
+        :param builder: optional callable turning a collection into a
+            crumb; by default a dict with ``text`` and ``url`` is built
+        :param ln: language code forwarded to the parent collections;
+            defaults to CFG_SITE_LANG.  The default crumb text comes from
+            ``name_ln`` and does not read this argument.
+        :return: list of crumbs
+        """
+        ln = cfg.get('CFG_SITE_LANG') if ln is None else ln
+        # ``most_specific_dad`` runs a query on every access, so read it
+        # only once.
+        parent = self.most_specific_dad
+        if parent is not None:
+            breadcrumbs = parent.breadcrumbs(builder=builder, ln=ln)
+        else:
+            breadcrumbs = []
+
+        if builder is not None:
+            crumb = builder(self)
+        else:
+            crumb = dict(
+                text=self.name_ln,
+                url=url_for('collections.collection', name=self.name))
+
+        breadcrumbs.append(crumb)
+        return breadcrumbs
+
+
+class Collectionname(db.Model):
+
+    """Represent a Collectionname record."""
+
+    __tablename__ = 'collectionname'
+
+    # Composite primary key: (id_collection, ln, type).
+    id_collection = db.Column(db.MediumInteger(9, unsigned=True),
+                              db.ForeignKey(Collection.id),
+                              nullable=False, primary_key=True)
+    ln = db.Column(db.Char(5), nullable=False, primary_key=True,
+                   server_default='')
+    type = db.Column(db.Char(3), nullable=False, primary_key=True,
+                     server_default='sn')
+    value = db.Column(db.String(255), nullable=False)
+
+    @db.hybrid_property
+    def ln_type(self):
+        """Return the ``(ln, type)`` pair of this name."""
+        return (self.ln, self.type)
+
+    # The setter must carry the same name as the hybrid property itself;
+    # otherwise recent SQLAlchemy versions leave ``ln_type`` read-only.
+    @ln_type.setter
+    def ln_type(self, value):
+        """Set ``ln`` and ``type`` from a ``(ln, type)`` pair."""
+        (self.ln, self.type) = value
+
+    # Backward-compatible alias for the former setter name.
+    set_ln_type = ln_type
+
+
+class Collectionboxname(db.Model):
+    """Represent a Collectionboxname record."""
+    __tablename__ = 'collectionboxname'
+    TYPES = {  # default label of each box type
+        'v': 'Focus on:',
+        'r': 'Narrow by collection:',
+        'l': 'Latest additions:',
+    }
+
+    id_collection = db.Column(db.MediumInteger(9, unsigned=True),
+                              db.ForeignKey(Collection.id),
+                              nullable=False, primary_key=True)
+    ln = db.Column(db.Char(5), nullable=False, primary_key=True,
+                   server_default='')
+    type = db.Column(db.Char(3), nullable=False, primary_key=True,
+                     server_default='r')
+    value = db.Column(db.String(255), nullable=False)
+
+    @db.hybrid_property
+    def ln_type(self):
+        """Return the ``(ln, type)`` pair of this box name."""
+        return (self.ln, self.type)
+
+    @ln_type.setter
+    def ln_type(self, value):  # must be named like the getter
+        (self.ln, self.type) = value
+
+    set_ln_type = ln_type  # former setter name, kept as an alias
+
+
+class Collectiondetailedrecordpagetabs(db.Model):
+
+ """Store the ``tabs`` setting of a collection (one row per collection)."""
+ __tablename__ = 'collectiondetailedrecordpagetabs'
+ id_collection = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Collection.id),
+ nullable=False, primary_key=True)  # sole primary key column
+ tabs = db.Column(db.String(255), nullable=False,
+ server_default='')
+ collection = db.relationship(Collection,
+ backref='collectiondetailedrecordpagetabs')
+
+
+class CollectionCollection(db.Model):
+
+ """Link a dad collection to a son collection (``dads``/``sons``)."""
+ __tablename__ = 'collection_collection'
+ id_dad = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Collection.id), primary_key=True)
+ id_son = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Collection.id), primary_key=True)
+ type = db.Column(db.Char(1), nullable=False,
+ server_default='r')
+ score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False,
+ server_default='0')
+ son = db.relationship(Collection, primaryjoin=id_son == Collection.id,
+ backref='dads',  # on Collection: links in which it is the son
+ # FIX
+ # collection_class=db.attribute_mapped_collection('score'),
+ order_by=db.asc(score))
+ dad = db.relationship(Collection, primaryjoin=id_dad == Collection.id,
+ backref='sons', order_by=db.asc(score))  # links where it is the dad
+
+
+class Example(db.Model):
+
+ """Represent an Example record: a ``type`` and a ``body`` text."""
+ __tablename__ = 'example'
+ id = db.Column(db.MediumInteger(9, unsigned=True), primary_key=True,
+ autoincrement=True)
+ type = db.Column(db.Text, nullable=False)
+ body = db.Column(db.Text, nullable=False)
+
+
+class CollectionExample(db.Model):
+
+ """Attach an Example to a Collection with a ``score``."""
+ __tablename__ = 'collection_example'
+ id_collection = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Collection.id), primary_key=True)
+ id_example = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Example.id), primary_key=True)
+ score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False,
+ server_default='0')
+ collection = db.relationship(Collection, backref='_examples',
+ order_by=score)  # both relationships are ordered by ``score``
+ example = db.relationship(Example, backref='collections', order_by=score)
+
+
+class Portalbox(db.Model):
+
+ """Represent a Portalbox record: a ``title`` and a ``body`` text."""
+ __tablename__ = 'portalbox'
+ id = db.Column(db.MediumInteger(9, unsigned=True), autoincrement=True,
+ primary_key=True)
+ title = db.Column(db.Text, nullable=False)
+ body = db.Column(db.Text, nullable=False)
+
+
+def get_pbx_pos():
+    """Return all the positions available for a portalbox."""
+    # Maps each position code to its human readable label.
+    return {
+        "rt": "Right Top",
+        "lt": "Left Top",
+        "te": "Title Epilog",
+        "tp": "Title Prolog",
+        "ne": "Narrow by coll epilog",
+        "np": "Narrow by coll prolog",
+    }
+
+
+class CollectionPortalbox(db.Model):
+
+ """Attach a Portalbox to a Collection for a language (``ln``)."""
+ __tablename__ = 'collection_portalbox'
+ id_collection = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Collection.id), primary_key=True)
+ id_portalbox = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Portalbox.id), primary_key=True)
+ ln = db.Column(db.Char(5), primary_key=True, server_default='',
+ nullable=False)
+ position = db.Column(db.Char(3), nullable=False,
+ server_default='top')  # NOTE(review): not a get_pbx_pos() code; confirm
+ score = db.Column(db.TinyInteger(4, unsigned=True),
+ nullable=False,
+ server_default='0')
+ collection = db.relationship(Collection, backref='portalboxes',
+ order_by=score)
+ portalbox = db.relationship(Portalbox, backref='collections',
+ order_by=score)
+
+
+class Externalcollection(db.Model):
+
+ """Represent an Externalcollection record, identified by its name."""
+ __tablename__ = 'externalcollection'
+ id = db.Column(db.MediumInteger(9, unsigned=True),
+ primary_key=True)
+ name = db.Column(db.String(255), unique=True, nullable=False,
+ server_default='')
+
+ @property
+ def engine(self):  # dictionary entry for ``self.name``, else None
+ from invenio.legacy.websearch_external_collections.searcher import (
+ external_collections_dictionary
+ )
+ if self.name in external_collections_dictionary:
+ return external_collections_dictionary[self.name]
+
+
+class CollectionExternalcollection(db.Model):
+
+ """Attach an Externalcollection to a Collection with a ``type``."""
+ __tablename__ = 'collection_externalcollection'
+ id_collection = db.Column(db.MediumInteger(9,
+ unsigned=True),
+ db.ForeignKey(Collection.id), primary_key=True,
+ server_default='0')
+ id_externalcollection = db.Column(db.MediumInteger(9,
+ unsigned=True),
+ db.ForeignKey(Externalcollection.id),
+ primary_key=True,
+ server_default='0')
+ type = db.Column(db.TinyInteger(4, unsigned=True),
+ server_default='0',
+ nullable=False)
+
+ def _collection_type(type_):  # class-body helper, called three times below
+ return db.relationship(
+ Collection,
+ primaryjoin=lambda: db.and_(
+ CollectionExternalcollection.id_collection == Collection.id,
+ CollectionExternalcollection.type == type_),  # only rows of this type
+ backref='_externalcollections_{0}'.format(str(type_))
+ )
+ collection_0 = _collection_type(0)
+ collection_1 = _collection_type(1)
+ collection_2 = _collection_type(2)
+
+ externalcollection = db.relationship(Externalcollection)
+
+
+class CollectionFormat(db.Model):
+
+ """Attach a Format to a Collection with a ``score``."""
+ __tablename__ = 'collection_format'
+ id_collection = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Collection.id), primary_key=True)
+ id_format = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Format.id), primary_key=True)
+ score = db.Column(db.TinyInteger(4, unsigned=True),
+ nullable=False, server_default='0')
+ collection = db.relationship(Collection, backref='formats',
+ order_by=db.desc(score))  # highest score first
+ format = db.relationship(Format, backref='collections',
+ order_by=db.desc(score))
+
+
+class CollectionFieldFieldvalue(db.Model):
+
+ """Attach a Field, with a Fieldvalue, to a Collection."""
+
+ __tablename__ = 'collection_field_fieldvalue'
+ id_collection = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Collection.id),
+ primary_key=True, nullable=False)
+ id_field = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Field.id), primary_key=True,
+ nullable=False)
+ id_fieldvalue = db.Column(db.MediumInteger(9, unsigned=True),
+ db.ForeignKey(Fieldvalue.id), primary_key=True,
+ nullable=True)  # NOTE(review): nullable, yet part of the primary key
+ type = db.Column(db.Char(3), nullable=False,
+ server_default='src')
+ score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False,
+ server_default='0')
+ score_fieldvalue = db.Column(db.TinyInteger(4, unsigned=True),
+ nullable=False, server_default='0')
+
+ collection = db.relationship(Collection, backref='field_fieldvalues',
+ order_by=score)
+ field = db.relationship(Field, backref='collection_fieldvalues',
+ lazy='joined')  # field and fieldvalue are loaded with a join
+ fieldvalue = db.relationship(Fieldvalue, backref='collection_fields',
+ lazy='joined')
+
+
+class FacetCollection(db.Model):
+    """Facet configuration for collection."""
+
+    __tablename__ = 'facet_collection'
+
+    id = db.Column(db.Integer, primary_key=True)
+    id_collection = db.Column(db.Integer, db.ForeignKey(Collection.id))
+    order = db.Column(db.Integer)
+    facet_name = db.Column(db.String(80))
+
+    collection = db.relationship(Collection, backref='facets')
+
+    def __repr__(self):
+        """Return a representation showing all column values."""
+        return ('FacetCollection <id: {0.id}, id_collection: '
+                '{0.id_collection}, order: {0.order}, '
+                'facet_name: {0.facet_name}>'.format(self))
+
+    @classmethod
+    def is_place_taken(cls, id_collection, order):
+        """Check if there is already a facet on the given position.
+
+        .. note:: This is only a pre-check: saving can still fail if another
+            session creates the same record in the meantime (phantom reads).
+        """
+        return bool(cls.query.filter(
+            cls.id_collection == id_collection,
+            cls.order == order).count())
+
+    @classmethod
+    def is_duplicated(cls, id_collection, facet_name):
+        """Check if the given facet is already assigned to this collection.
+
+        .. note:: This is only a pre-check: saving can still fail if another
+            session creates the same record in the meantime (phantom reads).
+        """
+        return bool(cls.query.filter(
+            cls.id_collection == id_collection,
+            cls.facet_name == facet_name).count())
+
+
+__all__ = (
+    'Collection',
+    'Collectionname',
+    'Collectionboxname',
+    'Collectiondetailedrecordpagetabs',
+    'CollectionCollection',
+    'Example',
+    'CollectionExample',
+    'Portalbox',
+    'CollectionPortalbox',
+    'Externalcollection',
+    'CollectionExternalcollection',
+    'CollectionFormat',
+    'CollectionFieldFieldvalue',
+    'FacetCollection')
diff --git a/invenio/modules/collections/recordext/__init__.py b/invenio/modules/collections/recordext/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/invenio/modules/collections/recordext/fields/collections.cfg b/invenio/modules/collections/recordext/fields/collections.cfg
new file mode 100644
index 0000000000..df0ad1baf5
--- /dev/null
+++ b/invenio/modules/collections/recordext/fields/collections.cfg
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+##
+## This file is part of Invenio.
+## Copyright (C) 2015 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+_collections:
+ """List of static and dynamic collections to which the record belongs."""
+ calculated:
+ @memoize()
+ get_record_collections(self)
diff --git a/invenio/modules/collections/recordext/functions/__init__.py b/invenio/modules/collections/recordext/functions/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/invenio/modules/collections/recordext/functions/get_record_collections.py b/invenio/modules/collections/recordext/functions/get_record_collections.py
new file mode 100644
index 0000000000..7419c6fd18
--- /dev/null
+++ b/invenio/modules/collections/recordext/functions/get_record_collections.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+##
+## This file is part of Invenio.
+## Copyright (C) 2015 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+"""Record field function."""
+
+from six import iteritems
+
+from invenio.modules.search.api import SearchEngine
+from invenio.utils.datastructures import LazyDict
+
+COLLECTIONS_DELETED_RECORDS = '{dbquery} -980:"DELETED"'
+
+
+def _queries():
+    """Build the search data of every non-hosted ``dbquery`` collection."""
+    from invenio.ext.sqlalchemy import db
+    from invenio.modules.collections.models import Collection
+    searchable = Collection.query.filter(
+        Collection.dbquery != None,  # noqa: E711 -- rendered as IS NOT NULL
+        db.not_(Collection.dbquery.like('hostedcollection:%'))
+    ).all()
+    result = {}
+    for coll in searchable:
+        query = COLLECTIONS_DELETED_RECORDS.format(dbquery=coll.dbquery)
+        result[coll.name] = {
+            'query': SearchEngine(query),
+            'ancestors': set(
+                a.name for a in coll.ancestors if a.dbquery is None),
+        }
+    return result
+
+queries = LazyDict(_queries)  # presumably filled on first access; confirm
+
+
+def get_record_collections(record):
+    """Return names of the collections the record belongs to.
+
+    :param record: Record instance
+    :returns: list of matching collection names and of their ancestors
+    """
+    matched = set()
+    for name, data in iteritems(queries):
+        if not data['query'].match(record):
+            continue
+        matched.update([name], data['ancestors'])
+    return list(matched)
diff --git a/invenio/modules/collections/searchext/__init__.py b/invenio/modules/collections/searchext/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/invenio/modules/collections/searchext/units/__init__.py b/invenio/modules/collections/searchext/units/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/invenio/modules/search/searchext/units/collection.py b/invenio/modules/collections/searchext/units/collection.py
similarity index 100%
rename from invenio/modules/search/searchext/units/collection.py
rename to invenio/modules/collections/searchext/units/collection.py
diff --git a/invenio/modules/collections/views/__init__.py b/invenio/modules/collections/views/__init__.py
new file mode 100644
index 0000000000..0d11fce2af
--- /dev/null
+++ b/invenio/modules/collections/views/__init__.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+##
+## This file is part of Invenio.
+## Copyright (C) 2015 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+"""Collection view definitions."""
+
+from .collections import blueprint as collections_blueprint
+from .admin import blueprint as admin_blueprint
+
+blueprints = [collections_blueprint, admin_blueprint]
diff --git a/invenio/modules/search/views/admin.py b/invenio/modules/collections/views/admin.py
similarity index 76%
rename from invenio/modules/search/views/admin.py
rename to invenio/modules/collections/views/admin.py
index 43632bfb2c..c8faebc3ee 100644
--- a/invenio/modules/search/views/admin.py
+++ b/invenio/modules/collections/views/admin.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
-## Copyright (C) 2012, 2014 CERN.
+## Copyright (C) 2012, 2014, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -17,41 +17,38 @@
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-"""WebSearch Admin Flask Blueprint."""
-
-from __future__ import print_function
+"""Admin interface for collections."""
from flask import Blueprint, g, render_template, request, flash, redirect, \
url_for, abort
from flask.ext.breadcrumbs import register_breadcrumb
-from invenio.ext.sqlalchemy import db
-from ..models import Collection, CollectionCollection, \
- Collectionname, CollectionPortalbox, Portalbox
-from invenio.base.i18n import _
-from invenio.base.decorators import templated
from flask.ext.login import current_user, login_required
+
+from invenio.base.decorators import templated
+from invenio.base.i18n import _, language_list_long
from invenio.ext.principal import permission_required
-from invenio.base.i18n import language_list_long
+from invenio.ext.sqlalchemy import db
-# imports the necessary forms
-from ..admin_forms import CollectionForm, TranslationsForm
+from ..forms import CollectionForm, TranslationsForm
+from ..models import (
+ Collection, CollectionCollection,
+ Collectionname, CollectionPortalbox, Portalbox
+)
not_guest = lambda: not current_user.is_guest
-blueprint = Blueprint('websearch_admin', __name__,
- url_prefix="/admin/websearch",
+blueprint = Blueprint('collections_admin', __name__,
+ url_prefix="/admin/collections",
template_folder='../templates'
)
-#breadcrumbs=[(_('Configure WebSearch'), 'websearch_admin.index')])
-
@blueprint.route('/', methods=['GET', 'POST'])
@blueprint.route('/index', methods=['GET', 'POST'])
@login_required
@permission_required('cfgwebsearch')
@templated('search/admin_index.html')
-@register_breadcrumb(blueprint, 'admin.websearch_admin', _('WebSearch'))
+@register_breadcrumb(blueprint, 'admin.collections_admin', _('WebSearch'))
def index():
"""WebSearch admin interface with editable collection tree."""
collection = Collection.query.get_or_404(1)
@@ -70,17 +67,13 @@ def index():
@login_required
@permission_required('cfgwebsearch')
def modifycollectiontree():
- """
- Handler of the tree changing operations triggered by the drag and drop operation.
- """
+ """Handler for the tree changing operations triggered by the drag&drop."""
# Get the requests parameters
id_son = request.form.get('id_son', 0, type=int)
id_dad = request.form.get('id_dad', 0, type=int)
id_new_dad = request.form.get('id_new_dad', 0, type=int)
score = request.form.get('score', 0, type=int)
- #if id_dad == id_new_dad:
- # score = score + 1
- type = request.form.get('type', 'r')
+ type_ = request.form.get('type', 'r')
# Check if collection exits.
Collection.query.get_or_404(id_son)
@@ -99,7 +92,7 @@ def modifycollectiontree():
cc = CollectionCollection(
id_dad=id_new_dad,
id_son=id_son,
- type=type)
+ type=type_)
db.session.add(cc)
if id_new_dad == 0:
@@ -110,18 +103,16 @@ def modifycollectiontree():
try:
descendants = Collection.query.get(id_son).descendants_ids
ancestors = new_dad.ancestors_ids
- print(descendants, ancestors)
if descendants & ancestors:
raise
- except:
- ## Cycle has been detected.
+ except Exception:
+ # Cycle has been detected.
db.session.rollback()
abort(406)
new_dad._collection_children.reorder()
new_dad._collection_children.insert(score, cc)
- #FIXME add dbrecs rebuild for modified trees.
-
+ # FIXME add dbrecs rebuild for modified trees.
db.session.commit()
return 'done'
@@ -144,7 +135,9 @@ def managecollectiontree():
@login_required
@permission_required('cfgwebsearch')
def manage_collection(name):
- collection = Collection.query.filter(Collection.name == name).first_or_404()
+ """Manage collection."""
+ collection = Collection.query.filter(
+ Collection.name == name).first_or_404()
form = CollectionForm(request.form, obj=collection)
# gets the collections translations
@@ -154,23 +147,19 @@ def manage_collection(name):
TranslationsFormFilled = TranslationsForm(language_list_long(),
translations)
translation_form = TranslationsFormFilled(request.form)
- #for x in collection.collection_names:
- # translation_form[x.ln](default = x.value)
-
- #translation_form.populate_obj(translations)
return render_template('search/admin_collection.html',
collection=collection, form=form,
translation_form=translation_form)
-@blueprint.route('/collection/update/', methods=['POST'])
+@blueprint.route('/collection/update/', methods=['POST'])
@login_required
@permission_required('cfgwebsearch')
-def update(id):
+def update(id_collection):
form = CollectionForm(request.form)
if request.method == 'POST': # and form.validate():
- collection = Collection.query.filter(Collection.id == id).first_or_404()
+ collection = Collection.query.get_or_404(id_collection)
form.populate_obj(collection)
db.session.commit()
flash(_('Collection was updated'), "info")
@@ -179,7 +168,8 @@ def update(id):
@blueprint.route('/collection/new', methods=['GET', 'POST'])
@blueprint.route('/collection/add', methods=['GET', 'POST'])
-#@login_required
+@login_required
+@permission_required('cfgwebsearch')
@templated('search/admin_collection.html')
def create_collection():
form = CollectionForm()
@@ -189,16 +179,16 @@ def create_collection():
@blueprint.route('/collection/update_translations', methods=['POST'])
@login_required
@permission_required('cfgwebsearch')
-#@login_required
def update_translations(id):
"""Update translations if the value is altered or not void."""
collection = Collection.query.filter(Collection.id == id).first_or_404()
for (lang, lang_long) in language_list_long():
-
- collection_name = Collectionname.query.filter(
- db.and_(Collectionname.id_collection == id,
- Collectionname.ln == lang, Collectionname.type == 'ln')).first()
+ collection_name = Collectionname.query.filter(db.and_(
+ Collectionname.id_collection == id,
+ Collectionname.ln == lang,
+ Collectionname.type == 'ln'
+ )).first()
if collection_name:
if collection_name.value != request.form.get(lang):
@@ -215,30 +205,32 @@ def update_translations(id):
return redirect(url_for('.manage_collection', name=collection.name))
-@blueprint.route('/collection/manage_portalboxes_order', methods=['GET', 'POST'])
-#@login_required
+@blueprint.route('/collection/manage_portalboxes_order',
+ methods=['GET', 'POST'])
+@login_required
+@permission_required('cfgwebsearch')
def manage_portalboxes_order():
+ """Manage order of portalboxes."""
id_p = request.args.get('id', 0, type=int)
- collection_id = request.args.get('id_collection', 0, type=int)
+ id_collection = request.args.get('id_collection', 0, type=int)
order = request.args.get('score', 0, type=int)
- collection = Collection.query.filter(Collection.id == collection_id).first_or_404()
+ collection = Collection.query.filter(
+ Collection.id == id_collection).first_or_404()
portalbox = \
CollectionPortalbox.query.filter(db.and_(
CollectionPortalbox.id_portalbox == id_p,
- CollectionPortalbox.id_collection == collection_id)).first_or_404()
+ CollectionPortalbox.id_collection == id_collection)).first_or_404()
position = portalbox.position
p_order = portalbox.score
db.session.delete(portalbox)
-
- #p = portalboxes.pop(portalbox)
- collection.portal_boxes_ln.set(CollectionPortalbox(collection_id,
- id_p,
- g.ln, position,
- p_order), order)
+ collection.portal_boxes_ln.set(
+ CollectionPortalbox(id_collection, id_p, g.ln, position, p_order),
+ order
+ )
db.session.commit()
return ''
diff --git a/invenio/modules/collections/views/collections.py b/invenio/modules/collections/views/collections.py
new file mode 100644
index 0000000000..d6c78c9ee0
--- /dev/null
+++ b/invenio/modules/collections/views/collections.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+##
+## This file is part of Invenio.
+## Copyright (C) 2015 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+
+import warnings
+
+from flask import (Blueprint, request, redirect, url_for, render_template,
+ current_app, g)
+from flask.ext.breadcrumbs import \
+ register_breadcrumb, current_breadcrumbs, default_breadcrumb_root
+from flask.ext.menu import register_menu
+
+from invenio.base.i18n import _
+from invenio.base.decorators import wash_arguments, templated
+from invenio.ext.template.context_processor import \
+ register_template_context_processor
+from invenio.modules.formatter import format_record
+from invenio.modules.search.forms import EasySearchForm
+from invenio.utils.text import slugify
+
+from ..models import Collection
+
+blueprint = Blueprint('collections', __name__, url_prefix='',
+ template_folder='../templates',
+ static_url_path='', # static url path has to be empty
+ # if url_prefix is empty
+ static_folder='../static')
+
+default_breadcrumb_root(blueprint, '.')
+
+
+@blueprint.route('/index.html', methods=['GET', 'POST'])
+@blueprint.route('/index.py', methods=['GET', 'POST'])
+@blueprint.route('/', methods=['GET', 'POST'])
+@templated('search/index.html')
+@register_menu(blueprint, 'main.collection', _('Search'), order=1)
+@register_breadcrumb(blueprint, '.', _('Home'))
+def index():
+ """Render the homepage, i.e. the page of the collection with id 1."""
+ # legacy app support: ``c`` used to carry the name of a collection
+ c = request.values.get('c')
+ if c:
+ warnings.warn("'c' argument for this url has been deprecated",
+ PendingDeprecationWarning)
+ if c == current_app.config['CFG_SITE_NAME']:
+ return redirect(url_for('.index', ln=g.ln))  # site name: go back home
+ elif c is not None:
+ return redirect(url_for('.collection', name=c, ln=g.ln))
+
+ collection = Collection.query.get_or_404(1)  # 404 when there is no id 1
+
+ @register_template_context_processor
+ def index_context():  # presumably extra template variables; confirm
+ return dict(
+ of=request.values.get('of', collection.formatoptions[0]['code']),
+ easy_search_form=EasySearchForm(csrf_enabled=False),
+ format_record=format_record,
+ )
+ return dict(collection=collection)
+
+
+@blueprint.route('/collection/', methods=['GET', 'POST'])
+@blueprint.route('/collection/<name>', methods=['GET', 'POST'])
+def collection(name=None):
+    """Render the collection page.
+
+    Without ``name`` redirect to the collection named ``CFG_SITE_NAME``.
+    Templates are tried in order: ``collection_<id>.html``,
+    ``collection_<slugified name>.html``, then ``collection.html``.
+    """
+    if name is None:
+        return redirect(url_for(
+            '.collection', name=current_app.config['CFG_SITE_NAME']))
+    collection = Collection.query.filter(
+        Collection.name == name).first_or_404()
+
+    @register_template_context_processor
+    def index_context():
+        breadcrumbs = current_breadcrumbs + collection.breadcrumbs(ln=g.ln)[1:]
+        return dict(
+            of=request.values.get('of', collection.formatoptions[0]['code']),
+            format_record=format_record,
+            easy_search_form=EasySearchForm(csrf_enabled=False),
+            breadcrumbs=breadcrumbs)
+
+    return render_template([
+        'search/collection_{0}.html'.format(collection.id),
+        'search/collection_{0}.html'.format(slugify(name, '_')),
+        'search/collection.html'], collection=collection)
diff --git a/invenio/modules/comments/api.py b/invenio/modules/comments/api.py
index 8495af7a7e..51012b0e30 100644
--- a/invenio/modules/comments/api.py
+++ b/invenio/modules/comments/api.py
@@ -72,7 +72,7 @@
from invenio.legacy.search_engine import \
guess_primary_collection_of_a_record, \
check_user_can_view_record
-from invenio.modules.search.cache import get_collection_reclist
+from invenio.modules.collections.cache import get_collection_reclist
from invenio.legacy.bibrecord import get_fieldvalues
from invenio.utils.htmlwasher import EmailWasher
try:
diff --git a/invenio/modules/communities/models.py b/invenio/modules/communities/models.py
index 94d5e49363..fa1d29609e 100644
--- a/invenio/modules/communities/models.py
+++ b/invenio/modules/communities/models.py
@@ -62,16 +62,16 @@
before_delete_collection, after_delete_collection, \
before_delete_collections, after_delete_collections, \
pre_curation, post_curation
-from invenio.modules.records.api import get_record
-from invenio.modules.search.models import \
+from invenio.modules.collections.models import \
Collection, \
CollectionCollection, \
CollectionFormat, \
CollectionPortalbox, \
Collectiondetailedrecordpagetabs, \
Collectionname, \
- Format, \
Portalbox
+from invenio.modules.formatter.models import Format
+from invenio.modules.records.api import get_record
from invenio.modules.oaiharvester.models import OaiREPOSITORY
diff --git a/invenio/modules/communities/tasks.py b/invenio/modules/communities/tasks.py
index dac3751e15..8f2c7121fb 100644
--- a/invenio/modules/communities/tasks.py
+++ b/invenio/modules/communities/tasks.py
@@ -21,7 +21,7 @@
from datetime import datetime
from invenio.ext.sqlalchemy import db
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
from celery.task.base import PeriodicTask
from .models import Community
diff --git a/invenio/modules/communities/testsuite/test_communities.py b/invenio/modules/communities/testsuite/test_communities.py
index 7cd3dee1d3..449de656c1 100644
--- a/invenio/modules/communities/testsuite/test_communities.py
+++ b/invenio/modules/communities/testsuite/test_communities.py
@@ -38,7 +38,7 @@
COMMUNITIES_OUTPUTFORMAT_PROVISIONAL
Community = lazy_import('invenio.modules.communities.models:Community')
-Collection = lazy_import('invenio.modules.search.models:Collection')
+Collection = lazy_import('invenio.modules.collections.models:Collection')
calculate_rank_for_community = lazy_import('invenio.modules.communities.tasks:calculate_rank_for_community')
class CommunityModelTest(InvenioTestCase):
diff --git a/invenio/modules/formatter/__init__.py b/invenio/modules/formatter/__init__.py
index c2af21ab0c..c3d21d19ac 100644
--- a/invenio/modules/formatter/__init__.py
+++ b/invenio/modules/formatter/__init__.py
@@ -431,10 +431,10 @@ def print_records(recIDs, of='hb', ln=None, verbose=0,
from flask import request
from invenio.base.i18n import wash_language
from invenio.ext.template import render_template_to_string
- from invenio.modules.search.models import Format
from invenio.utils.pagination import Pagination
- from invenio.modules.formatter.engine import \
- TEMPLATE_CONTEXT_FUNCTIONS_CACHE
+
+ from .engine import TEMPLATE_CONTEXT_FUNCTIONS_CACHE
+ from .models import Format
of = of.lower()
jrec = request.values.get('jrec', ctx.get('jrec', 1), type=int)
diff --git a/invenio/modules/formatter/templates/format/records/xr.tpl b/invenio/modules/formatter/templates/format/records/xr.tpl
index cf418a22df..41fba555f0 100644
--- a/invenio/modules/formatter/templates/format/records/xr.tpl
+++ b/invenio/modules/formatter/templates/format/records/xr.tpl
@@ -30,7 +30,7 @@
: {{ collection.name|e }}
{%- endif -%}
- {{ config.CFG_SITE_URL if is_root_collection else url_for('search.collection', name=collection.name, _external=True) }}
+ {{ config.CFG_SITE_URL if is_root_collection else url_for('collections.collection', name=collection.name, _external=True) }}
{{ config.CFG_SITE_NAME|e }}{{ _('latest documents') }}
{%- if not is_root_collection -%}
{{ ' ' }}in {{ collection.name|e }}
diff --git a/invenio/modules/knowledge/admin.py b/invenio/modules/knowledge/admin.py
index cdaca3f39e..1b3dc19025 100644
--- a/invenio/modules/knowledge/admin.py
+++ b/invenio/modules/knowledge/admin.py
@@ -26,7 +26,7 @@
from invenio.base.i18n import _
from invenio.ext.admin.views import ModelView
from invenio.ext.sqlalchemy import db
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
from .forms import DynamicKnowledgeForm, KnowledgeForm, KnwKBRVALForm, \
TaxonomyKnowledgeForm, WrittenAsKnowledgeForm
diff --git a/invenio/modules/knowledge/api.py b/invenio/modules/knowledge/api.py
index da0fba1f1a..f1dc092fcf 100644
--- a/invenio/modules/knowledge/api.py
+++ b/invenio/modules/knowledge/api.py
@@ -28,7 +28,7 @@
from invenio.base.i18n import _
from invenio.ext.sqlalchemy import db
from invenio.ext.sqlalchemy.utils import session_manager
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
from invenio.utils.memoise import Memoise
from sqlalchemy.exc import IntegrityError
diff --git a/invenio/modules/knowledge/forms.py b/invenio/modules/knowledge/forms.py
index d04bfa849a..38d0ebd1bc 100644
--- a/invenio/modules/knowledge/forms.py
+++ b/invenio/modules/knowledge/forms.py
@@ -20,7 +20,7 @@
"""Knowledge Forms."""
from invenio.base.i18n import _
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
from invenio.utils.forms import InvenioBaseForm
from werkzeug.local import LocalProxy
diff --git a/invenio/modules/knowledge/models.py b/invenio/modules/knowledge/models.py
index 69e6ccf678..4447b2ed50 100644
--- a/invenio/modules/knowledge/models.py
+++ b/invenio/modules/knowledge/models.py
@@ -24,7 +24,7 @@
from invenio.base.globals import cfg
from invenio.ext.sqlalchemy import db
from invenio.ext.sqlalchemy.utils import session_manager
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
from invenio.utils.text import slugify
from sqlalchemy.event import listens_for
diff --git a/invenio/modules/ranker/models.py b/invenio/modules/ranker/models.py
index c02490bb6c..e366ba0261 100644
--- a/invenio/modules/ranker/models.py
+++ b/invenio/modules/ranker/models.py
@@ -29,7 +29,7 @@
from invenio.modules.accounts.models import User
from invenio.modules.editor.models import Bibdoc
from invenio.modules.records.models import Record as Bibrec
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
class RnkMETHOD(db.Model):
diff --git a/invenio/modules/records/access.py b/invenio/modules/records/access.py
index 7e30157521..83d23044bd 100644
--- a/invenio/modules/records/access.py
+++ b/invenio/modules/records/access.py
@@ -21,7 +21,7 @@
from invenio.base.globals import cfg
-from invenio.modules.search.cache import (
+from invenio.modules.collections.cache import (
collection_reclist_cache,
get_collection_reclist,
restricted_collection_cache,
@@ -115,7 +115,7 @@ def check_user_can_view_record(user_info, recid):
"""
from invenio.modules.access.engine import acc_authorize_action
from invenio.modules.access.local_config import VIEWRESTRCOLL
- from invenio.modules.search.cache import is_record_in_any_collection
+ from invenio.modules.collections.cache import is_record_in_any_collection
from invenio.legacy.search_engine import record_public_p, record_exists
policy = cfg['CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY'].strip().upper()
diff --git a/invenio/modules/records/models.py b/invenio/modules/records/models.py
index 5748f27064..965e0cfaf2 100644
--- a/invenio/modules/records/models.py
+++ b/invenio/modules/records/models.py
@@ -103,13 +103,13 @@ def merged_recid_final(self):
@cached_property
def is_restricted(self):
"""Return True is record is restricted."""
- from invenio.modules.search.cache import get_all_restricted_recids
+ from invenio.modules.collections.cache import get_all_restricted_recids
return self.id in get_all_restricted_recids() or self.is_processed
@cached_property
def is_processed(self):
"""Return True is recods is processed (not in any collection)."""
- from invenio.modules.search.cache import is_record_in_any_collection
+ from invenio.modules.collections.cache import is_record_in_any_collection
return not is_record_in_any_collection(self.id,
recreate_cache_if_needed=False)
diff --git a/invenio/modules/records/views.py b/invenio/modules/records/views.py
index 8e9c9ad037..c693cba477 100644
--- a/invenio/modules/records/views.py
+++ b/invenio/modules/records/views.py
@@ -35,7 +35,7 @@
from invenio.config import CFG_SITE_RECORD
from invenio.ext.template.context_processor import \
register_template_context_processor
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
from invenio.modules.search.signals import record_viewed
from invenio.utils import apache
from .api import get_record
diff --git a/invenio/modules/search/admin.py b/invenio/modules/search/admin.py
index f4a11f44c4..5fcba802d5 100644
--- a/invenio/modules/search/admin.py
+++ b/invenio/modules/search/admin.py
@@ -21,7 +21,7 @@
from invenio.ext.admin.views import ModelView
from invenio.ext.sqlalchemy import db
-from invenio.modules.search.models import Collection, FacetCollection
+from invenio.modules.collections.models import Collection, FacetCollection
from invenio.modules.search.registry import facets
from wtforms.fields import IntegerField, SelectField
diff --git a/invenio/modules/search/cache.py b/invenio/modules/search/cache.py
index af099f0dad..c7eaf0567b 100644
--- a/invenio/modules/search/cache.py
+++ b/invenio/modules/search/cache.py
@@ -20,14 +20,14 @@
"""Implementation of search results caching."""
from intbitset import intbitset
+from flask import current_app
from invenio.base.globals import cfg
from invenio.ext.cache import cache
from invenio.legacy.miscutil.data_cacher import DataCacher, DataCacherProxy
from invenio.utils.hash import md5
-from invenio.utils.memoise import memoize
-from .models import Collection, Collectionname, Field, Fieldname
+from .models import Field, Fieldname
search_results_cache = cache
@@ -57,14 +57,20 @@ def get_search_results_cache_key_from_qid(qid=None):
def get_collection_name_from_cache(qid):
"""Return collection name from query identifier."""
- return search_results_cache.get(
- get_search_results_cache_key_from_qid(qid) + '::cc')
+ try:
+ return search_results_cache.get(
+ get_search_results_cache_key_from_qid(qid) + '::cc')
+ except Exception:
+ current_app.logger.exception('Invalid collection name cache.')
def get_pattern_from_cache(qid):
"""Return pattern from query identifier."""
- return search_results_cache.get(
- get_search_results_cache_key_from_qid(qid) + '::p')
+ try:
+ return search_results_cache.get(
+ get_search_results_cache_key_from_qid(qid) + '::p')
+ except Exception:
+ current_app.logger.exception('Invalid search pattern cache.')
def set_results_cache(results, query, collection_name=None, timeout=None):
@@ -85,204 +91,12 @@ def get_results_cache(query, collection_name=None):
"""Get results from cache."""
collection_name = collection_name or cfg['CFG_SITE_NAME']
qid = get_search_results_cache_key(p=query, cc=collection_name)
- results = search_results_cache.get(qid)
- if results is not None:
- return intbitset().fastload(results)
-
-
-class CollectionAllChildrenDataCacher(DataCacher):
-
- """Cache for all children of a collection."""
-
- def __init__(self):
- """Initilize cache."""
- def cache_filler():
- collections = Collection.query.all()
- collection_index = dict([(c.id, c.name) for c in collections])
-
- return dict([
- (c.name, map(collection_index.get, c.descendants_ids))
- for c in collections
- ])
-
- def timestamp_verifier():
- from invenio.legacy.dbquery import get_table_update_time
- return max(get_table_update_time('collection'),
- get_table_update_time('collection_collection'))
-
- DataCacher.__init__(self, cache_filler, timestamp_verifier)
-
-collection_allchildren_cache = DataCacherProxy(CollectionAllChildrenDataCacher)
-
-
-def get_collection_allchildren(coll, recreate_cache_if_needed=True):
- """Return the list of all children of a collection."""
- if recreate_cache_if_needed:
- collection_allchildren_cache.recreate_cache_if_needed()
- if coll not in collection_allchildren_cache.cache:
- return [] # collection does not exist; return empty list
- return collection_allchildren_cache.cache[coll]
-
-
-class CollectionRecListDataCacher(DataCacher):
-
- """Implement cache for collection reclist hitsets.
-
- This class is not to be used directly; use function
- get_collection_reclist() instead.
- """
-
- def __init__(self):
- def cache_filler():
- collections = Collection.query.all()
- setattr(get_all_recids, 'cache', dict())
- return dict([(c.name, c.reclist) for c in collections])
-
- def timestamp_verifier():
- from invenio.legacy.dbquery import get_table_update_time
- return get_table_update_time('collection')
-
- DataCacher.__init__(self, cache_filler, timestamp_verifier)
-
-
-collection_reclist_cache = DataCacherProxy(CollectionRecListDataCacher)
-
-
-def get_collection_reclist(coll, recreate_cache_if_needed=True):
- """Return hitset of recIDs that belong to the collection 'coll'."""
- if recreate_cache_if_needed:
- collection_reclist_cache.recreate_cache_if_needed()
- if coll not in collection_reclist_cache.cache:
- return intbitset()
- if not collection_reclist_cache.cache[coll]:
- c_coll = Collection.query.filter_by(name=coll).first()
- if c_coll:
- collection_reclist_cache.cache[coll] = c_coll.reclist
- return collection_reclist_cache.cache[coll] or intbitset()
-
-
-class RestrictedCollectionDataCacher(DataCacher):
- def __init__(self):
- def cache_filler():
- from invenio.modules.access.control import acc_get_action_id
- from invenio.modules.access.local_config import VIEWRESTRCOLL
- from invenio.modules.access.models import (
- AccAuthorization, AccARGUMENT
- )
- VIEWRESTRCOLL_ID = acc_get_action_id(VIEWRESTRCOLL)
-
- return [auth[0] for auth in AccAuthorization.query.join(
- AccAuthorization.argument
- ).filter(
- AccARGUMENT.keyword == 'collection',
- AccAuthorization.id_accACTION == VIEWRESTRCOLL_ID
- ).values(AccARGUMENT.value)]
-
- setattr(get_all_restricted_recids, 'cache', dict())
-
- def timestamp_verifier():
- from invenio.legacy.dbquery import get_table_update_time
- return max(get_table_update_time('accROLE_accACTION_accARGUMENT'),
- get_table_update_time('accARGUMENT'))
-
- DataCacher.__init__(self, cache_filler, timestamp_verifier)
-
-
-restricted_collection_cache = DataCacherProxy(RestrictedCollectionDataCacher)
-
-
-def collection_restricted_p(collection, recreate_cache_if_needed=True):
- if recreate_cache_if_needed:
- restricted_collection_cache.recreate_cache_if_needed()
- return collection in restricted_collection_cache.cache
-
-
-@memoize
-def get_all_restricted_recids():
- """Return the set of all the restricted recids.
-
- I.e. the ids of those records which belong to at least one restricted
- collection.
- """
- ret = intbitset()
- for collection in restricted_collection_cache.cache:
- ret |= get_collection_reclist(collection)
- return ret
-
-
-@memoize
-def get_all_recids():
- """Return the set of all recids."""
- ret = intbitset()
- for collection in collection_reclist_cache.cache:
- ret |= get_collection_reclist(collection)
- return ret
-
-
-def is_record_in_any_collection(recID, recreate_cache_if_needed=True):
- """Return True if the record belongs to at least one collection.
-
- This is a good, although not perfect, indicator to guess if webcoll has
- already run after this record has been entered into the system.
- """
- if recreate_cache_if_needed:
- collection_reclist_cache.recreate_cache_if_needed()
- return recID in get_all_recids()
-
-
-class CollectionI18nNameDataCacher(DataCacher):
- """
- Provides cache for I18N collection names. This class is not to be
- used directly; use function get_coll_i18nname() instead.
- """
- def __init__(self):
- def cache_filler():
- res = Collection.query.join(
- Collection.collection_names
- ).filter(Collectionname.type == 'ln').values(
- Collection.name, 'ln', 'value'
- )
- ret = {}
- for c, ln, i18nname in res:
- if i18nname:
- if c not in ret:
- ret[c] = {}
- ret[c][ln] = i18nname
- return ret
-
- def timestamp_verifier():
- from invenio.legacy.dbquery import get_table_update_time
- return get_table_update_time('collectionname')
-
- DataCacher.__init__(self, cache_filler, timestamp_verifier)
-
-collection_i18nname_cache = DataCacherProxy(CollectionI18nNameDataCacher)
-
-
-def get_coll_i18nname(c, ln=None, verify_cache_timestamp=True):
- """Return nicely formatted collection name for given language.
-
- This function uses collection_i18nname_cache, but it verifies
- whether the cache is up-to-date first by default. This
- verification step is performed by checking the DB table update
- time. So, if you call this function 1000 times, it can get very
- slow because it will do 1000 table update time verifications, even
- though collection names change not that often.
-
- Hence the parameter VERIFY_CACHE_TIMESTAMP which, when set to
- False, will assume the cache is already up-to-date. This is
- useful namely in the generation of collection lists for the search
- results page.
- """
- ln = ln or cfg['CFG_SITE_LANG']
- if verify_cache_timestamp:
- collection_i18nname_cache.recreate_cache_if_needed()
- out = c
try:
- out = collection_i18nname_cache.cache[c][ln]
- except KeyError:
- pass # translation in LN does not exist
- return out
+ results = search_results_cache.get(qid)
+ if results is not None:
+ return intbitset().fastload(results)
+ except Exception:
+ current_app.logger.exception('Invalid search results cache.')
class FieldI18nNameDataCacher(DataCacher):
diff --git a/invenio/modules/search/engine.py b/invenio/modules/search/engine.py
index e2a1bf2cec..d71ff37450 100644
--- a/invenio/modules/search/engine.py
+++ b/invenio/modules/search/engine.py
@@ -109,7 +109,7 @@ def search_unit(p, f=None, m='a', wl=0, ignore_synonyms=None):
if f in units:
hitset = units[f](p, f, m, wl)
elif m == 'a' or m == 'r' or f == 'subject' or (
- len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit()):
+ f and len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit()):
# we are doing either phrase search or regexp search
index_id = IdxINDEX.get_index_id_from_field(f)
if index_id != 0:
diff --git a/invenio/modules/search/facet_builders.py b/invenio/modules/search/facet_builders.py
index 132b9419c6..0b26368f36 100644
--- a/invenio/modules/search/facet_builders.py
+++ b/invenio/modules/search/facet_builders.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
-
+##
## This file is part of Invenio.
-## Copyright (C) 2012, 2013, 2014 CERN.
+## Copyright (C) 2012, 2013, 2014, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -19,26 +19,28 @@
"""Facet utility functions."""
-from operator import itemgetter
-from itertools import groupby
-from six import iteritems
from flask import g, url_for, request
from flask.ext.login import current_user
+from intbitset import intbitset
+from itertools import groupby
+from operator import itemgetter
+from six import iteritems
+
+from invenio.base.globals import cfg
+from invenio.modules.collections.cache import get_collection_reclist
+from invenio.modules.collections.models import Collection
from .cache import (
get_search_results_cache_key_from_qid,
search_results_cache,
)
from .engine import search_unit
-from .models import Collection, Field
+from .models import Field
from .utils import (
get_most_popular_field_values,
get_records_that_can_be_displayed,
)
-from invenio.base.globals import cfg
-from intbitset import intbitset
-
def get_current_user_records_that_can_be_displayed(qid):
"""Return records that current user can display.
@@ -187,7 +189,9 @@ def get_facets_for_query(self, qid, limit=20, parent=None):
collection = Collection.query.get(1)
facet = []
for c in collection.collection_children_r:
- num_records = len(c.reclist.intersection(recIDsHitSet))
+ num_records = len(get_collection_reclist(
+ c.name, recreate_cache_if_needed=False
+ ).intersection(recIDsHitSet))
if num_records:
facet.append((c.name, num_records, c.name_ln))
return sorted(facet, key=lambda x: x[1], reverse=True)[0:limit]
diff --git a/invenio/modules/search/fixtures.py b/invenio/modules/search/fixtures.py
index a67880a976..002c74b4e6 100644
--- a/invenio/modules/search/fixtures.py
+++ b/invenio/modules/search/fixtures.py
@@ -315,8 +315,8 @@ class Tag_10:
class Tag_11:
id = 11
- value = u'980__%'
- recjson_value = u'collection'
+ value = u''
+ recjson_value = u'_collections'
name = u'collection identifier'
class Tag_12:
@@ -946,7 +946,7 @@ class Tag_115:
class Tag_116:
id = 116
value = u'909C0e'
- recjson_value = u'experiment'
+ recjson_value = u'accelerator_experiment.experiment'
name = u'experiment'
class Tag_117:
diff --git a/invenio/modules/search/forms.py b/invenio/modules/search/forms.py
index 560f935744..4a7884337a 100644
--- a/invenio/modules/search/forms.py
+++ b/invenio/modules/search/forms.py
@@ -79,7 +79,7 @@ class GetCollections(object):
def __iter__(self):
"""Get all the collections."""
- from invenio.modules.search.models import Collection
+ from invenio.modules.collections.models import Collection
collections = Collection.query.all()
for coll in collections:
diff --git a/invenio/modules/search/models.py b/invenio/modules/search/models.py
index 6db988d92b..3fcd6544d8 100644
--- a/invenio/modules/search/models.py
+++ b/invenio/modules/search/models.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
-#
+##
## This file is part of Invenio.
-## Copyright (C) 2011, 2012, 2013, 2014 CERN.
+## Copyright (C) 2011, 2012, 2013, 2014, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -21,698 +21,16 @@
# General imports.
import datetime
-import re
-from flask import g, url_for, request
+from flask import g, request
from flask_login import current_user
-from intbitset import intbitset
-from operator import itemgetter
-from sqlalchemy.ext.associationproxy import association_proxy
-from sqlalchemy.ext.orderinglist import ordering_list
-from sqlalchemy.orm.collections import InstrumentedList
-from sqlalchemy.orm.collections import attribute_mapped_collection
-from sqlalchemy.orm.collections import collection
-from werkzeug.utils import cached_property
from invenio.base.globals import cfg
-from invenio.base.i18n import _, gettext_set_language
from invenio.ext.sqlalchemy import db
# Create your models here.
from invenio.modules.accounts.models import User
-from invenio.modules.formatter.models import Format
-
-
-class IntbitsetPickle(object):
-
- def dumps(self, obj, protocol=None):
- if obj is not None:
- return obj.fastdump()
- return intbitset([]).fastdump()
-
- def loads(self, obj):
- try:
- return intbitset(obj)
- except:
- return intbitset()
-
-
-def IntbitsetCmp(x, y):
- if x is None or y is None:
- return False
- else:
- return x == y
-
-
-class OrderedList(InstrumentedList):
-
- def append(self, item):
- if self:
- s = sorted(self, key=lambda obj: obj.score)
- item.score = s[-1].score + 1
- else:
- item.score = 1
- InstrumentedList.append(self, item)
-
- def set(self, item, index=0):
- if self:
- s = sorted(self, key=lambda obj: obj.score)
- if index >= len(s):
- item.score = s[-1].score + 1
- elif index < 0:
- item.score = s[0].score
- index = 0
- else:
- item.score = s[index].score + 1
-
- for i, it in enumerate(s[index:]):
- it.score = item.score + i + 1
- # if s[i+1].score more then break
- else:
- item.score = index
- InstrumentedList.append(self, item)
-
- def pop(self, item):
- # FIXME
- if self:
- obj_list = sorted(self, key=lambda obj: obj.score)
- for i, it in enumerate(obj_list):
- if obj_list[i] == item:
- return InstrumentedList.pop(self, i)
-
-
-def attribute_multi_dict_collection(creator, key_attr, val_attr):
- class MultiMappedCollection(dict):
-
- def __init__(self, data=None):
- self._data = data or {}
-
- @collection.appender
- def _append(self, obj):
- l = self._data.setdefault(key_attr(obj), [])
- l.append(obj)
-
- def __setitem__(self, key, value):
- self._append(creator(key, value))
-
- def __getitem__(self, key):
- return tuple(val_attr(obj) for obj in self._data[key])
-
- @collection.remover
- def _remove(self, obj):
- self._data[key_attr(obj)].remove(obj)
-
- @collection.iterator
- def _iterator(self):
- for objs in self._data.itervalues():
- for obj in objs:
- yield obj
-
- def __repr__(self):
- return '%s(%r)' % (type(self).__name__, self._data)
-
- return MultiMappedCollection
-
-external_collection_mapper = attribute_multi_dict_collection(
- creator=lambda k, v: CollectionExternalcollection(type=k,
- externalcollection=v),
- key_attr=lambda obj: obj.type,
- val_attr=lambda obj: obj.externalcollection)
-
-
-class Collection(db.Model):
-
- """Represent a Collection record."""
-
- def __repr__(self):
- return 'Collection '.format(self)
-
- def __unicode__(self):
- suffix = ' ({0})'.format(_('default')) if self.id == 1 else ''
- return u"{0.id}. {0.name}{1}".format(self, suffix)
-
- def __str__(self):
- return unicode(self).encode('utf-8')
-
- __tablename__ = 'collection'
- id = db.Column(db.MediumInteger(9, unsigned=True),
- primary_key=True)
- name = db.Column(db.String(255), unique=True, index=True,
- nullable=False)
- dbquery = db.Column(db.Text(20), nullable=True,
- index=True)
- nbrecs = db.Column(db.Integer(10, unsigned=True),
- server_default='0')
- # FIXME read only!!!
- reclist = db.Column(db.PickleType(pickler=IntbitsetPickle(),
- comparator=IntbitsetCmp))
-
- @property
- def is_hosted(self):
- """Return True if collection is hosted elsewhere."""
- return self.dbquery.startswith('hostedcollection:') if self.dbquery \
- else False
-
- _names = db.relationship(lambda: Collectionname,
- backref='collection',
- collection_class=attribute_mapped_collection(
- 'ln_type'),
- cascade="all, delete, delete-orphan")
-
- names = association_proxy(
- '_names', 'value',
- creator=lambda k, v: Collectionname(ln_type=k, value=v)
- )
- _boxes = db.relationship(lambda: Collectionboxname,
- backref='collection',
- collection_class=attribute_mapped_collection(
- 'ln_type'),
- cascade="all, delete, delete-orphan")
-
- boxes = association_proxy(
- '_boxes', 'value',
- creator=lambda k, v: Collectionboxname(ln_type=k, value=v)
- )
-
- _formatoptions = association_proxy('formats', 'format')
-
- # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
- def formatoptions(self):
- if len(self._formatoptions):
- return [dict(f) for f in self._formatoptions]
- else:
- return [{'code': u'hb',
- 'name': _("HTML %(format)s", format=_("brief")),
- 'content_type': u'text/html'}]
-
- formatoptions = property(formatoptions)
-
- _examples_example = association_proxy('_examples', 'example')
-
- @property
- # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
- def examples(self):
- return list(self._examples_example)
-
- @property
- def name_ln(self):
- from invenio.legacy.search_engine import get_coll_i18nname
- return get_coll_i18nname(self.name,
- getattr(g, 'ln', cfg['CFG_SITE_LANG']))
- # Another possible implementation with cache memoize
- # @cache.memoize
- # try:
- # return db.object_session(self).query(Collectionname).\
- # with_parent(self).filter(db.and_(Collectionname.ln==g.ln,
- # Collectionname.type=='ln')).first().value
- # except:
- # return self.name
-
- @property
- # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
- def portalboxes_ln(self):
- return db.object_session(self).query(CollectionPortalbox).\
- with_parent(self).\
- options(db.joinedload_all(CollectionPortalbox.portalbox)).\
- filter(CollectionPortalbox.ln == g.ln).\
- order_by(db.desc(CollectionPortalbox.score)).all()
-
- @property
- def most_specific_dad(self):
- return db.object_session(self).query(Collection).\
- join(Collection.sons).\
- filter(CollectionCollection.id_son == self.id).\
- order_by(db.asc(Collection.nbrecs)).\
- first()
-
- @property
- # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
- def is_restricted(self):
- from invenio.legacy.search_engine import collection_restricted_p
- return collection_restricted_p(self.name)
-
- @property
- def type(self):
- p = re.compile("\d+:.*")
- if self.dbquery is not None and \
- p.match(self.dbquery.lower()):
- return 'r'
- else:
- return 'v'
-
- _collection_children = db.relationship(
- lambda: CollectionCollection,
- collection_class=ordering_list('score'),
- primaryjoin=lambda: Collection.id == CollectionCollection.id_dad,
- foreign_keys=lambda: CollectionCollection.id_dad,
- order_by=lambda: db.asc(CollectionCollection.score)
- )
- _collection_children_r = db.relationship(
- lambda: CollectionCollection,
- collection_class=ordering_list('score'),
- primaryjoin=lambda: db.and_(
- Collection.id == CollectionCollection.id_dad,
- CollectionCollection.type == 'r'),
- foreign_keys=lambda: CollectionCollection.id_dad,
- order_by=lambda: db.asc(CollectionCollection.score)
- )
- _collection_children_v = db.relationship(
- lambda: CollectionCollection,
- collection_class=ordering_list('score'),
- primaryjoin=lambda: db.and_(
- Collection.id == CollectionCollection.id_dad,
- CollectionCollection.type == 'v'),
- foreign_keys=lambda: CollectionCollection.id_dad,
- order_by=lambda: db.asc(CollectionCollection.score)
- )
- collection_parents = db.relationship(
- lambda: CollectionCollection,
- collection_class=ordering_list('score'),
- primaryjoin=lambda: Collection.id == CollectionCollection.id_son,
- foreign_keys=lambda: CollectionCollection.id_son,
- order_by=lambda: db.asc(CollectionCollection.score)
- )
- collection_children = association_proxy('_collection_children', 'son')
- collection_children_r = association_proxy(
- '_collection_children_r', 'son',
- creator=lambda son: CollectionCollection(id_son=son.id, type='r')
- )
- collection_children_v = association_proxy(
- '_collection_children_v', 'son',
- creator=lambda son: CollectionCollection(id_son=son.id, type='v')
- )
-
- _externalcollections = db.relationship(
- lambda: CollectionExternalcollection,
- cascade="all, delete, delete-orphan"
- )
-
- def _externalcollections_type(type):
- return association_proxy(
- '_externalcollections_' + str(type),
- 'externalcollection',
- creator=lambda ext: CollectionExternalcollection(
- externalcollection=ext, type=type))
-
- externalcollections_0 = _externalcollections_type(0)
- externalcollections_1 = _externalcollections_type(1)
- externalcollections_2 = _externalcollections_type(2)
-
- externalcollections = db.relationship(
- lambda: CollectionExternalcollection,
- collection_class=external_collection_mapper,
- cascade="all, delete, delete-orphan"
- )
-
- # Search options
- _make_field_fieldvalue = lambda type: db.relationship(
- lambda: CollectionFieldFieldvalue,
- primaryjoin=lambda: db.and_(
- Collection.id == CollectionFieldFieldvalue.id_collection,
- CollectionFieldFieldvalue.type == type),
- order_by=lambda: CollectionFieldFieldvalue.score)
-
- _search_within = _make_field_fieldvalue('sew')
- _search_options = _make_field_fieldvalue('seo')
-
- @property
- # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
- def search_within(self):
- """
- Collect search within options.
- """
- default = [('', _('any field'))]
- found = [(o.field.code, o.field.name_ln) for o in self._search_within]
- if not found:
- found = [(f.name.replace(' ', ''), f.name_ln)
- for f in Field.query.filter(Field.name.in_(
- cfg['CFG_WEBSEARCH_SEARCH_WITHIN'])).all()]
- return default + sorted(found, key=itemgetter(1))
-
- @property
- # @cache.memoize(make_name=lambda fname: fname + '::' + g.ln)
- def search_options(self):
- return self._search_options
-
- @cached_property
- def ancestors_ids(self):
- """Get list of parent collection ids."""
- output = intbitset([self.id])
- for c in self.dads:
- ancestors = c.dad.ancestors_ids
- if self.id in ancestors:
- raise
- output |= ancestors
- return output
-
- @cached_property
- def descendants_ids(self):
- """Get list of child collection ids."""
- output = intbitset([self.id])
- for c in self.sons:
- descendants = c.son.descendants_ids
- if self.id in descendants:
- raise
- output |= descendants
- return output
-
- # Gets the list of localized names as an array
- collection_names = db.relationship(
- lambda: Collectionname,
- primaryjoin=lambda: Collection.id == Collectionname.id_collection,
- foreign_keys=lambda: Collectionname.id_collection
- )
-
- def translation(self, lang):
- """Get the translation according to the language code."""
- try:
- return db.object_session(self).query(Collectionname).\
- with_parent(self).filter(db.and_(
- Collectionname.ln == lang,
- Collectionname.type == 'ln'
- )).first().value
- except:
- return ""
-
- @property
- def sort_methods(self):
- """Get sort methods for collection.
-
- If not sort methods are defined for a collection the root collections
- sort methods are retuned. If not methods are defined for the root
- collection, all possible sort methods are returned.
-
- Note: Noth sorting methods and ranking methods are now defined via
- the sorter.
- """
- from invenio.modules.sorter.models import BsrMETHOD, \
- Collection_bsrMETHOD
-
- get_method = lambda obj: obj.bsrMETHOD
-
- for coll_id in (self.id, 1):
- methods = Collection_bsrMETHOD.query.filter_by(
- id_collection=coll_id
- ).order_by(
- Collection_bsrMETHOD.score
- ).options(
- db.joinedload(Collection_bsrMETHOD.bsrMETHOD)
- ).all()
-
- if len(methods) > 0:
- return map(get_method, methods)
-
- return BsrMETHOD.query.order_by(BsrMETHOD.name).all()
-
- def get_collectionbox_name(self, ln=None, box_type="r"):
- """Return collection-specific labelling subtrees.
-
- - 'Focus on': regular collection
- - 'Narrow by': virtual collection
- - 'Latest addition': boxes
-
- If translation for given language does not exist, use label
- for CFG_SITE_LANG. If no custom label is defined for
- CFG_SITE_LANG, return default label for the box.
-
- :param ln: the language of the label
- :param box_type: can be 'r' (=Narrow by), 'v' (=Focus on),
- 'l' (=Latest additions)
- """
- if ln is None:
- ln = g.ln
- collectionboxnamequery = db.object_session(self).query(
- Collectionboxname).with_parent(self)
- try:
- collectionboxname = collectionboxnamequery.filter(db.and_(
- Collectionboxname.ln == ln,
- Collectionboxname.type == box_type,
- )).one()
- except:
- try:
- collectionboxname = collectionboxnamequery.filter(db.and_(
- Collectionboxname.ln == ln,
- Collectionboxname.type == box_type,
- )).one()
- except:
- collectionboxname = None
-
- if collectionboxname is None:
- # load the right message language
- _ = gettext_set_language(ln)
- return _(Collectionboxname.TYPES.get(box_type, ''))
- else:
- return collectionboxname.value
-
- portal_boxes_ln = db.relationship(
- lambda: CollectionPortalbox,
- collection_class=ordering_list('score'),
- primaryjoin=lambda:
- Collection.id == CollectionPortalbox.id_collection,
- foreign_keys=lambda: CollectionPortalbox.id_collection,
- order_by=lambda: db.asc(CollectionPortalbox.score))
-
- def breadcrumbs(self, builder=None, ln=None):
- """Return breadcrumbs for collection."""
- ln = cfg.get('CFG_SITE_LANG') if ln is None else ln
- breadcrumbs = []
- # Get breadcrumbs for most specific dad if it exists.
- if self.most_specific_dad is not None:
- breadcrumbs = self.most_specific_dad.breadcrumbs(builder=builder,
- ln=ln)
-
- if builder is not None:
- crumb = builder(self)
- else:
- crumb = dict(
- text=self.name_ln,
- url=url_for('search.collection', name=self.name))
-
- breadcrumbs.append(crumb)
- return breadcrumbs
-
-
-class Collectionname(db.Model):
-
- """Represent a Collectionname record."""
- __tablename__ = 'collectionname'
- id_collection = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id),
- nullable=False, primary_key=True)
- ln = db.Column(db.Char(5), nullable=False, primary_key=True,
- server_default='')
- type = db.Column(db.Char(3), nullable=False, primary_key=True,
- server_default='sn')
- value = db.Column(db.String(255), nullable=False)
-
- @db.hybrid_property
- def ln_type(self):
- return (self.ln, self.type)
-
- @ln_type.setter
- def set_ln_type(self, value):
- (self.ln, self.type) = value
-
-
-class Collectionboxname(db.Model):
-
- """Represent a Collectionboxname record."""
-
- __tablename__ = 'collectionboxname'
-
- TYPES = {
- 'v': 'Focus on:',
- 'r': 'Narrow by collection:',
- 'l': 'Latest additions:',
- }
-
- id_collection = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id),
- nullable=False, primary_key=True)
- ln = db.Column(db.Char(5), nullable=False, primary_key=True,
- server_default='')
- type = db.Column(db.Char(3), nullable=False, primary_key=True,
- server_default='r')
- value = db.Column(db.String(255), nullable=False)
-
- @db.hybrid_property
- def ln_type(self):
- return (self.ln, self.type)
-
- @ln_type.setter
- def set_ln_type(self, value):
- (self.ln, self.type) = value
-
-
-class Collectiondetailedrecordpagetabs(db.Model):
-
- """Represent a Collectiondetailedrecordpagetabs record."""
- __tablename__ = 'collectiondetailedrecordpagetabs'
- id_collection = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id),
- nullable=False, primary_key=True)
- tabs = db.Column(db.String(255), nullable=False,
- server_default='')
- collection = db.relationship(Collection,
- backref='collectiondetailedrecordpagetabs')
-
-
-class CollectionCollection(db.Model):
-
- """Represent a CollectionCollection record."""
- __tablename__ = 'collection_collection'
- id_dad = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id), primary_key=True)
- id_son = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id), primary_key=True)
- type = db.Column(db.Char(1), nullable=False,
- server_default='r')
- score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False,
- server_default='0')
- son = db.relationship(Collection, primaryjoin=id_son == Collection.id,
- backref='dads',
- # FIX
- # collection_class=db.attribute_mapped_collection('score'),
- order_by=db.asc(score))
- dad = db.relationship(Collection, primaryjoin=id_dad == Collection.id,
- backref='sons', order_by=db.asc(score))
-
-
-class Example(db.Model):
-
- """Represent a Example record."""
- __tablename__ = 'example'
- id = db.Column(db.MediumInteger(9, unsigned=True), primary_key=True,
- autoincrement=True)
- type = db.Column(db.Text, nullable=False)
- body = db.Column(db.Text, nullable=False)
-
-
-class CollectionExample(db.Model):
-
- """Represent a CollectionExample record."""
- __tablename__ = 'collection_example'
- id_collection = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id), primary_key=True)
- id_example = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Example.id), primary_key=True)
- score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False,
- server_default='0')
- collection = db.relationship(Collection, backref='_examples',
- order_by=score)
- example = db.relationship(Example, backref='collections', order_by=score)
-
-
-class Portalbox(db.Model):
-
- """Represent a Portalbox record."""
- __tablename__ = 'portalbox'
- id = db.Column(db.MediumInteger(9, unsigned=True), autoincrement=True,
- primary_key=True)
- title = db.Column(db.Text, nullable=False)
- body = db.Column(db.Text, nullable=False)
-
-
-def get_pbx_pos():
- """Returns a list of all the positions for a portalbox"""
-
- position = {}
- position["rt"] = "Right Top"
- position["lt"] = "Left Top"
- position["te"] = "Title Epilog"
- position["tp"] = "Title Prolog"
- position["ne"] = "Narrow by coll epilog"
- position["np"] = "Narrow by coll prolog"
- return position
-
-
-class CollectionPortalbox(db.Model):
-
- """Represent a CollectionPortalbox record."""
- __tablename__ = 'collection_portalbox'
- id_collection = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id), primary_key=True)
- id_portalbox = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Portalbox.id), primary_key=True)
- ln = db.Column(db.Char(5), primary_key=True, server_default='',
- nullable=False)
- position = db.Column(db.Char(3), nullable=False,
- server_default='top')
- score = db.Column(db.TinyInteger(4, unsigned=True),
- nullable=False,
- server_default='0')
- collection = db.relationship(Collection, backref='portalboxes',
- order_by=score)
- portalbox = db.relationship(Portalbox, backref='collections',
- order_by=score)
-
-
-class Externalcollection(db.Model):
-
- """Represent a Externalcollection record."""
- __tablename__ = 'externalcollection'
- id = db.Column(db.MediumInteger(9, unsigned=True),
- primary_key=True)
- name = db.Column(db.String(255), unique=True, nullable=False,
- server_default='')
-
- @property
- def engine(self):
- from invenio.legacy.websearch_external_collections.searcher import (
- external_collections_dictionary
- )
- if self.name in external_collections_dictionary:
- return external_collections_dictionary[self.name]
-
-
-class CollectionExternalcollection(db.Model):
-
- """Represent a CollectionExternalcollection record."""
- __tablename__ = 'collection_externalcollection'
- id_collection = db.Column(db.MediumInteger(9,
- unsigned=True),
- db.ForeignKey(Collection.id), primary_key=True,
- server_default='0')
- id_externalcollection = db.Column(db.MediumInteger(9,
- unsigned=True),
- db.ForeignKey(Externalcollection.id),
- primary_key=True,
- server_default='0')
- type = db.Column(db.TinyInteger(4, unsigned=True),
- server_default='0',
- nullable=False)
-
- def _collection_type(type_):
- return db.relationship(
- Collection,
- primaryjoin=lambda: db.and_(
- CollectionExternalcollection.id_collection == Collection.id,
- CollectionExternalcollection.type == type_),
- backref='_externalcollections_{0}'.format(str(type_))
- )
- collection_0 = _collection_type(0)
- collection_1 = _collection_type(1)
- collection_2 = _collection_type(2)
-
- externalcollection = db.relationship(Externalcollection)
-
-
-class CollectionFormat(db.Model):
-
- """Represent a CollectionFormat record."""
- __tablename__ = 'collection_format'
- id_collection = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id), primary_key=True)
- id_format = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Format.id), primary_key=True)
- score = db.Column(db.TinyInteger(4, unsigned=True),
- nullable=False, server_default='0')
- collection = db.relationship(Collection, backref='formats',
- order_by=db.desc(score))
- format = db.relationship(Format, backref='collections',
- order_by=db.desc(score))
class Field(db.Model):
@@ -775,6 +93,7 @@ def __init__(self):
class Fieldname(db.Model):
"""Represent a Fieldname record."""
+
__tablename__ = 'fieldname'
id_field = db.Column(db.MediumInteger(9, unsigned=True),
db.ForeignKey(Field.id), primary_key=True)
@@ -787,6 +106,7 @@ class Fieldname(db.Model):
class Tag(db.Model):
"""Represent a Tag record."""
+
__tablename__ = 'tag'
id = db.Column(db.MediumInteger(9, unsigned=True), primary_key=True)
name = db.Column(db.String(255), nullable=False)
@@ -812,6 +132,7 @@ def as_tag(self):
class FieldTag(db.Model):
"""Represent a FieldTag record."""
+
__tablename__ = 'field_tag'
id_field = db.Column(db.MediumInteger(9, unsigned=True),
db.ForeignKey('field.id'), nullable=False,
@@ -840,6 +161,7 @@ def as_tag(self):
class WebQuery(db.Model):
"""Represent a WebQuery record."""
+
__tablename__ = 'query'
id = db.Column(db.Integer(15, unsigned=True), primary_key=True,
autoincrement=True)
@@ -879,94 +201,12 @@ def log(cls, urlargs=None, id_user=None):
db.session.commit()
-class CollectionFieldFieldvalue(db.Model):
-
- """Represent a CollectionFieldFieldvalue record."""
-
- __tablename__ = 'collection_field_fieldvalue'
- id_collection = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Collection.id),
- primary_key=True, nullable=False)
- id_field = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Field.id), primary_key=True,
- nullable=False)
- id_fieldvalue = db.Column(db.MediumInteger(9, unsigned=True),
- db.ForeignKey(Fieldvalue.id), primary_key=True,
- nullable=True)
- type = db.Column(db.Char(3), nullable=False,
- server_default='src')
- score = db.Column(db.TinyInteger(4, unsigned=True), nullable=False,
- server_default='0')
- score_fieldvalue = db.Column(db.TinyInteger(4, unsigned=True),
- nullable=False, server_default='0')
-
- collection = db.relationship(Collection, backref='field_fieldvalues',
- order_by=score)
- field = db.relationship(Field, backref='collection_fieldvalues',
- lazy='joined')
- fieldvalue = db.relationship(Fieldvalue, backref='collection_fields',
- lazy='joined')
-
-
-class FacetCollection(db.Model):
-
- """Facet configuration for collection."""
-
- __tablename__ = 'facet_collection'
-
- id = db.Column(db.Integer, primary_key=True)
- id_collection = db.Column(db.Integer, db.ForeignKey(Collection.id))
- order = db.Column(db.Integer)
- facet_name = db.Column(db.String(80))
-
- collection = db.relationship(Collection, backref='facets')
-
- def __repr__(self):
- return ('FacetCollection '.format(self))
-
- @classmethod
- def is_place_taken(cls, id_collection, order):
- """Check if there is already a facet on the given position.
-
- .. note:: This works well as a pre-check, however saving can still fail
- if somebody else creates the same record in other session
- (phantom reads).
- """
- return bool(cls.query.filter(
- cls.id_collection == id_collection,
- cls.order == order).count())
-
- @classmethod
- def is_duplicated(cls, id_collection, facet_name):
- """Check if the given facet is already assigned to this collection.
-
- .. note:: This works well as a pre-check, however saving can still fail
- if somebody else creates the same record in other session
- (phantom reads).
- """
- return bool(cls.query.filter(
- cls.id_collection == id_collection,
- cls.facet_name == facet_name).count())
-
-__all__ = ('Collection',
- 'Collectionname',
- 'Collectiondetailedrecordpagetabs',
- 'CollectionCollection',
- 'Example',
- 'CollectionExample',
- 'Portalbox',
- 'CollectionPortalbox',
- 'Externalcollection',
- 'CollectionExternalcollection',
- 'CollectionFormat',
- 'Field',
- 'Fieldvalue',
- 'Fieldname',
- 'Tag',
- 'FieldTag',
- 'WebQuery',
- 'UserQuery',
- 'CollectionFieldFieldvalue',
- 'FacetCollection')
+__all__ = (
+ 'Field',
+ 'Fieldvalue',
+ 'Fieldname',
+ 'Tag',
+ 'FieldTag',
+ 'WebQuery',
+ 'UserQuery',
+)
diff --git a/invenio/modules/search/registry.py b/invenio/modules/search/registry.py
index af9ac2ff3b..680689bbac 100644
--- a/invenio/modules/search/registry.py
+++ b/invenio/modules/search/registry.py
@@ -25,7 +25,7 @@
from invenio.ext.registry import DictModuleAutoDiscoverySubRegistry, \
ModuleAutoDiscoverySubRegistry
-from invenio.modules.search.models import FacetCollection
+from invenio.modules.collections.models import FacetCollection
from invenio.utils.memoise import memoize
searchext = RegistryProxy('searchext', ModuleAutoDiscoveryRegistry,
diff --git a/invenio/modules/search/templates/search/form/controls_base.html b/invenio/modules/search/templates/search/form/controls_base.html
index 4eb14f058e..ec2296b426 100644
--- a/invenio/modules/search/templates/search/form/controls_base.html
+++ b/invenio/modules/search/templates/search/form/controls_base.html
@@ -25,7 +25,7 @@
type="text"
tabindex="1"
value="{{ request.args.get('p', '') }}"
- {%- if request.endpoint == 'search.index' %}
+ {%- if request.endpoint == 'collections.index' %}
autofocus
{%- endif -%}/>
{%- endblock -%}
diff --git a/invenio/modules/search/testsuite/test_views.py b/invenio/modules/search/testsuite/test_views.py
index 313f808608..3719cf4252 100644
--- a/invenio/modules/search/testsuite/test_views.py
+++ b/invenio/modules/search/testsuite/test_views.py
@@ -29,11 +29,11 @@ class SearchViewTest(InvenioTestCase):
""" Test search view functions. """
def test_home_collection_page_availability(self):
- response = self.client.get(url_for('search.index'))
+ response = self.client.get(url_for('collections.index'))
self.assert200(response)
response = self.client.get(url_for(
- 'search.collection', name=current_app.config['CFG_SITE_NAME']))
+ 'collections.collection', name=current_app.config['CFG_SITE_NAME']))
self.assert200(response)
def test_search_page_availability(self):
diff --git a/invenio/modules/search/utils.py b/invenio/modules/search/utils.py
index a4f84cde2f..3728d6a25a 100644
--- a/invenio/modules/search/utils.py
+++ b/invenio/modules/search/utils.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
-## Copyright (C) 2014 CERN.
+## Copyright (C) 2014, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -19,21 +19,18 @@
"""Utility functions for search engine."""
+import numpy
+
from six import string_types, iteritems
+from intbitset import intbitset
-from .cache import (
+from invenio.base.globals import cfg
+from invenio.modules.collections.cache import (
get_collection_allchildren,
get_collection_reclist,
restricted_collection_cache,
)
-try:
- # import optional module:
- import numpy
- CFG_NUMPY_IMPORTABLE = True
-except ImportError:
- CFG_NUMPY_IMPORTABLE = False
-
def get_most_popular_field_values(recids, tags, exclude_values=None,
count_repetitive_values=True, split_by=0):
@@ -66,18 +63,6 @@ def get_most_popular_field_values(recids, tags, exclude_values=None,
"""
from invenio.legacy.bibrecord import get_fieldvalues
- def _get_most_popular_field_values_helper_sorter(val1, val2):
- """Compare VAL1 and VAL2.
-
- First, compare by frequencies, then alphabetically.
- """
- compared_via_frequencies = cmp(valuefreqdict[val2],
- valuefreqdict[val1])
- if compared_via_frequencies == 0:
- return cmp(val1.lower(), val2.lower())
- else:
- return compared_via_frequencies
-
valuefreqdict = {}
# sanity check:
if not exclude_values:
@@ -115,42 +100,25 @@ def _get_most_popular_field_values_helper_sorter(val1, val2):
else:
valuefreqdict[val] = 1
# sort by descending frequency of values:
- if not CFG_NUMPY_IMPORTABLE:
- # original version
- out = []
- vals = valuefreqdict.keys()
- vals.sort(_get_most_popular_field_values_helper_sorter)
- for val in vals:
- tmpdisplv = ''
- if val in displaytmp:
- tmpdisplv = displaytmp[val]
- else:
- tmpdisplv = val
- out.append((tmpdisplv, valuefreqdict[val]))
- return out
- else:
- f = [] # frequencies
- n = [] # original names
- ln = [] # lowercased names
- # build lists within one iteration
- for (val, freq) in iteritems(valuefreqdict):
- f.append(-1 * freq)
- if val in displaytmp:
- n.append(displaytmp[val])
- else:
- n.append(val)
- ln.append(val.lower())
- # sort by frequency (desc) and then by lowercased name.
- return [(n[i], -1 * f[i]) for i in numpy.lexsort([ln, f])]
+ f = [] # frequencies
+ n = [] # original names
+ ln = [] # lowercased names
+ # build lists within one iteration
+ for (val, freq) in iteritems(valuefreqdict):
+ f.append(-1 * freq)
+ if val in displaytmp:
+ n.append(displaytmp[val])
+ else:
+ n.append(val)
+ ln.append(val.lower())
+ # sort by frequency (desc) and then by lowercased name.
+ return [(n[i], -1 * f[i]) for i in numpy.lexsort([ln, f])]
def get_records_that_can_be_displayed(permitted_restricted_collections,
hitset_in_any_collection,
current_coll=None, colls=None):
"""Return records that can be displayed."""
- from intbitset import intbitset
- from invenio.base.globals import cfg
-
current_coll = current_coll or cfg['CFG_SITE_NAME']
records_that_can_be_displayed = intbitset()
@@ -201,6 +169,7 @@ def get_records_that_can_be_displayed(permitted_restricted_collections,
records_that_can_be_displayed = intbitset()
for coll in colls_to_be_displayed:
records_that_can_be_displayed |= get_collection_reclist(coll)
+ records_that_can_be_displayed -= notpermitted_recids
return records_that_can_be_displayed
diff --git a/invenio/modules/search/views/__init__.py b/invenio/modules/search/views/__init__.py
index 6a0e01b0de..ba15f03b73 100644
--- a/invenio/modules/search/views/__init__.py
+++ b/invenio/modules/search/views/__init__.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
+##
## This file is part of Invenio.
-## Copyright (C) 2013 CERN.
+## Copyright (C) 2013, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
@@ -16,7 +17,8 @@
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 331, Boston, MA 02111-1307, USA.
+"""Search view definitions."""
+
from .search import blueprint as search_blueprint
-from .admin import blueprint as admin_blueprint
-blueprints = [search_blueprint, admin_blueprint]
+blueprints = [search_blueprint]
diff --git a/invenio/modules/search/views/search.py b/invenio/modules/search/views/search.py
index 76c7790950..56ca634c42 100644
--- a/invenio/modules/search/views/search.py
+++ b/invenio/modules/search/views/search.py
@@ -40,40 +40,39 @@
"""
+import cStringIO
+import functools
import json
import string
-import functools
-import cStringIO
-from math import ceil
+
from flask import make_response, g, request, flash, jsonify, \
redirect, url_for, current_app, abort, session, Blueprint, \
render_template
+from flask.ext.breadcrumbs import \
+ register_breadcrumb, current_breadcrumbs, default_breadcrumb_root
from flask.ext.login import current_user
+from math import ceil
from six import iteritems
from werkzeug.local import LocalProxy
-from .. import receivers
-from ..cache import get_search_query_id, get_collection_name_from_cache
-from ..facet_builders import get_current_user_records_that_can_be_displayed, \
- faceted_results_filter
-from ..forms import EasySearchForm
-from ..models import Collection, Field
-from ..washers import wash_search_urlargd
-from flask.ext.menu import register_menu
-from invenio.base.signals import websearch_before_browse
-from invenio.modules.indexer import models as BibIndex
-from invenio.modules.formatter import format_record
-from invenio.base.i18n import _
from invenio.base.decorators import wash_arguments, templated
-from flask.ext.breadcrumbs import \
- register_breadcrumb, current_breadcrumbs, default_breadcrumb_root
+from invenio.base.i18n import _
+from invenio.base.signals import websearch_before_browse
+from invenio.modules.indexer.models import IdxINDEX
from invenio.ext.template.context_processor import \
register_template_context_processor
from invenio.utils.pagination import Pagination
-from invenio.utils.text import slugify
from invenio.modules.search.registry import facets
+from invenio.modules.collections.decorators import check_collection
+from .. import receivers
from ..api import SearchEngine
+from ..cache import get_search_query_id, get_collection_name_from_cache
+from ..facet_builders import get_current_user_records_that_can_be_displayed, \
+ faceted_results_filter
+from ..forms import EasySearchForm
+from ..models import Field
+from ..washers import wash_search_urlargd
blueprint = Blueprint('search', __name__, url_prefix="",
@@ -97,14 +96,6 @@ def _collection_of():
"""Collection output format."""
-def collection_name_from_request():
- """TODO."""
- collection = request.values.get('cc')
- if collection is None and len(request.values.getlist('c')) == 1:
- collection = request.values.get('c')
- return collection
-
-
def min_length(length, code=406):
"""TODO."""
def checker(value):
@@ -114,45 +105,6 @@ def checker(value):
return checker
-def check_collection(method=None, name_getter=collection_name_from_request,
- default_collection=False):
- """Check collection existence and authorization for current user."""
- if method is None:
- return functools.partial(check_collection, name_getter=name_getter,
- default_collection=default_collection)
-
- @functools.wraps(method)
- def decorated(*args, **kwargs):
- uid = current_user.get_id()
- name = name_getter()
- if name:
- g.collection = collection = Collection.query.filter(
- Collection.name == name).first_or_404()
- elif default_collection:
- g.collection = collection = Collection.query.get_or_404(1)
- else:
- return abort(404)
-
- if collection.is_restricted:
- from invenio.modules.access.engine import acc_authorize_action
- from invenio.modules.access.local_config import VIEWRESTRCOLL
- (auth_code, auth_msg) = acc_authorize_action(
- uid,
- VIEWRESTRCOLL,
- collection=collection.name
- )
- if auth_code:
- flash(_('This collection is restricted.'), 'error')
- if auth_code and current_user.is_guest:
- return redirect(url_for('webaccount.login',
- referer=request.url))
- elif auth_code:
- return abort(401)
-
- return method(collection, *args, **kwargs)
- return decorated
-
-
def response_formated_records(recids, collection, of, **kwargs):
"""TODO."""
from invenio.modules.formatter import (get_output_format_content_type,
@@ -163,64 +115,6 @@ def response_formated_records(recids, collection, of, **kwargs):
return response
-@blueprint.route('/index.html', methods=['GET', 'POST'])
-@blueprint.route('/index.py', methods=['GET', 'POST'])
-@blueprint.route('/', methods=['GET', 'POST'])
-@templated('search/index.html')
-@register_menu(blueprint, 'main.search', _('Search'), order=1)
-@register_breadcrumb(blueprint, '.', _('Home'))
-def index():
- """Render the homepage."""
- # legacy app support
- c = request.values.get('c')
- if c == current_app.config['CFG_SITE_NAME']:
- return redirect(url_for('.index', ln=g.ln))
- elif c is not None:
- return redirect(url_for('.collection', name=c, ln=g.ln))
-
- collection = Collection.query.get_or_404(1)
-
- @register_template_context_processor
- def index_context():
- return dict(
- of=request.values.get('of', collection.formatoptions[0]['code']),
- easy_search_form=EasySearchForm(csrf_enabled=False),
- format_record=format_record,
- )
- return dict(collection=collection)
-
-
-@blueprint.route('/collection/', methods=['GET', 'POST'])
-@blueprint.route('/collection/', methods=['GET', 'POST'])
-def collection(name=None):
- """Render the collection page.
-
- It renders it either with a collection specific template (aka
- collection_{collection_name}.html) or with the default collection
- template (collection.html)
- """
- if name is None:
- return redirect('.collection',
- name=current_app.config['CFG_SITE_NAME'])
- collection = Collection.query.filter(Collection.name == name) \
- .first_or_404()
-
- @register_template_context_processor
- def index_context():
- breadcrumbs = current_breadcrumbs + collection.breadcrumbs(ln=g.ln)[1:]
- return dict(
- of=request.values.get('of', collection.formatoptions[0]['code']),
- format_record=format_record,
- easy_search_form=EasySearchForm(csrf_enabled=False),
- breadcrumbs=breadcrumbs)
-
- return render_template(['search/collection_{0}.html'.format(collection.id),
- 'search/collection_{0}.html'.format(slugify(name,
- '_')),
- 'search/collection.html'],
- collection=collection)
-
-
class SearchUrlargs(object):
"""TODO."""
@@ -388,7 +282,6 @@ def index_context():
@check_collection(default_collection=True)
def rss(collection, p, jrec, so, rm):
"""Render RSS feed."""
- from invenio.legacy.search_engine import perform_request_search
of = 'xr'
argd = wash_search_urlargd(request.args)
argd['of'] = 'id'
@@ -399,7 +292,8 @@ def rss(collection, p, jrec, so, rm):
rg = int(argd['rg'])
qid = get_search_query_id(**argd)
- recids = perform_request_search(req=request.get_legacy_request(), **argd)
+ searcher = SearchEngine(p)
+ recids = searcher.search(collection=collection.name)
ctx = dict(
records=len(get_current_user_records_that_can_be_displayed(qid)),
@@ -587,8 +481,7 @@ def make_results(collection):
methods=['GET', 'POST'])
@wash_arguments({'q': (min_length(3), '')})
def autocomplete(field, q):
- """
- Autocomplete data from indexes.
+ """Autocomplete data from indexes.
It uses POSTed arguments with name `q` that has to be longer than 3
characters in order to returns any results.
@@ -598,13 +491,10 @@ def autocomplete(field, q):
:return: list of values matching query.
"""
- from invenio.legacy.bibindex.engine import get_index_id_from_index_name
- IdxPHRASE = BibIndex.__getattribute__('IdxPHRASE%02dF' %
- get_index_id_from_index_name(field))
-
- results = IdxPHRASE.query.filter(IdxPHRASE.term.contains(q))\
- .limit(20).all()
- results = map(lambda r: {'value': r.term}, results)
+ IdxPHRASE = IdxINDEX.idxPHRASEF(field, fallback=False)
+ results = IdxPHRASE.query.filter(
+ IdxPHRASE.term.contains(q)).limit(20).values('term')
+ results = map(lambda r: {'value': r[0]}, results)
return jsonify(results=results)
diff --git a/invenio/modules/sorter/models.py b/invenio/modules/sorter/models.py
index b67e34e9fd..0e2edcdec3 100644
--- a/invenio/modules/sorter/models.py
+++ b/invenio/modules/sorter/models.py
@@ -30,7 +30,7 @@
from invenio.utils.serializers import deserialize_via_marshal
# Create your models here.
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
class BsrMETHOD(db.Model):
diff --git a/invenio/modules/tags/views.py b/invenio/modules/tags/views.py
index a71dd10537..fac802fa01 100644
--- a/invenio/modules/tags/views.py
+++ b/invenio/modules/tags/views.py
@@ -36,7 +36,7 @@
# External imports
from invenio.modules.accounts.models import User
from invenio.modules.records.models import Record as Bibrec
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
from invenio.modules.search.views.search import response_formated_records
from flask.ext.menu import register_menu
from flask.ext.breadcrumbs import default_breadcrumb_root, register_breadcrumb
diff --git a/invenio/modules/textminer/testsuite/test_textminer_documents.py b/invenio/modules/textminer/testsuite/test_textminer_documents.py
index 5a0e3610f2..d7377c837d 100644
--- a/invenio/modules/textminer/testsuite/test_textminer_documents.py
+++ b/invenio/modules/textminer/testsuite/test_textminer_documents.py
@@ -31,7 +31,7 @@
def has_request():
try:
return requests.get(
- url_for('search.index', _external=True)).status_code == 200
+ url_for('collections.index', _external=True)).status_code == 200
except:
return False
HAS_REQUESTS = LocalProxy(has_request)
diff --git a/invenio/testsuite/test_ext_template.py b/invenio/testsuite/test_ext_template.py
index 9b75e723e7..9b07373214 100644
--- a/invenio/testsuite/test_ext_template.py
+++ b/invenio/testsuite/test_ext_template.py
@@ -78,13 +78,13 @@ class TemplateArgsTest(InvenioTestCase):
def setup_app(cls, app):
"""Custom setup function."""
from invenio.ext.template.context_processor import template_args
- from invenio.modules.search.views.search import index
+ from invenio.modules.collections.views.collections import index
@template_args(index)
def foo():
return {'foo': 'foo', 'baz': 'baz'}
- @template_args('search.index', app=app)
+ @template_args('collections.index', app=app)
def bar():
return {'bar': 'bar', 'baz': 'BAZ'}
@@ -97,7 +97,7 @@ def config(self):
return cfg
def test_template_args_loading(self):
- self.client.get(url_for('search.index'))
+ self.client.get(url_for('collections.index'))
self.assertEqual(self.get_context_variable('foo'), 'foo')
self.assertEqual(self.get_context_variable('bar'), 'bar')
self.assertEqual(self.get_context_variable('baz'), 'BAZ')
@@ -114,7 +114,7 @@ def foo():
return {'foo': 'foo'}
self.assertRaises(Exception,
- lambda: template_args('search.index')(foo))
+ lambda: template_args('collections.index')(foo))
TEST_SUITE = make_test_suite(TemplateTest, TemplateLoaderCase,
diff --git a/setup.py b/setup.py
index 05552deb27..1116c0dbbd 100644
--- a/setup.py
+++ b/setup.py
@@ -281,7 +281,6 @@ def run(self):
'textmarc2xmlmarc = invenio.legacy.bibrecord.scripts.textmarc2xmlmarc:main',
'webaccessadmin = invenio.modules.access.scripts.webaccessadmin:main',
'webauthorprofile = invenio.legacy.webauthorprofile.scripts.webauthorprofile:main',
- 'webcoll = invenio.legacy.websearch.scripts.webcoll:main',
'webmessageadmin = invenio.legacy.webmessage.scripts.webmessageadmin:main',
'webstatadmin = invenio.legacy.webstat.scripts.webstatadmin:main',
'websubmitadmin = invenio.legacy.websubmit.scripts.websubmitadmin:main',