Skip to content

Commit

Permalink
WosQueries - Web of Science queries
Browse files Browse the repository at this point in the history
  • Loading branch information
dazza-codes committed Oct 3, 2017
1 parent 6377f6e commit 3161344
Show file tree
Hide file tree
Showing 4 changed files with 412 additions and 0 deletions.
279 changes: 279 additions & 0 deletions lib/wos_queries.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@

# Queries on the Web of Science (or Web of Knowledge).
#
# Each public method builds the SOAP message for one WoS search operation,
# sends it through the client's `search` endpoint and, when WoS reports more
# matches than fit in a single response, pages through the remainder so the
# caller receives one merged WosRecords.
class WosQueries
  # this is the maximum number that can be returned in a single query by WoS
  MAX_RECORDS = 100

  QUERY_LANGUAGE = 'en'.freeze

  # limit the start date when searching for publications, format: YYYY-MM-DD
  START_DATE = '1970-01-01'.freeze

  attr_reader :wos_client

  # @param wos_client [WosClient] a Web Of Science client
  # @param database [String] a WOS database identifier (default 'WOK')
  def initialize(wos_client, database = 'WOK')
    @wos_client = wos_client
    @database = database
  end

  # @param uid [String] a WOS UID
  # @return [WosRecords]
  def cited_references(uid)
    cited_references_collator(uid)
  end

  # @param uid [String] a WOS UID
  # @return [WosRecords]
  def citing_articles(uid)
    citing_articles_collator(uid)
  end

  # @param uid [String] a WOS UID
  # @return [WosRecords]
  def related_records(uid)
    related_records_collator(uid)
  end

  # @param uids [Array<String>] a list of WOS UIDs
  # @return [WosRecords]
  def retrieve_by_id(uids)
    retrieve_by_id_collator(uids)
  end

  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [WosRecords]
  # @raise [ArgumentError] when the name has no last name
  def search_by_name(name)
    search_by_name_collator(name)
  end

  private

  ###################################################################
  # WoS Query Record Collators

  # @param uid [String] a WOS UID
  # @return [WosRecords]
  def cited_references_collator(uid)
    message = cited_references_params(uid)
    response = wos_client.search.call(:cited_references, message: message)
    # cited references are paged with their own retrieve operation
    retrieve_additional_records(response, :cited_references_response, :cited_references_retrieve)
  end

  # @param uid [String] a WOS UID
  # @return [WosRecords]
  def citing_articles_collator(uid)
    message = citing_articles_params(uid)
    response = wos_client.search.call(:citing_articles, message: message)
    retrieve_additional_records(response, :citing_articles_response)
  end

  # @param uid [String] a WOS UID
  # @return [WosRecords]
  def related_records_collator(uid)
    message = related_records_params(uid)
    response = wos_client.search.call(:related_records, message: message)
    retrieve_additional_records(response, :related_records_response)
  end

  # @param uids [Array<String>] a list of WOS UIDs
  # @return [WosRecords]
  def retrieve_by_id_collator(uids)
    message = retrieve_by_id_params(uids)
    response = wos_client.search.call(:retrieve_by_id, message: message)
    retrieve_additional_records(response, :retrieve_by_id_response)
  end

  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [WosRecords]
  def search_by_name_collator(name)
    message = search_by_name_params(name)
    response = wos_client.search.call(:search, message: message)
    retrieve_additional_records(response, :search_response)
  end

  # Collects the records of an initial response and, when the query matched
  # more than MAX_RECORDS, issues follow-up retrieve calls (MAX_RECORDS at a
  # time) against the same query id and merges every page into one result.
  #
  # @param response [Savon::Response] the response to the initial query
  # @param response_type [Symbol] the body key of the initial response
  # @param retrieve_operation [Symbol] the operation used to fetch further pages
  # @return [WosRecords]
  def retrieve_additional_records(response, response_type, retrieve_operation = :retrieve)
    collated = records(response, response_type)
    record_total = records_found(response, response_type)
    return collated unless record_total > MAX_RECORDS

    search_id = query_id(response, response_type)
    page_response_type = :"#{retrieve_operation}_response"
    # The first page (records 1..MAX_RECORDS) is already in hand, so the
    # remaining pages start at MAX_RECORDS + 1, 2 * MAX_RECORDS + 1, ...
    (MAX_RECORDS + 1).step(record_total, MAX_RECORDS) do |first_record|
      message = {
        queryId: search_id,
        retrieveParameters: retrieve_parameters(first_record: first_record)
      }
      page = wos_client.search.call(retrieve_operation, message: message)
      collated = collated.merge_records(records(page, page_response_type))
    end
    collated
  end

  ###################################################################
  # WoS SOAP Response Parsers

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Hash] return data
  def response_return(response, response_type)
    response.body[response_type][:return]
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Integer]
  def query_id(response, response_type)
    response_return(response, response_type)[:query_id].to_i
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Integer]
  def records_found(response, response_type)
    response_return(response, response_type)[:records_found].to_i
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [WosRecords]
  def records(response, response_type)
    WosRecords.new(records: response_return(response, response_type)[:records])
  end

  ###################################################################
  # Search User Query Helpers

  # Constructs a WoS name query: the OR-joined spelling variants of an author
  # name (full first name, first initial and, when a middle name is given,
  # the combinations with the middle initial).
  #
  # When only a last name is supplied, the query is just that last name.
  #
  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [String] the name clause of a WoS user query
  # @raise [ArgumentError] when the name has no last name
  def name_query(name)
    # Only the first two comma-separated fields are used; surrounding
    # whitespace is dropped so it cannot leak into the query.
    last_name, first_middle_name = name.to_s.split(',').map(&:strip)
    raise ArgumentError, 'a last name is required for a WoS name query' if last_name.nil? || last_name.empty?

    first_name, middle_name = first_middle_name.to_s.split(' ')
    return last_name if first_name.nil?

    first_initial = first_name[0]
    variants = ["#{last_name} #{first_name}", "#{last_name} #{first_initial}"]
    if middle_name
      middle_initial = middle_name[0]
      variants << "#{last_name} #{first_initial}#{middle_initial}"
      variants << "#{last_name} #{first_name} #{middle_initial}"
    end
    variants.join(' OR ')
  end

  # Search authors from these institutions
  # @return [Array<String>] institution names
  def institutions
    ['Stanford University']
  end

  ###################################################################
  # WoS Query Parameters

  # @param uid [String] a WOS UID
  # @return [Hash] citedReferences parameters
  def cited_references_params(uid)
    # this operation sends its own retrieve option instead of the defaults
    hot_options = [{ key: 'Hot', value: 'On' }]
    {
      databaseId: @database,
      uid: uid,
      queryLanguage: QUERY_LANGUAGE,
      retrieveParameters: retrieve_parameters(options: hot_options)
    }
  end

  # @param uid [String] a WOS UID
  # @return [Hash] citingArticles parameters
  def citing_articles_params(uid)
    {
      databaseId: @database,
      uid: uid,
      timeSpan: time_span,
      queryLanguage: QUERY_LANGUAGE,
      retrieveParameters: retrieve_parameters
    }
  end

  # @param uid [String] a WOS UID
  # @return [Hash] relatedRecords parameters
  def related_records_params(uid)
    # The 'WOS' database is the only option for this query
    {
      databaseId: 'WOS',
      uid: uid,
      timeSpan: time_span,
      queryLanguage: QUERY_LANGUAGE,
      retrieveParameters: retrieve_parameters
    }
  end

  # @param uids [Array<String>] a list of WOS UIDs
  # @return [Hash] retrieveById parameters
  def retrieve_by_id_params(uids)
    {
      databaseId: @database,
      uid: uids,
      queryLanguage: QUERY_LANGUAGE,
      retrieveParameters: retrieve_parameters
    }
  end

  # @param count [Integer] the number of records to retrieve (defaults to MAX_RECORDS)
  # @param first_record [Integer] the record number offset (defaults to 1)
  # @param options [Array<Hash>] key/value retrieve options (defaults to #retrieve_options)
  # @return [Hash] retrieve parameters
  def retrieve_parameters(count: MAX_RECORDS, first_record: 1, options: retrieve_options)
    {
      firstRecord: first_record,
      count: count,
      option: options
    }
  end

  # @return [Array<Hash>] retrieve parameter options
  def retrieve_options
    [
      {
        key: 'RecordIDs',
        value: 'On'
      },
      {
        key: 'targetNamespace',
        value: 'http://scientific.thomsonreuters.com/schema/wok5.4/public/FullRecord'
      }
    ]
  end

  # @param user_query [String]
  # @return [Hash] search query parameters
  def search_params(user_query)
    {
      queryParameters: {
        databaseId: @database,
        userQuery: user_query,
        timeSpan: time_span,
        queryLanguage: QUERY_LANGUAGE
      },
      retrieveParameters: retrieve_parameters
    }
  end

  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [Hash] search query parameters
  def search_by_name_params(name)
    user_query = "AU=(#{name_query(name)}) AND AD=(#{institutions.join(' OR ')})"
    search_params(user_query)
  end

  # The span from START_DATE to today, evaluated in the application time zone.
  # @return [Hash] time span dates
  def time_span
    {
      begin: START_DATE,
      end: Time.zone.now.strftime('%Y-%m-%d')
    }
  end
end
21 changes: 21 additions & 0 deletions spec/fixtures/wos_client/wos_name_search_response.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<ns2:searchResponse xmlns:ns2="http://woksearch.v3.wokmws.thomsonreuters.com">
<return>
<queryId>2</queryId>
<recordsFound>2</recordsFound>
<recordsSearched>61323448</recordsSearched>
<optionValue>
<label>RecordIDs</label>
<value>WOS:A1972N549400003</value>
<value>WOS:A1976BW18000001</value>
</optionValue>
<records>&lt;records xmlns="http://scientific.thomsonreuters.com/schema/wok5.4/public/FullRecord"&gt;
&lt;REC r_id_disclaimer="ResearcherID data provided by Clarivate Analytics"&gt;&lt;UID&gt;WOS:A1972N549400003&lt;/UID&gt;&lt;static_data&gt;&lt;summary&gt;&lt;EWUID&gt;&lt;WUID coll_id="WOS"&gt;&lt;/WUID&gt;&lt;edition value="WOS.SSCI"&gt;&lt;/edition&gt;&lt;edition value="WOS.SCI"&gt;&lt;/edition&gt;&lt;/EWUID&gt;&lt;pub_info issue="5" pubtype="Journal" sortdate="1972-01-01" has_abstract="N" coverdate="1972" vol="33" pubyear="1972"&gt;&lt;page end="413" page_count="1" begin="413"&gt;413-413&lt;/page&gt;&lt;/pub_info&gt;&lt;titles count="6"&gt;&lt;title type="source"&gt;COLLEGE &amp;amp; RESEARCH LIBRARIES&lt;/title&gt;&lt;title type="source_abbrev"&gt;COLL RES LIBR&lt;/title&gt;&lt;title type="abbrev_iso"&gt;Coll. Res. Libr.&lt;/title&gt;&lt;title type="abbrev_11"&gt;COLL RES LI&lt;/title&gt;&lt;title type="abbrev_29"&gt;COLL RES LIBR&lt;/title&gt;&lt;title type="item"&gt;LIBRARY MANAGEMENT - BEHAVIOR-BASED PERSONNEL SYSTEMS (BBPS) - FRAMEWORK FOR ANALYSIS - KEMPER,RE&lt;/title&gt;&lt;/titles&gt;&lt;names count="1"&gt;&lt;name daisng_id="19669717" seq_no="1" role="author"&gt;&lt;display_name&gt;WEBER, DC&lt;/display_name&gt;&lt;full_name&gt;WEBER, DC&lt;/full_name&gt;&lt;wos_standard&gt;WEBER, DC&lt;/wos_standard&gt;&lt;first_name&gt;DC&lt;/first_name&gt;&lt;last_name&gt;WEBER&lt;/last_name&gt;&lt;/name&gt;&lt;/names&gt;&lt;doctypes count="1"&gt;&lt;doctype&gt;Book Review&lt;/doctype&gt;&lt;/doctypes&gt;&lt;publishers&gt;&lt;publisher&gt;&lt;address_spec addr_no="1"&gt;&lt;full_address&gt;50 E HURON ST, CHICAGO, IL 60611&lt;/full_address&gt;&lt;city&gt;CHICAGO&lt;/city&gt;&lt;/address_spec&gt;&lt;names count="1"&gt;&lt;name addr_no="1" role="publisher" seq_no="1"&gt;&lt;display_name&gt;ASSOC COLL RESEARCH LIBRARIES&lt;/display_name&gt;&lt;full_name&gt;ASSOC COLL RESEARCH LIBRARIES&lt;/full_name&gt;&lt;/name&gt;&lt;/names&gt;&lt;/publisher&gt;&lt;/publishers&gt;&lt;/summary&gt;&lt;fullrecord_metadata&gt;&lt;languages count="1"&gt;&lt;language 
type="primary"&gt;English&lt;/language&gt;&lt;/languages&gt;&lt;normalized_languages count="1"&gt;&lt;language type="primary"&gt;English&lt;/language&gt;&lt;/normalized_languages&gt;&lt;normalized_doctypes count="1"&gt;&lt;doctype&gt;Review&lt;/doctype&gt;&lt;/normalized_doctypes&gt;&lt;refs count="1"&gt;&lt;/refs&gt;&lt;addresses count="1"&gt;&lt;address_name&gt;&lt;address_spec addr_no="1"&gt;&lt;full_address&gt;STANFORD UNIV,STANFORD,CA 94305&lt;/full_address&gt;&lt;organizations count="2"&gt;&lt;organization&gt;STANFORD UNIV&lt;/organization&gt;&lt;organization pref="Y"&gt;Stanford University&lt;/organization&gt;&lt;/organizations&gt;&lt;city&gt;STANFORD&lt;/city&gt;&lt;state&gt;CA&lt;/state&gt;&lt;country&gt;USA&lt;/country&gt;&lt;zip location="AP"&gt;94305&lt;/zip&gt;&lt;/address_spec&gt;&lt;/address_name&gt;&lt;/addresses&gt;&lt;category_info&gt;&lt;headings count="1"&gt;&lt;heading&gt;Science &amp;amp; Technology&lt;/heading&gt;&lt;/headings&gt;&lt;subheadings count="1"&gt;&lt;subheading&gt;Technology&lt;/subheading&gt;&lt;/subheadings&gt;&lt;subjects count="3"&gt;&lt;subject ascatype="traditional" code="NU"&gt;Information Science &amp;amp; Library Science&lt;/subject&gt;&lt;subject ascatype="extended"&gt;Information Science &amp;amp; Library Science&lt;/subject&gt;&lt;subject ascatype="traditional" code="NU"&gt;INFORMATION SCIENCE &amp;amp; LIBRARY SCIENCE&lt;/subject&gt;&lt;/subjects&gt;&lt;/category_info&gt;&lt;/fullrecord_metadata&gt;&lt;item xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="itemType_wos"&gt;&lt;ids avail="Y"&gt;N5494&lt;/ids&gt;&lt;bib_id&gt;33 (5): 413-413 1972&lt;/bib_id&gt;&lt;/item&gt;&lt;/static_data&gt;&lt;dynamic_data&gt;&lt;citation_related&gt;&lt;tc_list&gt;&lt;silo_tc coll_id="WOS" local_count="0"&gt;&lt;/silo_tc&gt;&lt;/tc_list&gt;&lt;/citation_related&gt;&lt;cluster_related&gt;&lt;identifiers&gt;&lt;identifier value="0010-0870" 
type="issn"&gt;&lt;/identifier&gt;&lt;/identifiers&gt;&lt;/cluster_related&gt;&lt;/dynamic_data&gt;&lt;/REC&gt;
&lt;REC r_id_disclaimer="ResearcherID data provided by Clarivate Analytics"&gt;&lt;UID&gt;WOS:A1976BW18000001&lt;/UID&gt;&lt;static_data&gt;&lt;summary&gt;&lt;EWUID&gt;&lt;WUID coll_id="WOS"&gt;&lt;/WUID&gt;&lt;edition value="WOS.SSCI"&gt;&lt;/edition&gt;&lt;edition value="WOS.SCI"&gt;&lt;/edition&gt;&lt;/EWUID&gt;&lt;pub_info coverdate="1976" has_abstract="N" issue="3" pubtype="Journal" pubyear="1976" sortdate="1976-01-01" vol="37"&gt;&lt;page begin="205" end="221" page_count="17"&gt;205-221&lt;/page&gt;&lt;/pub_info&gt;&lt;titles count="6"&gt;&lt;title type="source"&gt;COLLEGE &amp;amp; RESEARCH LIBRARIES&lt;/title&gt;&lt;title type="source_abbrev"&gt;COLL RES LIBR&lt;/title&gt;&lt;title type="abbrev_iso"&gt;Coll. Res. Libr.&lt;/title&gt;&lt;title type="abbrev_11"&gt;COLL RES LI&lt;/title&gt;&lt;title type="abbrev_29"&gt;COLL RES LIBR&lt;/title&gt;&lt;title type="item"&gt;CENTURY OF COOPERATIVE PROGRAMS AMONG ACADEMIC-LIBRARIES&lt;/title&gt;&lt;/titles&gt;&lt;names count="1"&gt;&lt;name daisng_id="19670132" seq_no="1" role="author"&gt;&lt;display_name&gt;WEBER, DC&lt;/display_name&gt;&lt;full_name&gt;WEBER, DC&lt;/full_name&gt;&lt;wos_standard&gt;WEBER, DC&lt;/wos_standard&gt;&lt;first_name&gt;DC&lt;/first_name&gt;&lt;last_name&gt;WEBER&lt;/last_name&gt;&lt;/name&gt;&lt;/names&gt;&lt;doctypes count="1"&gt;&lt;doctype&gt;Article&lt;/doctype&gt;&lt;/doctypes&gt;&lt;publishers&gt;&lt;publisher&gt;&lt;address_spec addr_no="1"&gt;&lt;full_address&gt;50 E HURON ST, CHICAGO, IL 60611&lt;/full_address&gt;&lt;city&gt;CHICAGO&lt;/city&gt;&lt;/address_spec&gt;&lt;names count="1"&gt;&lt;name addr_no="1" role="publisher" seq_no="1"&gt;&lt;display_name&gt;ASSOC COLL RESEARCH LIBRARIES&lt;/display_name&gt;&lt;full_name&gt;ASSOC COLL RESEARCH LIBRARIES&lt;/full_name&gt;&lt;/name&gt;&lt;/names&gt;&lt;/publisher&gt;&lt;/publishers&gt;&lt;/summary&gt;&lt;fullrecord_metadata&gt;&lt;languages count="1"&gt;&lt;language 
type="primary"&gt;English&lt;/language&gt;&lt;/languages&gt;&lt;normalized_languages count="1"&gt;&lt;language type="primary"&gt;English&lt;/language&gt;&lt;/normalized_languages&gt;&lt;normalized_doctypes count="1"&gt;&lt;doctype&gt;Article&lt;/doctype&gt;&lt;/normalized_doctypes&gt;&lt;refs count="41"&gt;&lt;/refs&gt;&lt;addresses count="1"&gt;&lt;address_name&gt;&lt;address_spec addr_no="1"&gt;&lt;full_address&gt;STANFORD UNIV LIB,STANFORD,CA 94305&lt;/full_address&gt;&lt;organizations count="2"&gt;&lt;organization&gt;STANFORD UNIV LIB&lt;/organization&gt;&lt;organization pref="Y"&gt;Stanford University&lt;/organization&gt;&lt;/organizations&gt;&lt;city&gt;STANFORD&lt;/city&gt;&lt;state&gt;CA&lt;/state&gt;&lt;country&gt;USA&lt;/country&gt;&lt;zip location="AP"&gt;94305&lt;/zip&gt;&lt;/address_spec&gt;&lt;/address_name&gt;&lt;/addresses&gt;&lt;category_info&gt;&lt;headings count="1"&gt;&lt;heading&gt;Science &amp;amp; Technology&lt;/heading&gt;&lt;/headings&gt;&lt;subheadings count="1"&gt;&lt;subheading&gt;Technology&lt;/subheading&gt;&lt;/subheadings&gt;&lt;subjects count="3"&gt;&lt;subject ascatype="traditional" code="NU"&gt;Information Science &amp;amp; Library Science&lt;/subject&gt;&lt;subject ascatype="extended"&gt;Information Science &amp;amp; Library Science&lt;/subject&gt;&lt;subject ascatype="traditional" code="NU"&gt;INFORMATION SCIENCE &amp;amp; LIBRARY SCIENCE&lt;/subject&gt;&lt;/subjects&gt;&lt;/category_info&gt;&lt;/fullrecord_metadata&gt;&lt;item xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" coll_id="WOS" xsi:type="itemType_wos"&gt;&lt;ids avail="Y"&gt;BW180&lt;/ids&gt;&lt;bib_id&gt;37 (3): 205-221 1976&lt;/bib_id&gt;&lt;/item&gt;&lt;/static_data&gt;&lt;dynamic_data&gt;&lt;citation_related&gt;&lt;tc_list&gt;&lt;silo_tc coll_id="WOS" local_count="12"&gt;&lt;/silo_tc&gt;&lt;/tc_list&gt;&lt;/citation_related&gt;&lt;cluster_related&gt;&lt;identifiers&gt;&lt;identifier type="issn" 
value="0010-0870"&gt;&lt;/identifier&gt;&lt;/identifiers&gt;&lt;/cluster_related&gt;&lt;/dynamic_data&gt;&lt;/REC&gt;
&lt;/records&gt;</records>
</return>
</ns2:searchResponse>
</soap:Body>
</soap:Envelope>
Loading

0 comments on commit 3161344

Please sign in to comment.