Skip to content

Commit

Permalink
Web of Science queries - WosQueries
Browse files Browse the repository at this point in the history
  • Loading branch information
dazza-codes committed Sep 29, 2017
1 parent 5d8bf1b commit f35ade1
Show file tree
Hide file tree
Showing 4 changed files with 319 additions and 0 deletions.
184 changes: 184 additions & 0 deletions lib/wos_queries.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@

# Queries on the Web of Science (or Web of Knowledge)
#
# Runs search / retrieve operations through a WoS SOAP client and collates
# the paged responses into a single WosRecords.
class WosQueries
  # Default database 'WOK' is an umbrella for everything
  DATABASE = 'WOK'.freeze

  # this is the maximum number that can be returned in single query by WoS
  MAX_RECORDS = 100

  QUERY_LANGUAGE = 'en'.freeze

  # limit the start date when searching for publications, format: YYYY-MM-DD
  START_DATE = '1970-01-01'.freeze

  attr_reader :wos_client

  # @param wos_client [WosClient] a Web Of Science client
  # @param database [String] a WOS database identifier (default 'WOK')
  def initialize(wos_client, database = DATABASE)
    @wos_client = wos_client
    @database = database
  end

  # Search for publications by author name, limited to #institutions.
  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [WosRecords]
  # @raise [ArgumentError] when name has no last name or no first name
  def name_search(name)
    name_search_collator(name)
  end

  # Retrieve publications by their WOS IDs.
  # @param ids [Array<String>] a list of WOS IDs
  # @return [WosRecords]
  def retrieve_by_id(ids)
    retrieve_by_id_collator(ids)
  end

  private

  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [WosRecords]
  def name_search_collator(name)
    collate_records(:search, name_search_params(name), :search_response)
  end

  # @param ids [Array<String>] a list of WOS IDs
  # @return [WosRecords]
  def retrieve_by_id_collator(ids)
    collate_records(:retrieve_by_id, retrieve_by_id_params(ids), :retrieve_by_id_response)
  end

  # Issue the initial SOAP operation, then page through any remaining records.
  # @param operation [Symbol] the SOAP operation to call on the search client
  # @param message [Hash] the SOAP message body for the operation
  # @param response_type [Symbol] the key of the response body for the operation
  # @return [WosRecords]
  def collate_records(operation, message, response_type)
    response = wos_client.search.call(operation, message: message)
    first_page = records(response, response_type)
    total = records_found(response, response_type)
    retrieve_additional_records(total, first_page, query_id(response, response_type))
  end

  # Fetch the pages that follow the initial response and merge them in.
  # @param record_total [Integer]
  # @param records [WosRecords]
  # @param query_id [Integer]
  # @return [WosRecords]
  def retrieve_additional_records(record_total, records, query_id)
    # firstRecord is a 1-based record number and the initial query already
    # returned records 1..MAX_RECORDS, so follow-up pages begin at
    # MAX_RECORDS + 1 (101, 201, ...).  Nothing is iterated when
    # record_total <= MAX_RECORDS.
    (MAX_RECORDS + 1).step(record_total, MAX_RECORDS).reduce(records) do |collated, first_record|
      message = {
        queryId: query_id,
        retrieveParameters: retrieve_parameters(first_record)
      }
      response = wos_client.search.call(:retrieve, message: message)
      collated.merge_records(records(response, :retrieve_response))
    end
  end

  # Constructs a WoS name query
  # @example
  #   name_query('Weber, David C')
  #   #=> "Weber David OR Weber D OR Weber DC OR Weber David C"
  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [String] name variants joined by ' OR '
  # @raise [ArgumentError] when name has no last name or no first name
  def name_query(name)
    # Only the first two comma-separated fields are used; any others are ignored.
    last_name, first_middle_name = name.to_s.split(',')
    last_name = last_name.to_s.strip
    # Only the first two whitespace-separated tokens are used.
    first_name, middle_name = first_middle_name.to_s.split(' ')
    if last_name.empty? || first_name.nil?
      raise ArgumentError, "name must match '{last name}, {first name} [{middle name}]', got #{name.inspect}"
    end

    first_initial = first_name[0]
    variants = ["#{last_name} #{first_name}", "#{last_name} #{first_initial}"]
    # split(' ') never yields empty tokens, so a nil check is sufficient here.
    unless middle_name.nil?
      middle_initial = middle_name[0]
      variants << "#{last_name} #{first_initial}#{middle_initial}"
      variants << "#{last_name} #{first_name} #{middle_initial}"
    end
    variants.join(' OR ')
  end

  # Search authors from these institutions
  # @return [Array<String>] institution names
  def institutions
    ['Stanford University']
  end

  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @param first_record [Integer] the record number offset (defaults to 1)
  # @param count [Integer] the number of records to retrieve (defaults to 100)
  # @return [Hash] search query parameters
  def name_search_params(name, first_record = 1, count = MAX_RECORDS)
    {
      queryParameters: {
        databaseId: @database,
        userQuery: "AU=(#{name_query(name)}) AND AD=(#{institutions.join(' OR ')})",
        timeSpan: {
          begin: START_DATE,
          # Time.zone is an ActiveSupport extension, not core Ruby
          end: Time.zone.now.strftime('%Y-%m-%d')
        },
        queryLanguage: QUERY_LANGUAGE
      },
      retrieveParameters: retrieve_parameters(first_record, count)
    }
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Hash] return data
  def response_return(response, response_type)
    response.body[response_type][:return]
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Integer]
  def query_id(response, response_type)
    response_return(response, response_type)[:query_id].to_i
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Integer]
  def records_found(response, response_type)
    response_return(response, response_type)[:records_found].to_i
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [WosRecords]
  def records(response, response_type)
    WosRecords.new(records: response_return(response, response_type)[:records])
  end

  # @param ids [Array<String>] a list of WOS IDs
  # @return [Hash] retrieveById parameters
  def retrieve_by_id_params(ids)
    {
      databaseId: @database,
      uid: ids,
      queryLanguage: QUERY_LANGUAGE,
      retrieveParameters: retrieve_parameters
    }
  end

  # @param first_record [Integer] the record number offset (defaults to 1)
  # @param count [Integer] the number of records to retrieve (defaults to 100)
  # @return [Hash] retrieve parameters
  def retrieve_parameters(first_record = 1, count = MAX_RECORDS)
    {
      firstRecord: first_record,
      count: count,
      option: retrieve_options
    }
  end

  # @return [Array<Hash>] retrieve parameter options
  def retrieve_options
    [
      {
        key: 'RecordIDs',
        value: 'On'
      },
      {
        key: 'targetNamespace',
        value: 'http://scientific.thomsonreuters.com/schema/wok5.4/public/FullRecord'
      }
    ]
  end
end
21 changes: 21 additions & 0 deletions spec/fixtures/wos_client/wos_name_search_response.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<ns2:searchResponse xmlns:ns2="http://woksearch.v3.wokmws.thomsonreuters.com">
<return>
<queryId>2</queryId>
<recordsFound>2</recordsFound>
<recordsSearched>61323448</recordsSearched>
<optionValue>
<label>RecordIDs</label>
<value>WOS:A1972N549400003</value>
<value>WOS:A1976BW18000001</value>
</optionValue>
<records>&lt;records xmlns="http://scientific.thomsonreuters.com/schema/wok5.4/public/FullRecord"&gt;
&lt;REC r_id_disclaimer="ResearcherID data provided by Clarivate Analytics"&gt;&lt;UID&gt;WOS:A1972N549400003&lt;/UID&gt;&lt;static_data&gt;&lt;summary&gt;&lt;EWUID&gt;&lt;WUID coll_id="WOS"&gt;&lt;/WUID&gt;&lt;edition value="WOS.SSCI"&gt;&lt;/edition&gt;&lt;edition value="WOS.SCI"&gt;&lt;/edition&gt;&lt;/EWUID&gt;&lt;pub_info issue="5" pubtype="Journal" sortdate="1972-01-01" has_abstract="N" coverdate="1972" vol="33" pubyear="1972"&gt;&lt;page end="413" page_count="1" begin="413"&gt;413-413&lt;/page&gt;&lt;/pub_info&gt;&lt;titles count="6"&gt;&lt;title type="source"&gt;COLLEGE &amp;amp; RESEARCH LIBRARIES&lt;/title&gt;&lt;title type="source_abbrev"&gt;COLL RES LIBR&lt;/title&gt;&lt;title type="abbrev_iso"&gt;Coll. Res. Libr.&lt;/title&gt;&lt;title type="abbrev_11"&gt;COLL RES LI&lt;/title&gt;&lt;title type="abbrev_29"&gt;COLL RES LIBR&lt;/title&gt;&lt;title type="item"&gt;LIBRARY MANAGEMENT - BEHAVIOR-BASED PERSONNEL SYSTEMS (BBPS) - FRAMEWORK FOR ANALYSIS - KEMPER,RE&lt;/title&gt;&lt;/titles&gt;&lt;names count="1"&gt;&lt;name daisng_id="19669717" seq_no="1" role="author"&gt;&lt;display_name&gt;WEBER, DC&lt;/display_name&gt;&lt;full_name&gt;WEBER, DC&lt;/full_name&gt;&lt;wos_standard&gt;WEBER, DC&lt;/wos_standard&gt;&lt;first_name&gt;DC&lt;/first_name&gt;&lt;last_name&gt;WEBER&lt;/last_name&gt;&lt;/name&gt;&lt;/names&gt;&lt;doctypes count="1"&gt;&lt;doctype&gt;Book Review&lt;/doctype&gt;&lt;/doctypes&gt;&lt;publishers&gt;&lt;publisher&gt;&lt;address_spec addr_no="1"&gt;&lt;full_address&gt;50 E HURON ST, CHICAGO, IL 60611&lt;/full_address&gt;&lt;city&gt;CHICAGO&lt;/city&gt;&lt;/address_spec&gt;&lt;names count="1"&gt;&lt;name addr_no="1" role="publisher" seq_no="1"&gt;&lt;display_name&gt;ASSOC COLL RESEARCH LIBRARIES&lt;/display_name&gt;&lt;full_name&gt;ASSOC COLL RESEARCH LIBRARIES&lt;/full_name&gt;&lt;/name&gt;&lt;/names&gt;&lt;/publisher&gt;&lt;/publishers&gt;&lt;/summary&gt;&lt;fullrecord_metadata&gt;&lt;languages count="1"&gt;&lt;language 
type="primary"&gt;English&lt;/language&gt;&lt;/languages&gt;&lt;normalized_languages count="1"&gt;&lt;language type="primary"&gt;English&lt;/language&gt;&lt;/normalized_languages&gt;&lt;normalized_doctypes count="1"&gt;&lt;doctype&gt;Review&lt;/doctype&gt;&lt;/normalized_doctypes&gt;&lt;refs count="1"&gt;&lt;/refs&gt;&lt;addresses count="1"&gt;&lt;address_name&gt;&lt;address_spec addr_no="1"&gt;&lt;full_address&gt;STANFORD UNIV,STANFORD,CA 94305&lt;/full_address&gt;&lt;organizations count="2"&gt;&lt;organization&gt;STANFORD UNIV&lt;/organization&gt;&lt;organization pref="Y"&gt;Stanford University&lt;/organization&gt;&lt;/organizations&gt;&lt;city&gt;STANFORD&lt;/city&gt;&lt;state&gt;CA&lt;/state&gt;&lt;country&gt;USA&lt;/country&gt;&lt;zip location="AP"&gt;94305&lt;/zip&gt;&lt;/address_spec&gt;&lt;/address_name&gt;&lt;/addresses&gt;&lt;category_info&gt;&lt;headings count="1"&gt;&lt;heading&gt;Science &amp;amp; Technology&lt;/heading&gt;&lt;/headings&gt;&lt;subheadings count="1"&gt;&lt;subheading&gt;Technology&lt;/subheading&gt;&lt;/subheadings&gt;&lt;subjects count="3"&gt;&lt;subject ascatype="traditional" code="NU"&gt;Information Science &amp;amp; Library Science&lt;/subject&gt;&lt;subject ascatype="extended"&gt;Information Science &amp;amp; Library Science&lt;/subject&gt;&lt;subject ascatype="traditional" code="NU"&gt;INFORMATION SCIENCE &amp;amp; LIBRARY SCIENCE&lt;/subject&gt;&lt;/subjects&gt;&lt;/category_info&gt;&lt;/fullrecord_metadata&gt;&lt;item xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="itemType_wos"&gt;&lt;ids avail="Y"&gt;N5494&lt;/ids&gt;&lt;bib_id&gt;33 (5): 413-413 1972&lt;/bib_id&gt;&lt;/item&gt;&lt;/static_data&gt;&lt;dynamic_data&gt;&lt;citation_related&gt;&lt;tc_list&gt;&lt;silo_tc coll_id="WOS" local_count="0"&gt;&lt;/silo_tc&gt;&lt;/tc_list&gt;&lt;/citation_related&gt;&lt;cluster_related&gt;&lt;identifiers&gt;&lt;identifier value="0010-0870" 
type="issn"&gt;&lt;/identifier&gt;&lt;/identifiers&gt;&lt;/cluster_related&gt;&lt;/dynamic_data&gt;&lt;/REC&gt;
&lt;REC r_id_disclaimer="ResearcherID data provided by Clarivate Analytics"&gt;&lt;UID&gt;WOS:A1976BW18000001&lt;/UID&gt;&lt;static_data&gt;&lt;summary&gt;&lt;EWUID&gt;&lt;WUID coll_id="WOS"&gt;&lt;/WUID&gt;&lt;edition value="WOS.SSCI"&gt;&lt;/edition&gt;&lt;edition value="WOS.SCI"&gt;&lt;/edition&gt;&lt;/EWUID&gt;&lt;pub_info coverdate="1976" has_abstract="N" issue="3" pubtype="Journal" pubyear="1976" sortdate="1976-01-01" vol="37"&gt;&lt;page begin="205" end="221" page_count="17"&gt;205-221&lt;/page&gt;&lt;/pub_info&gt;&lt;titles count="6"&gt;&lt;title type="source"&gt;COLLEGE &amp;amp; RESEARCH LIBRARIES&lt;/title&gt;&lt;title type="source_abbrev"&gt;COLL RES LIBR&lt;/title&gt;&lt;title type="abbrev_iso"&gt;Coll. Res. Libr.&lt;/title&gt;&lt;title type="abbrev_11"&gt;COLL RES LI&lt;/title&gt;&lt;title type="abbrev_29"&gt;COLL RES LIBR&lt;/title&gt;&lt;title type="item"&gt;CENTURY OF COOPERATIVE PROGRAMS AMONG ACADEMIC-LIBRARIES&lt;/title&gt;&lt;/titles&gt;&lt;names count="1"&gt;&lt;name daisng_id="19670132" seq_no="1" role="author"&gt;&lt;display_name&gt;WEBER, DC&lt;/display_name&gt;&lt;full_name&gt;WEBER, DC&lt;/full_name&gt;&lt;wos_standard&gt;WEBER, DC&lt;/wos_standard&gt;&lt;first_name&gt;DC&lt;/first_name&gt;&lt;last_name&gt;WEBER&lt;/last_name&gt;&lt;/name&gt;&lt;/names&gt;&lt;doctypes count="1"&gt;&lt;doctype&gt;Article&lt;/doctype&gt;&lt;/doctypes&gt;&lt;publishers&gt;&lt;publisher&gt;&lt;address_spec addr_no="1"&gt;&lt;full_address&gt;50 E HURON ST, CHICAGO, IL 60611&lt;/full_address&gt;&lt;city&gt;CHICAGO&lt;/city&gt;&lt;/address_spec&gt;&lt;names count="1"&gt;&lt;name addr_no="1" role="publisher" seq_no="1"&gt;&lt;display_name&gt;ASSOC COLL RESEARCH LIBRARIES&lt;/display_name&gt;&lt;full_name&gt;ASSOC COLL RESEARCH LIBRARIES&lt;/full_name&gt;&lt;/name&gt;&lt;/names&gt;&lt;/publisher&gt;&lt;/publishers&gt;&lt;/summary&gt;&lt;fullrecord_metadata&gt;&lt;languages count="1"&gt;&lt;language 
type="primary"&gt;English&lt;/language&gt;&lt;/languages&gt;&lt;normalized_languages count="1"&gt;&lt;language type="primary"&gt;English&lt;/language&gt;&lt;/normalized_languages&gt;&lt;normalized_doctypes count="1"&gt;&lt;doctype&gt;Article&lt;/doctype&gt;&lt;/normalized_doctypes&gt;&lt;refs count="41"&gt;&lt;/refs&gt;&lt;addresses count="1"&gt;&lt;address_name&gt;&lt;address_spec addr_no="1"&gt;&lt;full_address&gt;STANFORD UNIV LIB,STANFORD,CA 94305&lt;/full_address&gt;&lt;organizations count="2"&gt;&lt;organization&gt;STANFORD UNIV LIB&lt;/organization&gt;&lt;organization pref="Y"&gt;Stanford University&lt;/organization&gt;&lt;/organizations&gt;&lt;city&gt;STANFORD&lt;/city&gt;&lt;state&gt;CA&lt;/state&gt;&lt;country&gt;USA&lt;/country&gt;&lt;zip location="AP"&gt;94305&lt;/zip&gt;&lt;/address_spec&gt;&lt;/address_name&gt;&lt;/addresses&gt;&lt;category_info&gt;&lt;headings count="1"&gt;&lt;heading&gt;Science &amp;amp; Technology&lt;/heading&gt;&lt;/headings&gt;&lt;subheadings count="1"&gt;&lt;subheading&gt;Technology&lt;/subheading&gt;&lt;/subheadings&gt;&lt;subjects count="3"&gt;&lt;subject ascatype="traditional" code="NU"&gt;Information Science &amp;amp; Library Science&lt;/subject&gt;&lt;subject ascatype="extended"&gt;Information Science &amp;amp; Library Science&lt;/subject&gt;&lt;subject ascatype="traditional" code="NU"&gt;INFORMATION SCIENCE &amp;amp; LIBRARY SCIENCE&lt;/subject&gt;&lt;/subjects&gt;&lt;/category_info&gt;&lt;/fullrecord_metadata&gt;&lt;item xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" coll_id="WOS" xsi:type="itemType_wos"&gt;&lt;ids avail="Y"&gt;BW180&lt;/ids&gt;&lt;bib_id&gt;37 (3): 205-221 1976&lt;/bib_id&gt;&lt;/item&gt;&lt;/static_data&gt;&lt;dynamic_data&gt;&lt;citation_related&gt;&lt;tc_list&gt;&lt;silo_tc coll_id="WOS" local_count="12"&gt;&lt;/silo_tc&gt;&lt;/tc_list&gt;&lt;/citation_related&gt;&lt;cluster_related&gt;&lt;identifiers&gt;&lt;identifier type="issn" 
value="0010-0870"&gt;&lt;/identifier&gt;&lt;/identifiers&gt;&lt;/cluster_related&gt;&lt;/dynamic_data&gt;&lt;/REC&gt;
&lt;/records&gt;</records>
</return>
</ns2:searchResponse>
</soap:Body>
</soap:Envelope>
Loading

0 comments on commit f35ade1

Please sign in to comment.