-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5d8bf1b
commit f35ade1
Showing
4 changed files
with
319 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
|
||
# Queries on the Web of Science (or Web of Knowledge)
#
# Issues search / retrieve-by-id requests through a WosClient and collates
# all result pages (MAX_RECORDS per request) into a single WosRecords.
class WosQueries
  # Default database 'WOK' is an umbrella for everything
  DATABASE = 'WOK'.freeze

  # this is the maximum number that can be returned in single query by WoS
  MAX_RECORDS = 100

  QUERY_LANGUAGE = 'en'.freeze

  # limit the start date when searching for publications, format: YYYY-MM-DD
  START_DATE = '1970-01-01'.freeze

  attr_reader :wos_client

  # @param wos_client [WosClient] a Web Of Science client
  # @param database [String] a WOS database identifier (default 'WOK')
  def initialize(wos_client, database = DATABASE)
    @wos_client = wos_client
    @database = database
  end

  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [WosRecords]
  # @raise [ArgumentError] when name has no last name or no first name
  def name_search(name)
    name_search_collator(name)
  end

  # @param ids [Array<String>] a list of WOS IDs
  # @return [WosRecords]
  def retrieve_by_id(ids)
    retrieve_by_id_collator(ids)
  end

  private

  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [WosRecords]
  def name_search_collator(name)
    collate(:search, :search_response, name_search_params(name))
  end

  # @param ids [Array<String>] a list of WOS IDs
  # @return [WosRecords]
  def retrieve_by_id_collator(ids)
    collate(:retrieve_by_id, :retrieve_by_id_response, retrieve_by_id_params(ids))
  end

  # Runs the initial request, then pages through whatever it did not return.
  # @param operation [Symbol] the SOAP operation to call (:search or :retrieve_by_id)
  # @param response_type [Symbol] the key of that operation's response in the SOAP body
  # @param message [Hash] the request message
  # @return [WosRecords]
  def collate(operation, response_type, message)
    response = wos_client.search.call(operation, message: message)
    retrieve_additional_records(
      records_found(response, response_type),
      records(response, response_type),
      query_id(response, response_type)
    )
  end

  # Fetches the pages that follow the initial response and merges them in.
  # @param record_total [Integer] total number of records reported for the query
  # @param records [WosRecords] the records returned by the initial request
  # @param query_id [Integer]
  # @return [WosRecords]
  def retrieve_additional_records(record_total, records, query_id)
    return records unless record_total > MAX_RECORDS

    # firstRecord is 1-based and the initial request already returned
    # 1..MAX_RECORDS, so the following pages start at 101, 201, ...
    (MAX_RECORDS + 1).step(record_total, MAX_RECORDS) do |first_record|
      message = {
        queryId: query_id,
        retrieveParameters: retrieve_parameters(first_record)
      }
      response = wos_client.search.call(:retrieve, message: message)
      # `records(...)` with arguments calls the method, not the local variable
      records = records.merge_records(records(response, :retrieve_response))
    end
    records
  end

  # Constructs a WoS name query
  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @return [String] name variants joined by ' OR '
  # @raise [ArgumentError] when name has no last name or no first name
  def name_query(name)
    last_name, first_middle_name = name.to_s.split(',')
    last_name = last_name.to_s.strip
    first_name, middle_name = first_middle_name.to_s.split(' ')
    if last_name.empty? || first_name.nil?
      raise ArgumentError, "name must be '{last name}, {first name} [{middle name}]', got #{name.inspect}"
    end

    first_initial = first_name[0]
    variants = ["#{last_name} #{first_name}", "#{last_name} #{first_initial}"]
    if middle_name
      middle_initial = middle_name[0]
      variants << "#{last_name} #{first_initial}#{middle_initial}"
      variants << "#{last_name} #{first_name} #{middle_initial}"
    end
    variants.join(' OR ')
  end

  # Search authors from these institutions
  # @return [Array<String>] institution names
  def institutions
    ['Stanford University']
  end

  # @param name [String] a CSV name pattern: {last name}, {first_name} [{middle_name} | {middle initial}]
  # @param first_record [Integer] the record number offset (defaults to 1)
  # @param count [Integer] the number of records to retrieve (defaults to 100)
  # @return [Hash] search query parameters
  def name_search_params(name, first_record = 1, count = MAX_RECORDS)
    {
      queryParameters: {
        databaseId: @database,
        userQuery: "AU=(#{name_query(name)}) AND AD=(#{institutions.join(' OR ')})",
        timeSpan: {
          begin: START_DATE,
          end: Time.zone.now.strftime('%Y-%m-%d')
        },
        queryLanguage: QUERY_LANGUAGE
      },
      retrieveParameters: retrieve_parameters(first_record, count)
    }
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Hash] return data
  def response_return(response, response_type)
    response.body[response_type][:return]
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Integer]
  def query_id(response, response_type)
    response_return(response, response_type)[:query_id].to_i
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [Integer]
  def records_found(response, response_type)
    response_return(response, response_type)[:records_found].to_i
  end

  # @param response [Savon::Response] a WoS SOAP response
  # @param response_type [Symbol] a WoS SOAP response type
  # @return [WosRecords]
  def records(response, response_type)
    WosRecords.new(records: response_return(response, response_type)[:records])
  end

  # @param ids [Array<String>] a list of WOS IDs
  # @return [Hash] retrieve-by-id parameters
  def retrieve_by_id_params(ids)
    {
      databaseId: @database,
      uid: ids,
      queryLanguage: QUERY_LANGUAGE,
      retrieveParameters: retrieve_parameters
    }
  end

  # @param first_record [Integer] the record number offset (defaults to 1)
  # @param count [Integer] the number of records to retrieve (defaults to 100)
  # @return [Hash] retrieve parameters
  def retrieve_parameters(first_record = 1, count = MAX_RECORDS)
    {
      firstRecord: first_record,
      count: count,
      option: retrieve_options
    }
  end

  # @return [Array<Hash>] retrieve parameter options
  def retrieve_options
    [
      {
        key: 'RecordIDs',
        value: 'On'
      },
      {
        key: 'targetNamespace',
        value: 'http://scientific.thomsonreuters.com/schema/wok5.4/public/FullRecord'
      }
    ]
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<?xml version="1.0"?> | ||
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"> | ||
<soap:Body> | ||
<ns2:searchResponse xmlns:ns2="http://woksearch.v3.wokmws.thomsonreuters.com"> | ||
<return> | ||
<queryId>2</queryId> | ||
<recordsFound>2</recordsFound> | ||
<recordsSearched>61323448</recordsSearched> | ||
<optionValue> | ||
<label>RecordIDs</label> | ||
<value>WOS:A1972N549400003</value> | ||
<value>WOS:A1976BW18000001</value> | ||
</optionValue> | ||
<records><records xmlns="http://scientific.thomsonreuters.com/schema/wok5.4/public/FullRecord"> | ||
<REC r_id_disclaimer="ResearcherID data provided by Clarivate Analytics"><UID>WOS:A1972N549400003</UID><static_data><summary><EWUID><WUID coll_id="WOS"></WUID><edition value="WOS.SSCI"></edition><edition value="WOS.SCI"></edition></EWUID><pub_info issue="5" pubtype="Journal" sortdate="1972-01-01" has_abstract="N" coverdate="1972" vol="33" pubyear="1972"><page end="413" page_count="1" begin="413">413-413</page></pub_info><titles count="6"><title type="source">COLLEGE &amp; RESEARCH LIBRARIES</title><title type="source_abbrev">COLL RES LIBR</title><title type="abbrev_iso">Coll. Res. Libr.</title><title type="abbrev_11">COLL RES LI</title><title type="abbrev_29">COLL RES LIBR</title><title type="item">LIBRARY MANAGEMENT - BEHAVIOR-BASED PERSONNEL SYSTEMS (BBPS) - FRAMEWORK FOR ANALYSIS - KEMPER,RE</title></titles><names count="1"><name daisng_id="19669717" seq_no="1" role="author"><display_name>WEBER, DC</display_name><full_name>WEBER, DC</full_name><wos_standard>WEBER, DC</wos_standard><first_name>DC</first_name><last_name>WEBER</last_name></name></names><doctypes count="1"><doctype>Book Review</doctype></doctypes><publishers><publisher><address_spec addr_no="1"><full_address>50 E HURON ST, CHICAGO, IL 60611</full_address><city>CHICAGO</city></address_spec><names count="1"><name addr_no="1" role="publisher" seq_no="1"><display_name>ASSOC COLL RESEARCH LIBRARIES</display_name><full_name>ASSOC COLL RESEARCH LIBRARIES</full_name></name></names></publisher></publishers></summary><fullrecord_metadata><languages count="1"><language type="primary">English</language></languages><normalized_languages count="1"><language type="primary">English</language></normalized_languages><normalized_doctypes count="1"><doctype>Review</doctype></normalized_doctypes><refs count="1"></refs><addresses count="1"><address_name><address_spec addr_no="1"><full_address>STANFORD UNIV,STANFORD,CA 94305</full_address><organizations count="2"><organization>STANFORD UNIV</organization><organization 
pref="Y">Stanford University</organization></organizations><city>STANFORD</city><state>CA</state><country>USA</country><zip location="AP">94305</zip></address_spec></address_name></addresses><category_info><headings count="1"><heading>Science &amp; Technology</heading></headings><subheadings count="1"><subheading>Technology</subheading></subheadings><subjects count="3"><subject ascatype="traditional" code="NU">Information Science &amp; Library Science</subject><subject ascatype="extended">Information Science &amp; Library Science</subject><subject ascatype="traditional" code="NU">INFORMATION SCIENCE &amp; LIBRARY SCIENCE</subject></subjects></category_info></fullrecord_metadata><item xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="itemType_wos"><ids avail="Y">N5494</ids><bib_id>33 (5): 413-413 1972</bib_id></item></static_data><dynamic_data><citation_related><tc_list><silo_tc coll_id="WOS" local_count="0"></silo_tc></tc_list></citation_related><cluster_related><identifiers><identifier value="0010-0870" type="issn"></identifier></identifiers></cluster_related></dynamic_data></REC> | ||
<REC r_id_disclaimer="ResearcherID data provided by Clarivate Analytics"><UID>WOS:A1976BW18000001</UID><static_data><summary><EWUID><WUID coll_id="WOS"></WUID><edition value="WOS.SSCI"></edition><edition value="WOS.SCI"></edition></EWUID><pub_info coverdate="1976" has_abstract="N" issue="3" pubtype="Journal" pubyear="1976" sortdate="1976-01-01" vol="37"><page begin="205" end="221" page_count="17">205-221</page></pub_info><titles count="6"><title type="source">COLLEGE &amp; RESEARCH LIBRARIES</title><title type="source_abbrev">COLL RES LIBR</title><title type="abbrev_iso">Coll. Res. Libr.</title><title type="abbrev_11">COLL RES LI</title><title type="abbrev_29">COLL RES LIBR</title><title type="item">CENTURY OF COOPERATIVE PROGRAMS AMONG ACADEMIC-LIBRARIES</title></titles><names count="1"><name daisng_id="19670132" seq_no="1" role="author"><display_name>WEBER, DC</display_name><full_name>WEBER, DC</full_name><wos_standard>WEBER, DC</wos_standard><first_name>DC</first_name><last_name>WEBER</last_name></name></names><doctypes count="1"><doctype>Article</doctype></doctypes><publishers><publisher><address_spec addr_no="1"><full_address>50 E HURON ST, CHICAGO, IL 60611</full_address><city>CHICAGO</city></address_spec><names count="1"><name addr_no="1" role="publisher" seq_no="1"><display_name>ASSOC COLL RESEARCH LIBRARIES</display_name><full_name>ASSOC COLL RESEARCH LIBRARIES</full_name></name></names></publisher></publishers></summary><fullrecord_metadata><languages count="1"><language type="primary">English</language></languages><normalized_languages count="1"><language type="primary">English</language></normalized_languages><normalized_doctypes count="1"><doctype>Article</doctype></normalized_doctypes><refs count="41"></refs><addresses count="1"><address_name><address_spec addr_no="1"><full_address>STANFORD UNIV LIB,STANFORD,CA 94305</full_address><organizations count="2"><organization>STANFORD UNIV LIB</organization><organization pref="Y">Stanford 
University</organization></organizations><city>STANFORD</city><state>CA</state><country>USA</country><zip location="AP">94305</zip></address_spec></address_name></addresses><category_info><headings count="1"><heading>Science &amp; Technology</heading></headings><subheadings count="1"><subheading>Technology</subheading></subheadings><subjects count="3"><subject ascatype="traditional" code="NU">Information Science &amp; Library Science</subject><subject ascatype="extended">Information Science &amp; Library Science</subject><subject ascatype="traditional" code="NU">INFORMATION SCIENCE &amp; LIBRARY SCIENCE</subject></subjects></category_info></fullrecord_metadata><item xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" coll_id="WOS" xsi:type="itemType_wos"><ids avail="Y">BW180</ids><bib_id>37 (3): 205-221 1976</bib_id></item></static_data><dynamic_data><citation_related><tc_list><silo_tc coll_id="WOS" local_count="12"></silo_tc></tc_list></citation_related><cluster_related><identifiers><identifier type="issn" value="0010-0870"></identifier></identifiers></cluster_related></dynamic_data></REC> | ||
</records></records> | ||
</return> | ||
</ns2:searchResponse> | ||
</soap:Body> | ||
</soap:Envelope> |
Oops, something went wrong.