From 18e4057e8b794136c908808086aa44f566f5feb5 Mon Sep 17 00:00:00 2001 From: Jason Dobry Date: Thu, 17 Nov 2016 15:21:35 -0800 Subject: [PATCH 1/3] Add Cloud Client NL API samples. --- language/cloud-client/README.rst.in | 5 +- language/cloud-client/requirements.txt | 2 +- language/cloud-client/resources/text.txt | 1 + language/cloud-client/snippets.py | 172 +++++++++++++++++++++++ language/cloud-client/snippets_test.py | 74 ++++++++++ 5 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 language/cloud-client/resources/text.txt create mode 100644 language/cloud-client/snippets.py create mode 100644 language/cloud-client/snippets_test.py diff --git a/language/cloud-client/README.rst.in b/language/cloud-client/README.rst.in index 78da29111a06..faf402bfe9c0 100644 --- a/language/cloud-client/README.rst.in +++ b/language/cloud-client/README.rst.in @@ -4,7 +4,7 @@ product: name: Google Cloud Natural Language API short_name: Cloud Natural Language API url: https://cloud.google.com/natural-language/docs/ - description: > + description: > The `Google Cloud Natural Language API`_ provides natural language understanding technologies to developers, including sentiment analysis, entity recognition, and syntax analysis. 
This API is part of the larger @@ -17,5 +17,8 @@ setup: samples: - name: Quickstart file: quickstart.py +- name: Snippets + file: snippets.py + show_help: true cloud_client_library: true diff --git a/language/cloud-client/requirements.txt b/language/cloud-client/requirements.txt index ce34e7df6b81..130d1cc79a4f 100644 --- a/language/cloud-client/requirements.txt +++ b/language/cloud-client/requirements.txt @@ -1 +1 @@ -google-cloud-language==0.21.0 +google-cloud-language==0.22.0 diff --git a/language/cloud-client/resources/text.txt b/language/cloud-client/resources/text.txt new file mode 100644 index 000000000000..97a1cea02b7a --- /dev/null +++ b/language/cloud-client/resources/text.txt @@ -0,0 +1 @@ +President Obama is speaking at the White House. \ No newline at end of file diff --git a/language/cloud-client/snippets.py b/language/cloud-client/snippets.py new file mode 100644 index 000000000000..c0f5f8a37226 --- /dev/null +++ b/language/cloud-client/snippets.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python + +# Copyright 2016 Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This application demonstrates how to perform basic operations with the +Google Cloud Natural Language API + +For more information, the documentation at +https://cloud.google.com/natural-language/docs. 
+""" + +import argparse + +from google.cloud import language + + +def sentiment_text(text): + """Detects sentiment in the text.""" + language_client = language.Client() + + # Instantiates a plain text document. + document = language_client.document_from_text(text) + + # Detects sentiment in the document. You can also analyze HTML with: + # document.doc_type == language.Document.HTML + sentiment = document.analyze_sentiment() + + print('Score: {}'.format(sentiment.score)) + print('Magnitude: {}'.format(sentiment.magnitude)) + + +def sentiment_file(gcs_uri): + """Detects sentiment in the file located in Google Cloud Storage.""" + language_client = language.Client() + + # Instantiates a plain text document. + document = language_client.document_from_url(gcs_uri) + + # Detects sentiment in the document. You can also analyze HTML with: + # document.doc_type == language.Document.HTML + sentiment = document.analyze_sentiment() + + print('Score: {}'.format(sentiment.score)) + print('Magnitude: {}'.format(sentiment.magnitude)) + + +def entities_text(text): + """Detects entities in the text.""" + language_client = language.Client() + + # Instantiates a plain text document. + document = language_client.document_from_text(text) + + # Detects entities in the document. You can also analyze HTML with: + # document.doc_type == language.Document.HTML + entities = document.analyze_entities() + + for entity in entities: + print('=' * 20) + print('{:<16}: {}'.format('name', entity.name)) + print('{:<16}: {}'.format('type', entity.entity_type)) + print('{:<16}: {}'.format('wikipedia_url', entity.wikipedia_url)) + print('{:<16}: {}'.format('metadata', entity.metadata)) + print('{:<16}: {}'.format('salience', entity.salience)) + + +def entities_file(gcs_uri): + """Detects entities in the file located in Google Cloud Storage.""" + language_client = language.Client() + + # Instantiates a plain text document. 
+ document = language_client.document_from_url(gcs_uri) + + # Detects entities in the document. You can also analyze HTML with: + # document.doc_type == language.Document.HTML + entities = document.analyze_entities() + + for entity in entities: + print('=' * 20) + print('{:<16}: {}'.format('name', entity.name)) + print('{:<16}: {}'.format('type', entity.entity_type)) + print('{:<16}: {}'.format('wikipedia_url', entity.wikipedia_url)) + print('{:<16}: {}'.format('metadata', entity.metadata)) + print('{:<16}: {}'.format('salience', entity.salience)) + + + def syntax_text(text): + """Detects syntax in the text.""" + language_client = language.Client() + + # Instantiates a plain text document. + document = language_client.document_from_text(text) + + # Detects syntax in the document. You can also analyze HTML with: + # document.doc_type == language.Document.HTML + tokens = document.analyze_syntax() + + for token in tokens: + print('{}: {}'.format(token.part_of_speech, token.text_content)) + + + def syntax_file(gcs_uri): + """Detects syntax in the file located in Google Cloud Storage.""" + language_client = language.Client() + + # Instantiates a plain text document. + document = language_client.document_from_url(gcs_uri) + + # Detects syntax in the document. 
You can also analyze HTML with: + # document.doc_type == language.Document.HTML + tokens = document.analyze_syntax() + + for token in tokens: + print('{}: {}'.format(token.part_of_speech, token.text_content)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + subparsers = parser.add_subparsers(dest='command') + + sentiment_text_parser = subparsers.add_parser( + 'sentiment-text', help=sentiment_text.__doc__) + sentiment_text_parser.add_argument('text') + + sentiment_file_parser = subparsers.add_parser( + 'sentiment-file', help=sentiment_file.__doc__) + sentiment_file_parser.add_argument('gcs_uri') + + entities_text_parser = subparsers.add_parser( + 'entities-text', help=entities_text.__doc__) + entities_text_parser.add_argument('text') + + entities_file_parser = subparsers.add_parser( + 'entities-file', help=entities_file.__doc__) + entities_file_parser.add_argument('gcs_uri') + + syntax_text_parser = subparsers.add_parser( + 'syntax-text', help=syntax_text.__doc__) + syntax_text_parser.add_argument('text') + + syntax_file_parser = subparsers.add_parser( + 'syntax-file', help=syntax_file.__doc__) + syntax_file_parser.add_argument('gcs_uri') + + args = parser.parse_args() + + if args.command == 'sentiment-text': + sentiment_text(args.text) + elif args.command == 'sentiment-file': + sentiment_file(args.gcs_uri) + elif args.command == 'entities-text': + entities_text(args.text) + elif args.command == 'entities-file': + entities_file(args.gcs_uri) + elif args.command == 'syntax-text': + syntax_text(args.text) + elif args.command == 'syntax-file': + syntax_file(args.gcs_uri) diff --git a/language/cloud-client/snippets_test.py b/language/cloud-client/snippets_test.py new file mode 100644 index 000000000000..e2607816cd4e --- /dev/null +++ b/language/cloud-client/snippets_test.py @@ -0,0 +1,74 @@ +# Copyright 2016 Google, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import snippets + + +def test_sentiment_text(cloud_config, capsys): + snippets.sentiment_text('President Obama is speaking at the White House.') + out, _ = capsys.readouterr() + assert 'Score: 1' in out + + +def test_sentiment_file(cloud_config, capsys): + cloud_storage_input_uri = 'gs://{}/text.txt'.format( + cloud_config.storage_bucket) + snippets.sentiment_file(cloud_storage_input_uri) + out, _ = capsys.readouterr() + assert 'Score: 0.2' in out + + +def test_entities_text(cloud_config, capsys): + snippets.entities_text('President Obama is speaking at the White House.') + out, _ = capsys.readouterr() + assert 'name: Obama' in out + + +def test_entities_file(cloud_config, capsys): + cloud_storage_input_uri = 'gs://{}/text.txt'.format( + cloud_config.storage_bucket) + snippets.entities_file(cloud_storage_input_uri) + out, _ = capsys.readouterr() + assert 'name: Obama' in out + + +def test_syntax_text(cloud_config, capsys): + snippets.syntax_text('President Obama is speaking at the White House.') + out, _ = capsys.readouterr() + assert 'NOUN: President + 'NOUN: Obama' + 'VERB: is' + 'VERB: speaking' + 'ADP: at' + 'DET: the' + 'NOUN: White' + 'NOUN: House' + 'PUNCT: .' 
in out + + +def test_syntax_file(cloud_config, capsys): + cloud_storage_input_uri = 'gs://{}/text.txt'.format( + cloud_config.storage_bucket) + snippets.syntax_file(cloud_storage_input_uri) + out, _ = capsys.readouterr() + assert 'NOUN: President + 'NOUN: Obama' + 'VERB: is' + 'VERB: speaking' + 'ADP: at' + 'DET: the' + 'NOUN: White' + 'NOUN: House' + 'PUNCT: .' in out From a06ce1beb30737eb0111e30e7ede5d73a0b04494 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 6 Dec 2016 12:28:10 -0800 Subject: [PATCH 2/3] Fix lint Change-Id: I1771171bafb5c8f808133f5d910175d7e69d2fbc --- language/cloud-client/snippets_test.py | 38 ++++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/language/cloud-client/snippets_test.py b/language/cloud-client/snippets_test.py index e2607816cd4e..655bd7dbc5be 100644 --- a/language/cloud-client/snippets_test.py +++ b/language/cloud-client/snippets_test.py @@ -47,15 +47,16 @@ def test_entities_file(cloud_config, capsys): def test_syntax_text(cloud_config, capsys): snippets.syntax_text('President Obama is speaking at the White House.') out, _ = capsys.readouterr() - assert 'NOUN: President - 'NOUN: Obama' - 'VERB: is' - 'VERB: speaking' - 'ADP: at' - 'DET: the' - 'NOUN: White' - 'NOUN: House' - 'PUNCT: .' in out + assert ( + 'NOUN: President' + 'NOUN: Obama' + 'VERB: is' + 'VERB: speaking' + 'ADP: at' + 'DET: the' + 'NOUN: White' + 'NOUN: House' + 'PUNCT: .') in out def test_syntax_file(cloud_config, capsys): @@ -63,12 +64,13 @@ def test_syntax_file(cloud_config, capsys): cloud_config.storage_bucket) snippets.syntax_file(cloud_storage_input_uri) out, _ = capsys.readouterr() - assert 'NOUN: President - 'NOUN: Obama' - 'VERB: is' - 'VERB: speaking' - 'ADP: at' - 'DET: the' - 'NOUN: White' - 'NOUN: House' - 'PUNCT: .' 
in out + assert ( + 'NOUN: President' + 'NOUN: Obama' + 'VERB: is' + 'VERB: speaking' + 'ADP: at' + 'DET: the' + 'NOUN: White' + 'NOUN: House' + 'PUNCT: .') in out From 560495dc4cd20b121893deb7f2717ef2128fb178 Mon Sep 17 00:00:00 2001 From: Jason Dobry Date: Tue, 6 Dec 2016 13:03:37 -0800 Subject: [PATCH 3/3] Fix tests. --- language/cloud-client/quickstart.py | 2 +- language/cloud-client/snippets_test.py | 30 ++++++-------------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/language/cloud-client/quickstart.py b/language/cloud-client/quickstart.py index 24f2ff4dea1f..3b42ac65ab67 100644 --- a/language/cloud-client/quickstart.py +++ b/language/cloud-client/quickstart.py @@ -31,7 +31,7 @@ def run_quickstart(): sentiment = document.analyze_sentiment() print('Text: {}'.format(text)) - print('Sentiment: {}, {}'.format(sentiment.polarity, sentiment.magnitude)) + print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude)) # [END language_quickstart] diff --git a/language/cloud-client/snippets_test.py b/language/cloud-client/snippets_test.py index 655bd7dbc5be..47050e44e23c 100644 --- a/language/cloud-client/snippets_test.py +++ b/language/cloud-client/snippets_test.py @@ -19,7 +19,7 @@ def test_sentiment_text(cloud_config, capsys): snippets.sentiment_text('President Obama is speaking at the White House.') out, _ = capsys.readouterr() - assert 'Score: 1' in out + assert 'Score: 0.2' in out def test_sentiment_file(cloud_config, capsys): @@ -33,7 +33,8 @@ def test_sentiment_file(cloud_config, capsys): def test_entities_text(cloud_config, capsys): snippets.entities_text('President Obama is speaking at the White House.') out, _ = capsys.readouterr() - assert 'name: Obama' in out + assert 'name' in out + assert ': Obama' in out def test_entities_file(cloud_config, capsys): @@ -41,22 +42,14 @@ def test_entities_file(cloud_config, capsys): cloud_config.storage_bucket) snippets.entities_file(cloud_storage_input_uri) out, _ = capsys.readouterr() - 
assert 'name: Obama' in out + assert 'name' in out + assert ': Obama' in out def test_syntax_text(cloud_config, capsys): snippets.syntax_text('President Obama is speaking at the White House.') out, _ = capsys.readouterr() - assert ( - 'NOUN: President' - 'NOUN: Obama' - 'VERB: is' - 'VERB: speaking' - 'ADP: at' - 'DET: the' - 'NOUN: White' - 'NOUN: House' - 'PUNCT: .') in out + assert 'NOUN: President' in out def test_syntax_file(cloud_config, capsys): @@ -64,13 +57,4 @@ def test_syntax_file(cloud_config, capsys): cloud_config.storage_bucket) snippets.syntax_file(cloud_storage_input_uri) out, _ = capsys.readouterr() - assert ( - 'NOUN: President' - 'NOUN: Obama' - 'VERB: is' - 'VERB: speaking' - 'ADP: at' - 'DET: the' - 'NOUN: White' - 'NOUN: House' - 'PUNCT: .') in out + assert 'NOUN: President' in out