From 5cfef84de1940e9d02c66ac4c4ea2f2a7ec4c80a Mon Sep 17 00:00:00 2001
From: Eric Schmidt
Date: Wed, 18 Mar 2020 15:45:05 -0700
Subject: [PATCH] feat: updated library from protos (#9)

---
Review notes (everything between the "---" line and the first "diff --git"
line is ignored when the patch is applied):
- Line breaks, blank context lines and indentation were restored from a copy
  whose whitespace had been collapsed. Hunk line counts agree with the "@@"
  headers; if context indentation does not match the target tree, apply with
  `git apply --ignore-whitespace`.
- test/quickstart.test.js: the `execSync` helper only accepts `cmd`, so the
  `{cwd}` object passed as a second argument is dropped. The test therefore
  only works when the test process is started from the directory that
  contains quickstart.js.

 document-ai/snippets/package.json                  |  4 +-
 document-ai/snippets/parseForm.js                  |  2 +-
 document-ai/snippets/parseTable.js                 |  2 +-
 document-ai/snippets/quickstart.js                 | 72 +++++++++++++++----
 .../{quickstart.js => quickstart.test.js}          |  8 ++-
 5 files changed, 68 insertions(+), 20 deletions(-)
 rename document-ai/snippets/test/{quickstart.js => quickstart.test.js} (80%)

diff --git a/document-ai/snippets/package.json b/document-ai/snippets/package.json
index 7435dab506b..ea52d7dd9c0 100644
--- a/document-ai/snippets/package.json
+++ b/document-ai/snippets/package.json
@@ -13,11 +13,11 @@
     "test": "mocha test/*.js --timeout 600000"
   },
   "dependencies": {
-    "@google-cloud/documentai": "^0.0.1",
+    "@google-cloud/documentai": "^0.1.0",
     "@google-cloud/storage": "^4.2.0"
   },
   "devDependencies": {
     "chai": "^4.2.0",
     "mocha": "^6.2.0"
   }
-}
\ No newline at end of file
+}
diff --git a/document-ai/snippets/parseForm.js b/document-ai/snippets/parseForm.js
index 81591a76c60..c23cf37eae7 100644
--- a/document-ai/snippets/parseForm.js
+++ b/document-ai/snippets/parseForm.js
@@ -72,7 +72,7 @@ async function main(
 
     // Configure the request for batch process
     const requests = {
-      parent: `projects/${projectId}`,
+      parent: `projects/${projectId}/locations/us-central1`,
       requests: [request],
     };
 
diff --git a/document-ai/snippets/parseTable.js b/document-ai/snippets/parseTable.js
index 53d4839e842..d5a72369add 100644
--- a/document-ai/snippets/parseTable.js
+++ b/document-ai/snippets/parseTable.js
@@ -74,7 +74,7 @@ async function main(
 
     // Configure the request for batch process
     const requests = {
-      parent: `projects/${projectId}`,
+      parent: `projects/${projectId}/locations/us-central1`,
       requests: [request],
     };
 
diff --git a/document-ai/snippets/quickstart.js b/document-ai/snippets/quickstart.js
index f4b6afac5cf..a6bef04db8c 100644
--- a/document-ai/snippets/quickstart.js
+++ b/document-ai/snippets/quickstart.js
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019, Google, Inc.
+ * Copyright 2020, Google, Inc.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
@@ -16,21 +16,65 @@
 'use strict';
 
 /**
- * DESCRIBE WHAT THIS SAMPLE DOES.
- * @param {string} LIST EXPECTED ARGUMENTS.
+ * Process a single PDF.
+ * @param {string} projectId your Google Cloud project ID
+ * @param {string} location region to use for this operation
+ * @param {string} gcsInputUri Cloud Storage URI of the PDF document to parse
  */
-async function main() {
-  // [START LIBRARY_NAME_quickstart]
-  async function batchProcessDocument() {
-    const {
-      DocumentUnderstandingServiceClient,
-    } = require('@google-cloud/documentai');
-    const client = new DocumentUnderstandingServiceClient();
-    // TODO: write sample here that demonstrates batch processing of documents.
-    console.info(client);
+async function main(
+  projectId,
+  location,
+  gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf'
+) {
+  // [START document_quickstart]
+  /**
+   * TODO(developer): Uncomment these variables before running the sample.
+   */
+  // const projectId = 'YOUR_PROJECT_ID';
+  // const location = 'YOUR_PROJECT_LOCATION';
+  // const gcsInputUri = 'YOUR_SOURCE_PDF';
+
+  const {
+    DocumentUnderstandingServiceClient,
+  } = require('@google-cloud/documentai');
+  const client = new DocumentUnderstandingServiceClient();
+
+  async function quickstart() {
+    // Configure the request for processing the PDF
+    const parent = `projects/${projectId}/locations/${location}`;
+    const request = {
+      parent,
+      inputConfig: {
+        gcsSource: {
+          uri: gcsInputUri,
+        },
+        mimeType: 'application/pdf',
+      },
+    };
+
+    // Recognizes text entities in the PDF document
+    const [result] = await client.processDocument(request);
+
+    // Get all of the document text as one big string
+    const {text} = result;
+
+    // Extract shards from the text field
+    function extractText(textAnchor) {
+      // First shard in document doesn't have startIndex property
+      const startIndex = textAnchor.textSegments[0].startIndex || 0;
+      const endIndex = textAnchor.textSegments[0].endIndex;
+
+      return text.substring(startIndex, endIndex);
+    }
+
+    for (const entity of result.entities) {
+      console.log(`\nEntity text: ${extractText(entity.textAnchor)}`);
+      console.log(`Entity type: ${entity.type}`);
+      console.log(`Entity mention text: ${entity.mentionText}`);
+    }
   }
-  // [END LIBRARY_NAME_quickstart]
-  await batchProcessDocument();
+  // [END document_quickstart]
+  await quickstart();
 }
 
 main(...process.argv.slice(2)).catch(err => {
diff --git a/document-ai/snippets/test/quickstart.js b/document-ai/snippets/test/quickstart.test.js
similarity index 80%
rename from document-ai/snippets/test/quickstart.js
rename to document-ai/snippets/test/quickstart.test.js
index 0f0bd0d25c7..9a32e02e777 100644
--- a/document-ai/snippets/test/quickstart.js
+++ b/document-ai/snippets/test/quickstart.test.js
@@ -22,10 +22,14 @@ const cp = require('child_process');
 const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'});
 
 const cwd = path.join(__dirname, '..');
+const projectId = process.env.GCLOUD_PROJECT;
+const LOCATION = 'us-central1';
 
 describe('Quickstart', () => {
   it('should run quickstart', async () => {
-    const stdout = execSync(`node ./quickstart.js`, {cwd});
-    assert.ok(stdout);
+    const stdout = execSync(`node ./quickstart.js ${projectId} ${LOCATION}`, {
+      cwd,
+    });
+    assert.match(stdout, /Entity/);
   });
 });