diff --git a/protos/chunks.proto b/protos/chunks.proto index fead00f..f9aba58 100644 --- a/protos/chunks.proto +++ b/protos/chunks.proto @@ -1,9 +1,10 @@ syntax = "proto3"; -package redactive.grpc.v1; +package redactive.grpc.v2; import "google/protobuf/timestamp.proto"; + message ChunkMetadata { // Chunk content's creation timestamp optional google.protobuf.Timestamp created_at = 1; @@ -13,17 +14,18 @@ message ChunkMetadata { } message SourceReference { - // Source system of the document e.g. confluence, slack, google-drive + // Source system of the document e.g. confluence, sharepoint string system = 1; // Version of the source system e.g. 1.0.0 string system_version = 2; - // Connection id to the source system e.g. confluence space id, slack channel id, google-drive drive id + // Connection id to the source system e.g. confluence space id, sharepoint drive id string connection_id = 3; - // Document id in the source system e.g. confluence page id, slack message id, google-drive document id + // Document id in the source system e.g. confluence page id, sharepoint file id string document_id = 4; - // Document version in the source system e.g. confluence page version, slack message version, google-drive document version + // Document version in the source system e.g. confluence page version, sharepoint file hash string document_version = 5; - // Document path in the source system e.g. "My Drive/document.txt", "slack-channel-name" + // Document path in the source system e.g. "redactiveai.atlassian.net/Engineering/Onboarding Guide" + // or "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf" optional string document_path = 6; // Document name in the source system e.g. "document.txt" optional string document_name = 7; diff --git a/protos/search.proto b/protos/search.proto index 92a853a..f980734 100644 --- a/protos/search.proto +++ b/protos/search.proto @@ -1,29 +1,23 @@ syntax = "proto3"; -package redactive.grpc.v1; +package redactive.grpc.v2; import "google/protobuf/struct.proto"; import "google/protobuf/timestamp.proto"; - import "chunks.proto"; service Search { // Query the index for relevant chunks - rpc QueryChunks(QueryRequest) returns (QueryResponse); + rpc SearchChunks(SearchChunksRequest) returns (SearchChunksResponse); // Query the index for all chunks of a specific document - rpc QueryChunksByDocumentName(QueryByDocumentNameRequest) returns (QueryByDocumentNameResponse); - // Get chunks by URL - rpc GetChunksByUrl(GetChunksByUrlRequest) returns (GetChunksByUrlResponse); + rpc GetDocument(GetDocumentRequest) returns (GetDocumentResponse); } message Query { - // Semantic query to execute - string semantic_query = 1; -} - -message DocumentNameQuery { - // Document name to search for - string document_name = 1; + // Search query for semantic content + optional string semantic_query = 1; + // Specific keywords to search for in source document + optional string keyword_query = 2; } message TimeSpan { @@ -32,7 +26,10 @@ message TimeSpan { } message Filters { - // Scope e.g. "confluence", "slack://channel-name", "google-drive://CompanyDrive/document.docx" + // Scope of the query. This may either be the name of a fetcher, or a subspace of documents. + // Subspaces take the form of :/// + // e.g. 
for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide' + // for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' repeated string scope = 1; // Timespan of response chunk's creation optional TimeSpan created = 2; @@ -44,7 +41,7 @@ message Filters { optional bool include_content_in_trash = 5; } -message QueryRequest { +message SearchChunksRequest { // How many results to try to return (maximum number of results) optional uint32 count = 1; // The query to execute @@ -53,7 +50,14 @@ message QueryRequest { optional Filters filters = 3; } -message QueryResponse { +message GetDocumentRequest { + // A reference to the document to retrieve + string ref = 1; + // Query filters (only really for GetDocByTitle) + optional Filters filters = 2; +} + +message SearchChunksResponse { // Query was successful bool success = 1; // Error message if query failed @@ -62,28 +66,7 @@ message QueryResponse { repeated RelevantChunk relevant_chunks = 3; } -message GetChunksByUrlRequest { - // URL to document - string url = 1; -} - -message GetChunksByUrlResponse { - // Fetch was successful - bool success = 1; - // Error message if fetch failed - optional google.protobuf.Struct error = 2; - // List of chunks - repeated Chunk chunks = 3; -} - -message QueryByDocumentNameRequest { - // The query to execute - DocumentNameQuery query = 2; - // Filters to apply to query - optional Filters filters = 3; -} - -message QueryByDocumentNameResponse { +message GetDocumentResponse { // Query was successful bool success = 1; // Error message if query failed diff --git a/sdks/node/README.md b/sdks/node/README.md index 4cec228..b1ed8c0 100644 --- a/sdks/node/README.md +++ b/sdks/node/README.md @@ -7,7 +7,7 @@ The Redactive Node SDK provides a robust and intuitive interface for interacting In order to use the package to integrate with Redactive.ai, run: ```sh -npm install redactive +npm install @redactive/redactive ``` There is no need to clone this repository. @@ -35,28 +35,51 @@ The library has following components. AuthClient needs to be configured with your account's API key which is available in the Apps page at [Redactive Dashboard](https://dashboard.redactive.ai/). +The AuthClient can be used to present users with the data providers' OAuth consent +pages: + ```javascript import { AuthClient } from "@redactive/redactive"; +// Construct AuthClient using your Redactive API key +const client = new AuthClient("YOUR-API-KEY-HERE"); + // Establish an connection to data source -// Possible data sources: confluence, google-drive, jira, zendesk, slack, sharepoint -const redirectUri = "https://url-debugger.vercel.app"; +// Possible data sources: confluence, sharepoint +const redirectUri = "YOUR-REDIRECT-URI"; const provider = "confluence"; const signInUrl = await client.beginConnection({ provider, redirectUri }); -// Navigate User to signInUrl -// User will receive an oauth2 auth code after consenting the app's data source access permissions. -// Use this code to exchange Redactive access_token with Redactive API -const response = await client.exchangeTokens("OAUTH2-AUTH-CODE"); +// Now redirect your user to signInUrl +``` + +The user will be redirected back to your app's configured redirect uri after they have completed the steps on +the data provider's OAuth consent page. There will be a signin code present in the `code` parameter of the query string e.g. +`https://your-redirect-page.com?code=abcde12345`. 
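+
+For example, a server-side handler registered at the redirect URI can pick the sign-in code out of the query string.
+The sketch below assumes an Express app and a `/oauth/callback` route; both are illustrative choices, not part of the
+SDK:
+
+```javascript
+import express from "express";
+
+const app = express();
+
+// Hypothetical endpoint serving YOUR-REDIRECT-URI
+app.get("/oauth/callback", (req, res) => {
+  const signInCode = req.query.code; // e.g. "abcde12345"
+  // Exchange signInCode for a Redactive access token as shown below
+  res.send("Data source connected");
+});
+
+app.listen(3000);
+```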
+ +This code may be exchanged for a user access token (which the user may use to issue queries against their data): + +```javascript +// Exchange signin code for a Redactive ID token +const response = await client.exchangeTokens({ code: "SIGNIN-CODE" }); +const accessToken = response.idToken; +``` + +Once a user has completed the OAuth flow, the data source should show up in their connected data sources: + +```javascript +(await client.listConnections({ accessToken }).connections) === ["confluence"]; // ✅ ``` +Use the `list_connections` method to keep your user's connection status up to date, and provide mechanisms to re-connect data sources. + ### SearchClient -With the Redactive access_token, you can perform three types of searches using the Redactive Search service: +With the Redactive `access_token`, you can perform two types of search -1. **Semantic Query Search**: Retrieve relevant chunks of information that are semantically related to a user query. -2. **URL-based Search**: Obtain all the chunks from a document by specifying its URL. -3. **Document Name Search**: Query for all the chunks from a document based on the name of the document. +#### Query-based Search + +Retrieve relevant chunks of information that are related to a user query. ```javascript import { SearchClient } from "@redactive/redactive"; @@ -64,25 +87,39 @@ import { SearchClient } from "@redactive/redactive"; const client = new SearchClient(); const accessToken = "REDACTIVE-ACCESS-TOKEN"; -// Semantic Search: retrieve text extracts (chunks) from various documents pertaining to the user query -const semanticQuery = "Tell me about AI"; -await client.queryChunks({ accessToken, semanticQuery }); - -// URL-based Search: retrieve all chunks of the document at that URL -const url = "https://example.com/document"; -await client.getChunksByUrl({ accessToken, url }); +// Query-based Search: retrieve text extracts (chunks) from various documents pertaining to the user query +const query = "Tell me about AI"; +await client.searchChunks({ accessToken, query }); +``` -// Document Name Search : retrieve all chunks of a document identified by its name -const documentName = "AI Research Paper"; -await client.queryChunksByDocumentName({ accessToken, documentName }); +**Filters** may be applied to query-based search operations. At present, the following fields may be provided as filter predicates: + +```protobuf +message Filters { + // Scope of the query. This may either be the name of a provider, or a subspace of documents. + // Subspaces take the form of :/// + // e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide' + // for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' + repeated string scope = 1; + // Timespan of response chunk's creation + optional TimeSpan created = 2; + // Timespan of response chunk's last modification + optional TimeSpan modified = 3; + // List of user emails associated with response chunk + repeated string user_emails = 4; + // Include content from documents in trash + optional bool include_content_in_trash = 5; +} ``` -### Filters +The query will only return results which match _ALL_ filter predicates i.e. if multiple fields are populated in the filter object, +the resulting filter is the logical 'AND' of all the fields. If a data source provider does not support a filter-type, then no +results from that provider are returned. -Query methods, i.e. 
`queryChunks`, `queryChunksByDocumentName`, support a set of optional filters. The filters are applied in a logical 'AND' operation. If a data source provider does not support a filter-type, then no results from that provider are returned. +Filters may be populated and provided to a query in the following way for the NodeJS SDK: -```typescript -import { Filters } from "@redactive/redactive/grpc/search"; +```javascript +import { Filters } from "@redactive/redactive"; // Query chunks from Confluence only, that are from documents created before last week, modified since last week, // and that are from documents associated with a user's email. Include chunks from trashed documents. @@ -98,7 +135,23 @@ const filters: Filters = { userEmails: ["myEmail@example.com"], includeContentInTrash: true }; -await client.queryChunks({ accessToken, semanticQuery, filters }); +await client.searchChunks({ accessToken, semanticQuery, filters }); + +``` + +#### Document Fetch + +Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). + +```javascript +import { SearchClient } from "@redactive/redactive"; + +const client = new SearchClient(); +const accessToken = "REDACTIVE-ACCESS-TOKEN"; + +// URL-based Search: retrieve all chunks of the document at that URL +const url = "https://example.com/document"; +await client.getDocument({ accessToken, url }); ``` ### Multi-User Client @@ -124,8 +177,8 @@ let [signInCode, state] = ["", ""]; // from URL query parameters const isConnectionSuccessful = await multiUserClient.handleConnectionCallback(userId, signInCode, state); // User can now use Redactive search service via `MultiUserClient`'s other methods: -const semanticQuery = "Tell me about the missing research vessel, the Borealis"; -const chunks = await multiUserClient.queryChunks({ userId, semanticQuery }); +const query = "Tell me about the missing research vessel, the Borealis"; +const chunks = await multiUserClient.searchChunks({ userId, query }); ``` ## Development diff --git a/sdks/node/src/grpc/chunks.ts b/sdks/node/src/grpc/chunks.ts index bb0dce5..71f1153 100644 --- a/sdks/node/src/grpc/chunks.ts +++ b/sdks/node/src/grpc/chunks.ts @@ -1,7 +1,7 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. // versions: -// protoc-gen-ts_proto v2.2.0 -// protoc v3.21.12 +// protoc-gen-ts_proto v2.2.7 +// protoc v5.28.3 // source: chunks.proto /* eslint-disable */ @@ -9,7 +9,7 @@ import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire"; import { Timestamp } from "./google/protobuf/timestamp"; -export const protobufPackage = "redactive.grpc.v1"; +export const protobufPackage = "redactive.grpc.v2"; export interface ChunkMetadata { /** Chunk content's creation timestamp */ @@ -20,17 +20,20 @@ export interface ChunkMetadata { } export interface SourceReference { - /** Source system of the document e.g. confluence, slack, google-drive */ + /** Source system of the document e.g. confluence, sharepoint */ system: string; /** Version of the source system e.g. 1.0.0 */ systemVersion: string; - /** Connection id to the source system e.g. confluence space id, slack channel id, google-drive drive id */ + /** Connection id to the source system e.g. confluence space id, sharepoint drive id */ connectionId: string; - /** Document id in the source system e.g. confluence page id, slack message id, google-drive document id */ + /** Document id in the source system e.g. confluence page id, sharepoint file id */ documentId: string; - /** Document version in the source system e.g. 
confluence page version, slack message version, google-drive document version */ + /** Document version in the source system e.g. confluence page version, sharepoint file hash */ documentVersion: string; - /** Document path in the source system e.g. "My Drive/document.txt", "slack-channel-name" */ + /** + * Document path in the source system e.g. "redactiveai.atlassian.net/Engineering/Onboarding Guide" + * or "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf" + */ documentPath?: string | undefined; /** Document name in the source system e.g. "document.txt" */ documentName?: string | undefined; @@ -101,27 +104,30 @@ export const ChunkMetadata: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.createdAt = fromTimestamp(Timestamp.decode(reader, reader.uint32())); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.modifiedAt = fromTimestamp(Timestamp.decode(reader, reader.uint32())); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.link = reader.string(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -210,55 +216,62 @@ export const SourceReference: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.system = reader.string(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.systemVersion = reader.string(); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.connectionId = reader.string(); continue; - case 4: + } + case 4: { if (tag !== 34) { break; } message.documentId = reader.string(); continue; - case 5: + } + case 5: { if (tag !== 42) { break; } message.documentVersion = reader.string(); continue; - case 6: + } + case 6: { if (tag !== 50) { break; } message.documentPath = reader.string(); continue; - case 7: + } + case 7: { if (tag !== 58) { break; } message.documentName = reader.string(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -347,27 +360,30 @@ export const ChunkReference: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.chunkingVersion = reader.string(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.chunkId = reader.string(); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.chunkHash = reader.string(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -442,41 +458,46 @@ export const RelevantChunk: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.source = SourceReference.decode(reader, reader.uint32()); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.chunk = ChunkReference.decode(reader, reader.uint32()); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.relevance = RelevantChunk_Relevance.decode(reader, reader.uint32()); continue; - case 4: + } + case 4: { if (tag !== 34) { break; } message.chunkBody = reader.string(); continue; - case 5: + } + case 5: { if (tag !== 42) { break; } message.documentMetadata = ChunkMetadata.decode(reader, reader.uint32()); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -557,13 +578,14 @@ export const RelevantChunk_Relevance: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag 
>>> 3) { - case 1: + case 1: { if (tag !== 13) { break; } message.similarityScore = reader.float(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -623,34 +645,38 @@ export const Chunk: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.source = SourceReference.decode(reader, reader.uint32()); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.chunk = ChunkReference.decode(reader, reader.uint32()); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.chunkBody = reader.string(); continue; - case 4: + } + case 4: { if (tag !== 34) { break; } message.documentMetadata = ChunkMetadata.decode(reader, reader.uint32()); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; diff --git a/sdks/node/src/grpc/google/protobuf/struct.ts b/sdks/node/src/grpc/google/protobuf/struct.ts index 426d4c5..509fe72 100644 --- a/sdks/node/src/grpc/google/protobuf/struct.ts +++ b/sdks/node/src/grpc/google/protobuf/struct.ts @@ -1,7 +1,7 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. // versions: -// protoc-gen-ts_proto v2.2.0 -// protoc v3.21.12 +// protoc-gen-ts_proto v2.2.7 +// protoc v5.28.3 // source: google/protobuf/struct.proto /* eslint-disable */ @@ -13,7 +13,7 @@ export const protobufPackage = "google.protobuf"; * `NullValue` is a singleton enumeration to represent the null value for the * `Value` type union. * - * The JSON representation for `NullValue` is JSON `null`. + * The JSON representation for `NullValue` is JSON `null`. */ export enum NullValue { /** NULL_VALUE - Null value. */ @@ -117,7 +117,7 @@ export const Struct: MessageFns & StructWrapperFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } @@ -127,6 +127,7 @@ export const Struct: MessageFns & StructWrapperFns = { message.fields[entry1.key] = entry1.value; } continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -222,20 +223,22 @@ export const Struct_FieldsEntry: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.key = reader.string(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.value = Value.unwrap(Value.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -315,48 +318,54 @@ export const Value: MessageFns & AnyValueWrapperFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.nullValue = reader.int32() as any; continue; - case 2: + } + case 2: { if (tag !== 17) { break; } message.numberValue = reader.double(); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.stringValue = reader.string(); continue; - case 4: + } + case 4: { if (tag !== 32) { break; } message.boolValue = reader.bool(); continue; - case 5: + } + case 5: { if (tag !== 42) { break; } message.structValue = Struct.unwrap(Struct.decode(reader, reader.uint32())); continue; - case 6: + } + case 6: { if (tag !== 50) { break; } message.listValue = ListValue.unwrap(ListValue.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -471,13 +480,14 @@ export const ListValue: MessageFns & ListValueWrapperFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { 
break; } message.values.push(Value.unwrap(Value.decode(reader, reader.uint32()))); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; diff --git a/sdks/node/src/grpc/google/protobuf/timestamp.ts b/sdks/node/src/grpc/google/protobuf/timestamp.ts index cab0fd8..69605ad 100644 --- a/sdks/node/src/grpc/google/protobuf/timestamp.ts +++ b/sdks/node/src/grpc/google/protobuf/timestamp.ts @@ -1,7 +1,7 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. // versions: -// protoc-gen-ts_proto v2.2.0 -// protoc v3.21.12 +// protoc-gen-ts_proto v2.2.7 +// protoc v5.28.3 // source: google/protobuf/timestamp.proto /* eslint-disable */ @@ -97,7 +97,7 @@ export const protobufPackage = "google.protobuf"; * [`strftime`](https://docs.python.org/2/library/time.html#time.strftime) with * the time format spec '%Y-%m-%dT%H:%M:%S.%fZ'. Likewise, in Java, one can use * the Joda Time's [`ISODateTimeFormat.dateTime()`]( - * http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateTime%2D%2D + * http://joda-time.sourceforge.net/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateTime() * ) to obtain a formatter capable of generating timestamps in this format. */ export interface Timestamp { @@ -138,20 +138,22 @@ export const Timestamp: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.seconds = longToNumber(reader.int64()); continue; - case 2: + } + case 2: { if (tag !== 16) { break; } message.nanos = reader.int32(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; diff --git a/sdks/node/src/grpc/search.ts b/sdks/node/src/grpc/search.ts index 26617b9..b59b53b 100644 --- a/sdks/node/src/grpc/search.ts +++ b/sdks/node/src/grpc/search.ts @@ -1,7 +1,7 @@ // Code generated by protoc-gen-ts_proto. DO NOT EDIT. // versions: -// protoc-gen-ts_proto v2.2.0 -// protoc v3.21.12 +// protoc-gen-ts_proto v2.2.7 +// protoc v5.28.3 // source: search.proto /* eslint-disable */ @@ -23,16 +23,13 @@ import { Chunk, RelevantChunk } from "./chunks"; import { Struct } from "./google/protobuf/struct"; import { Timestamp } from "./google/protobuf/timestamp"; -export const protobufPackage = "redactive.grpc.v1"; +export const protobufPackage = "redactive.grpc.v2"; export interface Query { - /** Semantic query to execute */ - semanticQuery: string; -} - -export interface DocumentNameQuery { - /** Document name to search for */ - documentName: string; + /** Search query for semantic content */ + semanticQuery?: string | undefined; + /** Specific keywords to search for in source document */ + keywordQuery?: string | undefined; } export interface TimeSpan { @@ -41,7 +38,12 @@ export interface TimeSpan { } export interface Filters { - /** Scope e.g. "confluence", "slack://channel-name", "google-drive://CompanyDrive/document.docx" */ + /** + * Scope of the query. This may either be the name of a fetcher, or a subspace of documents. + * Subspaces take the form of :/// + * e.g. 
for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide' + * for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' + */ scope: string[]; /** Timespan of response chunk's creation */ created?: TimeSpan | undefined; @@ -53,7 +55,7 @@ export interface Filters { includeContentInTrash?: boolean | undefined; } -export interface QueryRequest { +export interface SearchChunksRequest { /** How many results to try to return (maximum number of results) */ count?: number | undefined; /** The query to execute */ @@ -62,7 +64,14 @@ export interface QueryRequest { filters?: Filters | undefined; } -export interface QueryResponse { +export interface GetDocumentRequest { + /** A reference to the document to retrieve */ + ref: string; + /** Query filters (only really for GetDocByTitle) */ + filters?: Filters | undefined; +} + +export interface SearchChunksResponse { /** Query was successful */ success: boolean; /** Error message if query failed */ @@ -71,28 +80,7 @@ export interface QueryResponse { relevantChunks: RelevantChunk[]; } -export interface GetChunksByUrlRequest { - /** URL to document */ - url: string; -} - -export interface GetChunksByUrlResponse { - /** Fetch was successful */ - success: boolean; - /** Error message if fetch failed */ - error?: { [key: string]: any } | undefined; - /** List of chunks */ - chunks: Chunk[]; -} - -export interface QueryByDocumentNameRequest { - /** The query to execute */ - query: DocumentNameQuery | undefined; - /** Filters to apply to query */ - filters?: Filters | undefined; -} - -export interface QueryByDocumentNameResponse { +export interface GetDocumentResponse { /** Query was successful */ success: boolean; /** Error message if query failed */ @@ -102,14 +90,17 @@ export interface QueryByDocumentNameResponse { } function createBaseQuery(): Query { - return { semanticQuery: "" }; + return { semanticQuery: undefined, keywordQuery: undefined }; } export const Query: MessageFns = { encode(message: Query, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.semanticQuery !== "") { + if (message.semanticQuery !== undefined) { writer.uint32(10).string(message.semanticQuery); } + if (message.keywordQuery !== undefined) { + writer.uint32(18).string(message.keywordQuery); + } return writer; }, @@ -120,13 +111,22 @@ export const Query: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.semanticQuery = reader.string(); continue; + } + case 2: { + if (tag !== 18) { + break; + } + + message.keywordQuery = reader.string(); + continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -137,14 +137,20 @@ export const Query: MessageFns = { }, fromJSON(object: any): Query { - return { semanticQuery: isSet(object.semanticQuery) ? globalThis.String(object.semanticQuery) : "" }; + return { + semanticQuery: isSet(object.semanticQuery) ? globalThis.String(object.semanticQuery) : undefined, + keywordQuery: isSet(object.keywordQuery) ? 
globalThis.String(object.keywordQuery) : undefined + }; }, toJSON(message: Query): unknown { const obj: any = {}; - if (message.semanticQuery !== "") { + if (message.semanticQuery !== undefined) { obj.semanticQuery = message.semanticQuery; } + if (message.keywordQuery !== undefined) { + obj.keywordQuery = message.keywordQuery; + } return obj; }, @@ -153,64 +159,8 @@ export const Query: MessageFns = { }, fromPartial, I>>(object: I): Query { const message = createBaseQuery(); - message.semanticQuery = object.semanticQuery ?? ""; - return message; - } -}; - -function createBaseDocumentNameQuery(): DocumentNameQuery { - return { documentName: "" }; -} - -export const DocumentNameQuery: MessageFns = { - encode(message: DocumentNameQuery, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.documentName !== "") { - writer.uint32(10).string(message.documentName); - } - return writer; - }, - - decode(input: BinaryReader | Uint8Array, length?: number): DocumentNameQuery { - const reader = input instanceof BinaryReader ? input : new BinaryReader(input); - let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseDocumentNameQuery(); - while (reader.pos < end) { - const tag = reader.uint32(); - switch (tag >>> 3) { - case 1: - if (tag !== 10) { - break; - } - - message.documentName = reader.string(); - continue; - } - if ((tag & 7) === 4 || tag === 0) { - break; - } - reader.skip(tag & 7); - } - return message; - }, - - fromJSON(object: any): DocumentNameQuery { - return { documentName: isSet(object.documentName) ? globalThis.String(object.documentName) : "" }; - }, - - toJSON(message: DocumentNameQuery): unknown { - const obj: any = {}; - if (message.documentName !== "") { - obj.documentName = message.documentName; - } - return obj; - }, - - create, I>>(base?: I): DocumentNameQuery { - return DocumentNameQuery.fromPartial(base ?? ({} as any)); - }, - fromPartial, I>>(object: I): DocumentNameQuery { - const message = createBaseDocumentNameQuery(); - message.documentName = object.documentName ?? ""; + message.semanticQuery = object.semanticQuery ?? undefined; + message.keywordQuery = object.keywordQuery ?? 
undefined; return message; } }; @@ -237,20 +187,22 @@ export const TimeSpan: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.after = fromTimestamp(Timestamp.decode(reader, reader.uint32())); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.before = fromTimestamp(Timestamp.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -320,41 +272,46 @@ export const Filters: MessageFns = { while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 10) { break; } message.scope.push(reader.string()); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.created = TimeSpan.decode(reader, reader.uint32()); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.modified = TimeSpan.decode(reader, reader.uint32()); continue; - case 4: + } + case 4: { if (tag !== 34) { break; } message.userEmails.push(reader.string()); continue; - case 5: + } + case 5: { if (tag !== 40) { break; } message.includeContentInTrash = reader.bool(); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -414,12 +371,12 @@ export const Filters: MessageFns = { } }; -function createBaseQueryRequest(): QueryRequest { +function createBaseSearchChunksRequest(): SearchChunksRequest { return { count: undefined, query: undefined, filters: undefined }; } -export const QueryRequest: MessageFns = { - encode(message: QueryRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { +export const SearchChunksRequest: MessageFns = { + encode(message: SearchChunksRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { if (message.count !== undefined) { writer.uint32(8).uint32(message.count); } @@ -432,34 +389,37 @@ export const QueryRequest: MessageFns = { return writer; }, - decode(input: BinaryReader | Uint8Array, length?: number): QueryRequest { + decode(input: BinaryReader | Uint8Array, length?: number): SearchChunksRequest { const reader = input instanceof BinaryReader ? input : new BinaryReader(input); let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseQueryRequest(); + const message = createBaseSearchChunksRequest(); while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.count = reader.uint32(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.query = Query.decode(reader, reader.uint32()); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.filters = Filters.decode(reader, reader.uint32()); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -469,7 +429,7 @@ export const QueryRequest: MessageFns = { return message; }, - fromJSON(object: any): QueryRequest { + fromJSON(object: any): SearchChunksRequest { return { count: isSet(object.count) ? globalThis.Number(object.count) : undefined, query: isSet(object.query) ? Query.fromJSON(object.query) : undefined, @@ -477,7 +437,7 @@ export const QueryRequest: MessageFns = { }; }, - toJSON(message: QueryRequest): unknown { + toJSON(message: SearchChunksRequest): unknown { const obj: any = {}; if (message.count !== undefined) { obj.count = Math.round(message.count); @@ -491,11 +451,11 @@ export const QueryRequest: MessageFns = { return obj; }, - create, I>>(base?: I): QueryRequest { - return QueryRequest.fromPartial(base ?? 
({} as any)); + create, I>>(base?: I): SearchChunksRequest { + return SearchChunksRequest.fromPartial(base ?? ({} as any)); }, - fromPartial, I>>(object: I): QueryRequest { - const message = createBaseQueryRequest(); + fromPartial, I>>(object: I): SearchChunksRequest { + const message = createBaseSearchChunksRequest(); message.count = object.count ?? undefined; message.query = object.query !== undefined && object.query !== null ? Query.fromPartial(object.query) : undefined; message.filters = @@ -504,52 +464,44 @@ export const QueryRequest: MessageFns = { } }; -function createBaseQueryResponse(): QueryResponse { - return { success: false, error: undefined, relevantChunks: [] }; +function createBaseGetDocumentRequest(): GetDocumentRequest { + return { ref: "", filters: undefined }; } -export const QueryResponse: MessageFns = { - encode(message: QueryResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.success !== false) { - writer.uint32(8).bool(message.success); +export const GetDocumentRequest: MessageFns = { + encode(message: GetDocumentRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { + if (message.ref !== "") { + writer.uint32(10).string(message.ref); } - if (message.error !== undefined) { - Struct.encode(Struct.wrap(message.error), writer.uint32(18).fork()).join(); - } - for (const v of message.relevantChunks) { - RelevantChunk.encode(v!, writer.uint32(26).fork()).join(); + if (message.filters !== undefined) { + Filters.encode(message.filters, writer.uint32(18).fork()).join(); } return writer; }, - decode(input: BinaryReader | Uint8Array, length?: number): QueryResponse { + decode(input: BinaryReader | Uint8Array, length?: number): GetDocumentRequest { const reader = input instanceof BinaryReader ? input : new BinaryReader(input); let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseQueryResponse(); + const message = createBaseGetDocumentRequest(); while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: - if (tag !== 8) { + case 1: { + if (tag !== 10) { break; } - message.success = reader.bool(); + message.ref = reader.string(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } - message.error = Struct.unwrap(Struct.decode(reader, reader.uint32())); - continue; - case 3: - if (tag !== 26) { - break; - } - - message.relevantChunks.push(RelevantChunk.decode(reader, reader.uint32())); + message.filters = Filters.decode(reader, reader.uint32()); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -559,145 +511,85 @@ export const QueryResponse: MessageFns = { return message; }, - fromJSON(object: any): QueryResponse { + fromJSON(object: any): GetDocumentRequest { return { - success: isSet(object.success) ? globalThis.Boolean(object.success) : false, - error: isObject(object.error) ? object.error : undefined, - relevantChunks: globalThis.Array.isArray(object?.relevantChunks) - ? object.relevantChunks.map((e: any) => RelevantChunk.fromJSON(e)) - : [] + ref: isSet(object.ref) ? globalThis.String(object.ref) : "", + filters: isSet(object.filters) ? 
Filters.fromJSON(object.filters) : undefined }; }, - toJSON(message: QueryResponse): unknown { + toJSON(message: GetDocumentRequest): unknown { const obj: any = {}; - if (message.success !== false) { - obj.success = message.success; - } - if (message.error !== undefined) { - obj.error = message.error; - } - if (message.relevantChunks?.length) { - obj.relevantChunks = message.relevantChunks.map((e) => RelevantChunk.toJSON(e)); - } - return obj; - }, - - create, I>>(base?: I): QueryResponse { - return QueryResponse.fromPartial(base ?? ({} as any)); - }, - fromPartial, I>>(object: I): QueryResponse { - const message = createBaseQueryResponse(); - message.success = object.success ?? false; - message.error = object.error ?? undefined; - message.relevantChunks = object.relevantChunks?.map((e) => RelevantChunk.fromPartial(e)) || []; - return message; - } -}; - -function createBaseGetChunksByUrlRequest(): GetChunksByUrlRequest { - return { url: "" }; -} - -export const GetChunksByUrlRequest: MessageFns = { - encode(message: GetChunksByUrlRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.url !== "") { - writer.uint32(10).string(message.url); + if (message.ref !== "") { + obj.ref = message.ref; } - return writer; - }, - - decode(input: BinaryReader | Uint8Array, length?: number): GetChunksByUrlRequest { - const reader = input instanceof BinaryReader ? input : new BinaryReader(input); - let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseGetChunksByUrlRequest(); - while (reader.pos < end) { - const tag = reader.uint32(); - switch (tag >>> 3) { - case 1: - if (tag !== 10) { - break; - } - - message.url = reader.string(); - continue; - } - if ((tag & 7) === 4 || tag === 0) { - break; - } - reader.skip(tag & 7); - } - return message; - }, - - fromJSON(object: any): GetChunksByUrlRequest { - return { url: isSet(object.url) ? globalThis.String(object.url) : "" }; - }, - - toJSON(message: GetChunksByUrlRequest): unknown { - const obj: any = {}; - if (message.url !== "") { - obj.url = message.url; + if (message.filters !== undefined) { + obj.filters = Filters.toJSON(message.filters); } return obj; }, - create, I>>(base?: I): GetChunksByUrlRequest { - return GetChunksByUrlRequest.fromPartial(base ?? ({} as any)); + create, I>>(base?: I): GetDocumentRequest { + return GetDocumentRequest.fromPartial(base ?? ({} as any)); }, - fromPartial, I>>(object: I): GetChunksByUrlRequest { - const message = createBaseGetChunksByUrlRequest(); - message.url = object.url ?? ""; + fromPartial, I>>(object: I): GetDocumentRequest { + const message = createBaseGetDocumentRequest(); + message.ref = object.ref ?? ""; + message.filters = + object.filters !== undefined && object.filters !== null ? 
Filters.fromPartial(object.filters) : undefined; return message; } }; -function createBaseGetChunksByUrlResponse(): GetChunksByUrlResponse { - return { success: false, error: undefined, chunks: [] }; +function createBaseSearchChunksResponse(): SearchChunksResponse { + return { success: false, error: undefined, relevantChunks: [] }; } -export const GetChunksByUrlResponse: MessageFns = { - encode(message: GetChunksByUrlResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { +export const SearchChunksResponse: MessageFns = { + encode(message: SearchChunksResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { if (message.success !== false) { writer.uint32(8).bool(message.success); } if (message.error !== undefined) { Struct.encode(Struct.wrap(message.error), writer.uint32(18).fork()).join(); } - for (const v of message.chunks) { - Chunk.encode(v!, writer.uint32(26).fork()).join(); + for (const v of message.relevantChunks) { + RelevantChunk.encode(v!, writer.uint32(26).fork()).join(); } return writer; }, - decode(input: BinaryReader | Uint8Array, length?: number): GetChunksByUrlResponse { + decode(input: BinaryReader | Uint8Array, length?: number): SearchChunksResponse { const reader = input instanceof BinaryReader ? input : new BinaryReader(input); let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseGetChunksByUrlResponse(); + const message = createBaseSearchChunksResponse(); while (reader.pos < end) { const tag = reader.uint32(); switch (tag >>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.success = reader.bool(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.error = Struct.unwrap(Struct.decode(reader, reader.uint32())); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } - message.chunks.push(Chunk.decode(reader, reader.uint32())); + message.relevantChunks.push(RelevantChunk.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -707,15 +599,17 @@ export const GetChunksByUrlResponse: MessageFns = { return message; }, - fromJSON(object: any): GetChunksByUrlResponse { + fromJSON(object: any): SearchChunksResponse { return { success: isSet(object.success) ? globalThis.Boolean(object.success) : false, error: isObject(object.error) ? object.error : undefined, - chunks: globalThis.Array.isArray(object?.chunks) ? object.chunks.map((e: any) => Chunk.fromJSON(e)) : [] + relevantChunks: globalThis.Array.isArray(object?.relevantChunks) + ? object.relevantChunks.map((e: any) => RelevantChunk.fromJSON(e)) + : [] }; }, - toJSON(message: GetChunksByUrlResponse): unknown { + toJSON(message: SearchChunksResponse): unknown { const obj: any = {}; if (message.success !== false) { obj.success = message.success; @@ -723,106 +617,30 @@ export const GetChunksByUrlResponse: MessageFns = { if (message.error !== undefined) { obj.error = message.error; } - if (message.chunks?.length) { - obj.chunks = message.chunks.map((e) => Chunk.toJSON(e)); + if (message.relevantChunks?.length) { + obj.relevantChunks = message.relevantChunks.map((e) => RelevantChunk.toJSON(e)); } return obj; }, - create, I>>(base?: I): GetChunksByUrlResponse { - return GetChunksByUrlResponse.fromPartial(base ?? ({} as any)); + create, I>>(base?: I): SearchChunksResponse { + return SearchChunksResponse.fromPartial(base ?? 
({} as any)); }, - fromPartial, I>>(object: I): GetChunksByUrlResponse { - const message = createBaseGetChunksByUrlResponse(); + fromPartial, I>>(object: I): SearchChunksResponse { + const message = createBaseSearchChunksResponse(); message.success = object.success ?? false; message.error = object.error ?? undefined; - message.chunks = object.chunks?.map((e) => Chunk.fromPartial(e)) || []; - return message; - } -}; - -function createBaseQueryByDocumentNameRequest(): QueryByDocumentNameRequest { - return { query: undefined, filters: undefined }; -} - -export const QueryByDocumentNameRequest: MessageFns = { - encode(message: QueryByDocumentNameRequest, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { - if (message.query !== undefined) { - DocumentNameQuery.encode(message.query, writer.uint32(18).fork()).join(); - } - if (message.filters !== undefined) { - Filters.encode(message.filters, writer.uint32(26).fork()).join(); - } - return writer; - }, - - decode(input: BinaryReader | Uint8Array, length?: number): QueryByDocumentNameRequest { - const reader = input instanceof BinaryReader ? input : new BinaryReader(input); - let end = length === undefined ? reader.len : reader.pos + length; - const message = createBaseQueryByDocumentNameRequest(); - while (reader.pos < end) { - const tag = reader.uint32(); - switch (tag >>> 3) { - case 2: - if (tag !== 18) { - break; - } - - message.query = DocumentNameQuery.decode(reader, reader.uint32()); - continue; - case 3: - if (tag !== 26) { - break; - } - - message.filters = Filters.decode(reader, reader.uint32()); - continue; - } - if ((tag & 7) === 4 || tag === 0) { - break; - } - reader.skip(tag & 7); - } - return message; - }, - - fromJSON(object: any): QueryByDocumentNameRequest { - return { - query: isSet(object.query) ? DocumentNameQuery.fromJSON(object.query) : undefined, - filters: isSet(object.filters) ? Filters.fromJSON(object.filters) : undefined - }; - }, - - toJSON(message: QueryByDocumentNameRequest): unknown { - const obj: any = {}; - if (message.query !== undefined) { - obj.query = DocumentNameQuery.toJSON(message.query); - } - if (message.filters !== undefined) { - obj.filters = Filters.toJSON(message.filters); - } - return obj; - }, - - create, I>>(base?: I): QueryByDocumentNameRequest { - return QueryByDocumentNameRequest.fromPartial(base ?? ({} as any)); - }, - fromPartial, I>>(object: I): QueryByDocumentNameRequest { - const message = createBaseQueryByDocumentNameRequest(); - message.query = - object.query !== undefined && object.query !== null ? DocumentNameQuery.fromPartial(object.query) : undefined; - message.filters = - object.filters !== undefined && object.filters !== null ? 
Filters.fromPartial(object.filters) : undefined; + message.relevantChunks = object.relevantChunks?.map((e) => RelevantChunk.fromPartial(e)) || []; return message; } }; -function createBaseQueryByDocumentNameResponse(): QueryByDocumentNameResponse { +function createBaseGetDocumentResponse(): GetDocumentResponse { return { success: false, error: undefined, chunks: [] }; } -export const QueryByDocumentNameResponse: MessageFns = { - encode(message: QueryByDocumentNameResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { +export const GetDocumentResponse: MessageFns = { + encode(message: GetDocumentResponse, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { if (message.success !== false) { writer.uint32(8).bool(message.success); } @@ -835,34 +653,37 @@ export const QueryByDocumentNameResponse: MessageFns>> 3) { - case 1: + case 1: { if (tag !== 8) { break; } message.success = reader.bool(); continue; - case 2: + } + case 2: { if (tag !== 18) { break; } message.error = Struct.unwrap(Struct.decode(reader, reader.uint32())); continue; - case 3: + } + case 3: { if (tag !== 26) { break; } message.chunks.push(Chunk.decode(reader, reader.uint32())); continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -872,7 +693,7 @@ export const QueryByDocumentNameResponse: MessageFns, I>>(base?: I): QueryByDocumentNameResponse { - return QueryByDocumentNameResponse.fromPartial(base ?? ({} as any)); + create, I>>(base?: I): GetDocumentResponse { + return GetDocumentResponse.fromPartial(base ?? ({} as any)); }, - fromPartial, I>>(object: I): QueryByDocumentNameResponse { - const message = createBaseQueryByDocumentNameResponse(); + fromPartial, I>>(object: I): GetDocumentResponse { + const message = createBaseGetDocumentResponse(); message.success = object.success ?? false; message.error = object.error ?? 
undefined; message.chunks = object.chunks?.map((e) => Chunk.fromPartial(e)) || []; @@ -909,100 +730,70 @@ export const QueryByDocumentNameResponse: MessageFns Buffer.from(QueryRequest.encode(value).finish()), - requestDeserialize: (value: Buffer) => QueryRequest.decode(value), - responseSerialize: (value: QueryResponse) => Buffer.from(QueryResponse.encode(value).finish()), - responseDeserialize: (value: Buffer) => QueryResponse.decode(value) + requestSerialize: (value: SearchChunksRequest) => Buffer.from(SearchChunksRequest.encode(value).finish()), + requestDeserialize: (value: Buffer) => SearchChunksRequest.decode(value), + responseSerialize: (value: SearchChunksResponse) => Buffer.from(SearchChunksResponse.encode(value).finish()), + responseDeserialize: (value: Buffer) => SearchChunksResponse.decode(value) }, /** Query the index for all chunks of a specific document */ - queryChunksByDocumentName: { - path: "/redactive.grpc.v1.Search/QueryChunksByDocumentName", - requestStream: false, - responseStream: false, - requestSerialize: (value: QueryByDocumentNameRequest) => - Buffer.from(QueryByDocumentNameRequest.encode(value).finish()), - requestDeserialize: (value: Buffer) => QueryByDocumentNameRequest.decode(value), - responseSerialize: (value: QueryByDocumentNameResponse) => - Buffer.from(QueryByDocumentNameResponse.encode(value).finish()), - responseDeserialize: (value: Buffer) => QueryByDocumentNameResponse.decode(value) - }, - /** Get chunks by URL */ - getChunksByUrl: { - path: "/redactive.grpc.v1.Search/GetChunksByUrl", + getDocument: { + path: "/redactive.grpc.v2.Search/GetDocument", requestStream: false, responseStream: false, - requestSerialize: (value: GetChunksByUrlRequest) => Buffer.from(GetChunksByUrlRequest.encode(value).finish()), - requestDeserialize: (value: Buffer) => GetChunksByUrlRequest.decode(value), - responseSerialize: (value: GetChunksByUrlResponse) => Buffer.from(GetChunksByUrlResponse.encode(value).finish()), - responseDeserialize: (value: Buffer) => GetChunksByUrlResponse.decode(value) + requestSerialize: (value: GetDocumentRequest) => Buffer.from(GetDocumentRequest.encode(value).finish()), + requestDeserialize: (value: Buffer) => GetDocumentRequest.decode(value), + responseSerialize: (value: GetDocumentResponse) => Buffer.from(GetDocumentResponse.encode(value).finish()), + responseDeserialize: (value: Buffer) => GetDocumentResponse.decode(value) } } as const; export interface SearchServer extends UntypedServiceImplementation { /** Query the index for relevant chunks */ - queryChunks: handleUnaryCall; + searchChunks: handleUnaryCall; /** Query the index for all chunks of a specific document */ - queryChunksByDocumentName: handleUnaryCall; - /** Get chunks by URL */ - getChunksByUrl: handleUnaryCall; + getDocument: handleUnaryCall; } export interface SearchClient extends Client { /** Query the index for relevant chunks */ - queryChunks( - request: QueryRequest, - callback: (error: ServiceError | null, response: QueryResponse) => void + searchChunks( + request: SearchChunksRequest, + callback: (error: ServiceError | null, response: SearchChunksResponse) => void ): ClientUnaryCall; - queryChunks( - request: QueryRequest, + searchChunks( + request: SearchChunksRequest, metadata: Metadata, - callback: (error: ServiceError | null, response: QueryResponse) => void + callback: (error: ServiceError | null, response: SearchChunksResponse) => void ): ClientUnaryCall; - queryChunks( - request: QueryRequest, + searchChunks( + request: SearchChunksRequest, metadata: Metadata, 
options: Partial, - callback: (error: ServiceError | null, response: QueryResponse) => void + callback: (error: ServiceError | null, response: SearchChunksResponse) => void ): ClientUnaryCall; /** Query the index for all chunks of a specific document */ - queryChunksByDocumentName( - request: QueryByDocumentNameRequest, - callback: (error: ServiceError | null, response: QueryByDocumentNameResponse) => void - ): ClientUnaryCall; - queryChunksByDocumentName( - request: QueryByDocumentNameRequest, - metadata: Metadata, - callback: (error: ServiceError | null, response: QueryByDocumentNameResponse) => void - ): ClientUnaryCall; - queryChunksByDocumentName( - request: QueryByDocumentNameRequest, - metadata: Metadata, - options: Partial, - callback: (error: ServiceError | null, response: QueryByDocumentNameResponse) => void - ): ClientUnaryCall; - /** Get chunks by URL */ - getChunksByUrl( - request: GetChunksByUrlRequest, - callback: (error: ServiceError | null, response: GetChunksByUrlResponse) => void + getDocument( + request: GetDocumentRequest, + callback: (error: ServiceError | null, response: GetDocumentResponse) => void ): ClientUnaryCall; - getChunksByUrl( - request: GetChunksByUrlRequest, + getDocument( + request: GetDocumentRequest, metadata: Metadata, - callback: (error: ServiceError | null, response: GetChunksByUrlResponse) => void + callback: (error: ServiceError | null, response: GetDocumentResponse) => void ): ClientUnaryCall; - getChunksByUrl( - request: GetChunksByUrlRequest, + getDocument( + request: GetDocumentRequest, metadata: Metadata, options: Partial, - callback: (error: ServiceError | null, response: GetChunksByUrlResponse) => void + callback: (error: ServiceError | null, response: GetDocumentResponse) => void ): ClientUnaryCall; } -export const SearchClient = makeGenericClientConstructor(SearchService, "redactive.grpc.v1.Search") as unknown as { +export const SearchClient = makeGenericClientConstructor(SearchService, "redactive.grpc.v2.Search") as unknown as { new (address: string, credentials: ChannelCredentials, options?: Partial): SearchClient; service: typeof SearchService; serviceName: string; diff --git a/sdks/node/src/index.ts b/sdks/node/src/index.ts index 311828a..8468431 100644 --- a/sdks/node/src/index.ts +++ b/sdks/node/src/index.ts @@ -1,3 +1,4 @@ export { SearchClient } from "./searchClient"; export { AuthClient } from "./authClient"; export { MultiUserClient } from "./multiUserClient"; +export { Filters } from "./grpc/search"; diff --git a/sdks/node/src/multiUserClient.test.ts b/sdks/node/src/multiUserClient.test.ts index bb759f7..1c7067e 100644 --- a/sdks/node/src/multiUserClient.test.ts +++ b/sdks/node/src/multiUserClient.test.ts @@ -137,18 +137,18 @@ describe("MultiUserClient", () => { it("should throw an error if no valid session when querying chunks", async () => { const userId = "user123"; - const semanticQuery = "query"; + const query = "query"; readUserData.mockResolvedValue(undefined); - await expect(multiUserClient.queryChunks({ userId, semanticQuery })).rejects.toThrow( + await expect(multiUserClient.searchChunks({ userId, query })).rejects.toThrow( `No valid Redactive session for user '${userId}'` ); }); it("should query chunks after refreshing idToken", async () => { const userId = "user123"; - const semanticQuery = "query"; + const query = "query"; const idToken = "idToken123"; const refreshToken = "refreshToken123"; const chunks = [{ chunk: "chunk1" }, { chunk: "chunk2" }]; @@ -166,16 +166,16 @@ describe("MultiUserClient", () => { 
readUserData.mockResolvedValueOnce(expiredUserData).mockResolvedValueOnce(refreshedUserData); multiUserClient._refreshUserData = vi.fn().mockResolvedValue(refreshedUserData); - mockSearchClient.queryChunks.mockResolvedValue(chunks as unknown as RelevantChunk[]); + mockSearchClient.searchChunks.mockResolvedValue(chunks as unknown as RelevantChunk[]); multiUserClient.searchClient = mockSearchClient; - const result = await multiUserClient.queryChunks({ userId, semanticQuery }); + const result = await multiUserClient.searchChunks({ userId, query }); expect(result).toEqual(chunks); - expect(mockSearchClient.queryChunks).toHaveBeenCalledWith({ accessToken: idToken, semanticQuery, count: 10 }); + expect(mockSearchClient.searchChunks).toHaveBeenCalledWith({ accessToken: idToken, query, count: 10 }); }); - it("should query chunks by document name after refreshing idToken", async () => { + it("should query chunks by document ref after refreshing idToken", async () => { const userId = "user123"; const documentName = "test-document"; const idToken = "idToken123"; @@ -195,41 +195,12 @@ describe("MultiUserClient", () => { readUserData.mockResolvedValueOnce(expiredUserData).mockResolvedValueOnce(refreshedUserData); multiUserClient._refreshUserData = vi.fn().mockResolvedValue(refreshedUserData); - mockSearchClient.queryChunksByDocumentName.mockResolvedValue(chunks as unknown as Chunk[]); + mockSearchClient.getDocument.mockResolvedValue(chunks as unknown as Chunk[]); multiUserClient.searchClient = mockSearchClient; - const result = await multiUserClient.queryChunksByDocumentName({ userId, documentName }); + const result = await multiUserClient.getDocument({ userId, ref: documentName }); expect(result).toEqual(chunks); - expect(mockSearchClient.queryChunksByDocumentName).toHaveBeenCalledWith({ accessToken: idToken, documentName }); - }); - - it("should get chunks by url after refreshing idToken", async () => { - const userId = "user123"; - const url = "https://example.com"; - const idToken = "idToken123"; - const refreshToken = "refreshToken123"; - const chunks = [{ chunk: "chunk1" }, { chunk: "chunk2" }]; - - const expiredUserData: UserData = { - idToken, - idTokenExpiry: new Date(Date.now() - 1000), - refreshToken - }; - const refreshedUserData: UserData = { - idToken, - idTokenExpiry: new Date(Date.now() + 3600 * 1000), - refreshToken - }; - - readUserData.mockResolvedValueOnce(expiredUserData).mockResolvedValueOnce(refreshedUserData); - multiUserClient._refreshUserData = vi.fn().mockResolvedValue(refreshedUserData); - mockSearchClient.getChunksByUrl.mockResolvedValue(chunks as unknown as Chunk[]); - - multiUserClient.searchClient = mockSearchClient; - const result = await multiUserClient.getChunksByUrl({ userId, url }); - - expect(result).toEqual(chunks); - expect(mockSearchClient.getChunksByUrl).toHaveBeenCalledWith({ accessToken: idToken, url }); + expect(mockSearchClient.getDocument).toHaveBeenCalledWith({ accessToken: idToken, ref: documentName }); }); }); diff --git a/sdks/node/src/multiUserClient.ts b/sdks/node/src/multiUserClient.ts index ee4baa4..b766d18 100644 --- a/sdks/node/src/multiUserClient.ts +++ b/sdks/node/src/multiUserClient.ts @@ -2,12 +2,7 @@ import { randomUUID } from "node:crypto"; import { AuthClient } from "./authClient"; import { Chunk, RelevantChunk } from "./grpc/chunks"; -import { - GetChunksByUrlSearchParams, - QueryChunksByDocumentNameSearchParams, - QueryChunksSearchParams, - SearchClient -} from "./searchClient"; +import { GetDocumentParams, SearchChunksParams, 
SearchClient } from "./searchClient"; export interface UserData { signInState?: string; @@ -17,14 +12,11 @@ export interface UserData { connections?: string[]; } -export interface QueryChunksParams extends Omit { - userId: string; -} -export interface QueryChunksByDocumentNameParams extends Omit { +export interface MultiUserSearchChunksParams extends Omit { userId: string; } -export interface GetChunksByUrlParams extends Omit { +export interface MultiUserGetDocumentParams extends Omit { userId: string; } @@ -154,35 +146,12 @@ export class MultiUserClient { /** * Query for relevant chunks based on a semantic query. * @param userId - The ID of the user. - * @param semanticQuery - The query string used to find relevant chunks. + * @param query - The query string used to find relevant chunks. * @param count - The number of relevant chunks to retrieve. Defaults to 10. * @param filters - An object of filters for querying. Optional. * @returns list of relevant chunks. */ - async queryChunks({ userId, semanticQuery, count = 10, filters }: QueryChunksParams): Promise { - let userData = await this.readUserData(userId); - if (!userData || !userData.refreshToken) { - throw new Error(`No valid Redactive session for user '${userId}'`); - } - if (!!userData.idTokenExpiry && new Date(userData.idTokenExpiry) < new Date()) { - userData = await this._refreshUserData(userId, userData.refreshToken, undefined); - } - - return await this.searchClient.queryChunks({ accessToken: userData.idToken!, semanticQuery, count, filters }); - } - - /** - * Query for chunks by document name. - * @param userId - The ID of the user. - * @param documentName - The name of the document to retrieve chunks. - * @param filters - The filters for querying documents. Optional. - * @returns The complete list of chunks for the matching document. - */ - async queryChunksByDocumentName({ - userId, - documentName, - filters - }: QueryChunksByDocumentNameParams): Promise { + async searchChunks({ userId, query, count = 10, filters }: MultiUserSearchChunksParams): Promise { let userData = await this.readUserData(userId); if (!userData || !userData.refreshToken) { throw new Error(`No valid Redactive session for user '${userId}'`); @@ -191,16 +160,17 @@ export class MultiUserClient { userData = await this._refreshUserData(userId, userData.refreshToken, undefined); } - return await this.searchClient.queryChunksByDocumentName({ accessToken: userData.idToken!, documentName, filters }); + return await this.searchClient.searchChunks({ accessToken: userData.idToken!, query, count, filters }); } /** * Get chunks from a document by its URL. * @param accessToken - The user's Redactive access token. - * @param url - The URL to the document for retrieving chunks. + * @param ref - A reference to the document we are retrieving. Can be either a url or document name. + * @param filters - The filters for querying documents. Optional. Only applicable for getting by document name. * @returns The complete list of chunks for the matching document. 
*/ - async getChunksByUrl({ userId, url }: GetChunksByUrlParams): Promise { + async getDocument({ userId, ref, filters }: MultiUserGetDocumentParams): Promise { let userData = await this.readUserData(userId); if (!userData || !userData.refreshToken) { throw new Error(`No valid Redactive session for user '${userId}'`); @@ -209,6 +179,6 @@ export class MultiUserClient { userData = await this._refreshUserData(userId, userData.refreshToken, undefined); } - return await this.searchClient.getChunksByUrl({ accessToken: userData.idToken!, url }); + return await this.searchClient.getDocument({ accessToken: userData.idToken!, ref, filters }); } } diff --git a/sdks/node/src/searchClient.test.ts b/sdks/node/src/searchClient.test.ts index 03a8abe..a8dbe39 100644 --- a/sdks/node/src/searchClient.test.ts +++ b/sdks/node/src/searchClient.test.ts @@ -4,12 +4,10 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { Chunk, ChunkReference, RelevantChunk, RelevantChunk_Relevance, SourceReference } from "./grpc/chunks"; import { Filters, - GetChunksByUrlRequest, - GetChunksByUrlResponse, - QueryByDocumentNameRequest, - QueryByDocumentNameResponse, - QueryRequest, - QueryResponse, + GetDocumentRequest, + GetDocumentResponse, + SearchChunksRequest, + SearchChunksResponse, SearchClient as SearchServiceClient } from "./grpc/search"; import { SearchClient } from "./searchClient"; @@ -21,9 +19,9 @@ describe("Service client", () => { vi.clearAllMocks(); }); - it("should query chunks by document name", async () => { + it("should get chunks by document reference", async () => { const accessToken = "test-accessToken"; - const documentName = "test-documentName"; + const ref = "test-documentName"; const filters: Partial = { scope: ["dataprovider"], created: { before: new Date() }, @@ -55,21 +53,21 @@ describe("Service client", () => { // Mock the _getClient method of SearchClient to return a mock gRPC client vi.spyOn(SearchClient.prototype, "_getClient").mockReturnValue({ - queryChunksByDocumentName: ( - _request: QueryByDocumentNameRequest, + getDocument: ( + _request: GetDocumentRequest, _metadata: Metadata, - callback: (error: ServiceError | null, response: QueryByDocumentNameResponse) => void - ) => callback(null, { success: true, chunks: expectedResponse } as QueryByDocumentNameResponse) + callback: (error: ServiceError | null, response: GetDocumentResponse) => void + ) => callback(null, { success: true, chunks: expectedResponse } as GetDocumentResponse) } as unknown as SearchServiceClient); const client = new SearchClient(); - const response = await client.queryChunksByDocumentName({ accessToken, documentName, filters }); + const response = await client.getDocument({ accessToken, ref, filters }); expect(response).toStrictEqual(expectedResponse); }); - it("should query chunks", async () => { + it("should search chunks", async () => { const accessToken = "test-accessToken"; const query = "test-query"; const count = 1; @@ -107,63 +105,18 @@ describe("Service client", () => { // Mock the _getClient method of SearchClient to return a mock gRPC client vi.spyOn(SearchClient.prototype, "_getClient").mockReturnValue({ - queryChunks: ( - _request: QueryRequest, + searchChunks: ( + _request: SearchChunksRequest, _metadata: Metadata, - callback: (error: ServiceError | null, response: QueryResponse) => void - ) => callback(null, QueryResponse.fromJSON({ relevantChunks: expectedResponse })) + callback: (error: ServiceError | null, response: SearchChunksResponse) => void + ) => callback(null, 
SearchChunksResponse.fromJSON({ relevantChunks: expectedResponse })) } as unknown as SearchServiceClient); // Create an instance of SearchClient const client = new SearchClient(); // Call the queryChunks method and capture the response - const response = await client.queryChunks({ accessToken, semanticQuery: query, count, filters }); - - // Assert that the response matches the expected response - expect(response).toStrictEqual(expectedResponse); - }); - - it("should get chunks by url", async () => { - const accessToken = "test-accessToken"; - const url = "https://example.com"; - const expectedResponse: Chunk[] = Array.from({ length: 10 }, (_, i) => ({ - source: { - system: `system-${i}`, - systemVersion: `systemVersion-${i}`, - documentId: `documentId-${i}`, - documentVersion: `documentVersion-${i}`, - connectionId: `connectionId-${i}`, - documentName: `documentName-${i}`, - documentPath: `documentPath-${i}` - } as SourceReference, - chunk: { - chunkHash: `chunkHash-${i}`, - chunkId: `chunkId-${i}`, - chunkingVersion: `chunkingVersion-${i}` - } as ChunkReference, - chunkBody: `chunkBody-${i}`, - documentMetadata: { - createdAt: undefined, - link: undefined, - modifiedAt: undefined - } - })); - - // Mock the _getClient method of SearchClient to return a mock gRPC client - vi.spyOn(SearchClient.prototype, "_getClient").mockReturnValue({ - getChunksByUrl: ( - _request: GetChunksByUrlRequest, - _metadata: Metadata, - callback: (error: ServiceError | null, response: GetChunksByUrlResponse) => void - ) => callback(null, GetChunksByUrlResponse.fromJSON({ chunks: expectedResponse })) - } as unknown as SearchServiceClient); - - // Create an instance of SearchClient - const client = new SearchClient(); - - // Call the getChunksByUrl method and capture the response - const response = await client.getChunksByUrl({ accessToken, url }); + const response = await client.searchChunks({ accessToken, query, count, filters }); // Assert that the response matches the expected response expect(response).toStrictEqual(expectedResponse); diff --git a/sdks/node/src/searchClient.ts b/sdks/node/src/searchClient.ts index 3de55f1..759f586 100644 --- a/sdks/node/src/searchClient.ts +++ b/sdks/node/src/searchClient.ts @@ -2,36 +2,28 @@ import { Client, credentials, Metadata } from "@grpc/grpc-js"; import { Chunk, RelevantChunk } from "./grpc/chunks"; import { - DocumentNameQuery, Filters, - GetChunksByUrlRequest, - GetChunksByUrlResponse, + GetDocumentRequest, + GetDocumentResponse, Query, - QueryByDocumentNameRequest, - QueryByDocumentNameResponse, - QueryRequest, - QueryResponse, + SearchChunksRequest, + SearchChunksResponse, SearchClient as SearchServiceClient } from "./grpc/search"; -export interface QueryChunksSearchParams { +export interface SearchChunksParams { accessToken: string; - semanticQuery: string; + query: string; count?: number; filters?: Partial; } -export interface QueryChunksByDocumentNameSearchParams { +export interface GetDocumentParams { accessToken: string; - documentName: string; + ref: string; filters?: Partial; } -export interface GetChunksByUrlSearchParams { - accessToken: string; - url: string; -} - export class SearchClient { host: string = "grpc.redactive.ai"; port: number = 443; @@ -63,32 +55,27 @@ export class SearchClient { /** * Query for relevant chunks based on a semantic query. * @param accessToken - The user's Redactive access token. - * @param semanticQuery - The query string used to find relevant chunks. + * @param query - The query string used to find relevant chunks. 
* @param count - The number of relevant chunks to retrieve. Defaults to 10. * @param filters - An object of filters for querying. Optional. * @returns list of relevant chunks. */ - async queryChunks({ - accessToken, - semanticQuery, - count = 10, - filters - }: QueryChunksSearchParams): Promise { + async searchChunks({ accessToken, query, count = 10, filters }: SearchChunksParams): Promise { const requestMetadata = new Metadata(); requestMetadata.set("Authorization", `Bearer ${accessToken}`); requestMetadata.set("User-Agent", "redactive-sdk-node"); const client = this._getClient(SearchServiceClient.serviceName) as SearchServiceClient; - const query: Query = { semanticQuery }; + const query_obj: Query = { semanticQuery: query }; const _filters: Filters = { scope: [], userEmails: [], ...filters }; - const queryRequest: QueryRequest = { - query, + const searchRequest: SearchChunksRequest = { + query: query_obj, count, filters: filters ? _filters : undefined }; - const response = await new Promise((resolve, reject) => { - client.queryChunks(queryRequest, requestMetadata, (err, response) => { + const response = await new Promise((resolve, reject) => { + client.searchChunks(searchRequest, requestMetadata, (err, response) => { if (err) { reject(err); return; @@ -101,60 +88,26 @@ export class SearchClient { } /** - * Query for chunks by document name. + * Get chunks for a document via a specific reference * @param accessToken - The user's Redactive access token. - * @param documentName - The name of the document to retrieve chunks. - * @param filters - The filters for querying documents. Optional. + * @param ref - A reference to the document to retrieve. Can be either a url or document name. + * @param filters - The filters for querying documents. Optional. Only applicable for getting by document name. * @returns The complete list of chunks for the matching document. */ - async queryChunksByDocumentName({ - accessToken, - documentName, - filters - }: QueryChunksByDocumentNameSearchParams): Promise { + async getDocument({ accessToken, ref, filters }: GetDocumentParams): Promise { const requestMetadata = new Metadata(); requestMetadata.set("Authorization", `Bearer ${accessToken}`); requestMetadata.set("User-Agent", "redactive-sdk-node"); const client = this._getClient(SearchServiceClient.serviceName) as SearchServiceClient; - const query: DocumentNameQuery = { documentName }; const _filters: Filters = { scope: [], userEmails: [], ...filters }; - const queryRequest: QueryByDocumentNameRequest = { - query, + const queryRequest: GetDocumentRequest = { + ref, filters: filters ? _filters : undefined }; - const response = await new Promise((resolve, reject) => { - client.queryChunksByDocumentName(queryRequest, requestMetadata, (err, response) => { - if (err) { - reject(err); - return; - } - - return resolve(response); - }); - }); - return response.chunks; - } - - /** - * Get chunks from a document by its URL. - * @param accessToken - The user's Redactive access token. - * @param url - The URL to the document for retrieving chunks. - * @returns The complete list of chunks for the matching document. 
- */ - async getChunksByUrl({ accessToken, url }: GetChunksByUrlSearchParams): Promise { - const requestMetadata = new Metadata(); - requestMetadata.set("Authorization", `Bearer ${accessToken}`); - requestMetadata.set("User-Agent", "redactive-sdk-node"); - - const client = this._getClient(SearchServiceClient.serviceName) as SearchServiceClient; - const queryRequest: GetChunksByUrlRequest = { - url - }; - - const response = await new Promise((resolve, reject) => { - client.getChunksByUrl(queryRequest, requestMetadata, (err, response) => { + const response = await new Promise((resolve, reject) => { + client.getDocument(queryRequest, requestMetadata, (err, response) => { if (err) { reject(err); return; diff --git a/sdks/python/README.md b/sdks/python/README.md index 34989a8..f3752f8 100644 --- a/sdks/python/README.md +++ b/sdks/python/README.md @@ -36,31 +36,60 @@ The library has the following components: AuthClient needs to be configured with your account's API key which is available in the Apps page at [Redactive Dashboard](https://dashboard.redactive.ai/). +The AuthClient can be used to present users with the data providers' OAuth consent +pages: + ```python from redactive.auth_client import AuthClient -client = AuthClient(api_key="API-KEY") +client = AuthClient(api_key="YOUR-APP'S-API-KEY") + +# This value must _exactly_ match the redirect URI you provided when creating your +# Redactive app. +redirect_uri = "YOUR-APP'S-REDIRECT-URI" + +# Possible data sources: confluence, sharepoint +provider = "confluence" -# Establish an connection to data source -# Possible data sources: confluence, google-drive, jira, zendesk, slack, sharepoint -redirect_uri = "https://url-debugger.vercel.app" sign_in_url = await client.begin_connection( - provider="confluence", redirect_uri=redirect_uri + provider=provider, redirect_uri=redirect_uri ) -# Navigate User to sign_in_url -# User will receive an oauth2 auth code after consenting the app's data source access permissions. -# Use this code to exchange Redactive access_token with Redactive API -response = await client.exchange_tokens(code="OAUTH2-TOKEN") +# Now redirect your user to sign_in_url ``` +The user will be redirected back to your app's configured redirect uri after they have completed the steps on +the data provider's OAuth consent page. There will be a signin code present in the `code` parameter of the query string e.g. +`https://your-redirect-page.com?code=abcde12345`. + +This code may be exchanged for a user access token (which the user may use to issue queries against their data): + +```python +# Exchange signin code for a Redactive ID token +response = await client.exchange_tokens(code="SIGNIN-CODE") +access_token = response.idToken +``` + +Once a user has completed the OAuth flow, the data source should show up in their connected data sources: + +```python +response = await client.list_connections( + access_token=access_token +) + +assert "confluence" in response.connections # ✅ +``` + +Use the `list_connections` method to keep your user's connection status up to date, and provide mechanisms to re-connect data sources. + + ### SearchClient -With the Redactive access_token, you can perform three types of searches using the Redactive Search service: +With a Redactive `access_token`, you can perform two types of search + +#### Query-based Search -1. **Semantic Query Search**: Retrieve relevant chunks of information that are semantically related to a user query. -2. **URL-based Search**: Obtain all the chunks from a document by specifying its URL. 
-3. **Document Name Search**: Query for all the chunks from a document based on the name of the document. +Retrieve relevant chunks of information that are related to a user query. ```python from redactive.search_client import SearchClient @@ -68,32 +97,42 @@ from redactive.search_client import SearchClient client = SearchClient() # Semantic Search: retrieve text extracts (chunks) from various documents pertaining to the user query -client.query_chunks( - access_token="REDACTIVE-USER-ACCESS-TOKEN", - semantic_query="Tell me about AI" -) - -# URL-based Search: retrieve all chunks of the document at that URL -client.get_chunks_by_url( - access_token="REDACTIVE-USER-ACCESS-TOKEN", - url="https://example.com/document" +client.search_chunks( + access_token=access_token, + query="Tell me about AI" ) +``` -# Document Name Search: retrieve all chunks of a document identified by its name -client.query_chunks_by_document_name( - access_token="REDACTIVE-USER-ACCESS-TOKEN", - document_name="Project Plan" -) +**Filters** may be applied to query-based search operations. At present, the following fields may be provided as filter predicates: + +```protobuf +message Filters { + // Scope of the query. This may either be the name of a provider, or a subspace of documents. + // Subspaces take the form of :/// + // e.g. for Confluence: 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding Guide' + // for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding Guide.pdf' + repeated string scope = 1; + // Timespan of response chunk's creation + optional TimeSpan created = 2; + // Timespan of response chunk's last modification + optional TimeSpan modified = 3; + // List of user emails associated with response chunk + repeated string user_emails = 4; + // Include content from documents in trash + optional bool include_content_in_trash = 5; +} ``` -### Filters +The query will only return results which match _ALL_ filter predicates i.e. if multiple fields are populated in the filter object, +the resulting filter is the logical 'AND' of all the fields. If a data source provider does not support a filter-type, then no +results from that provider are returned. -Query methods, i.e. `query_chunks`, `query_chunks_by_document_name`, support a set of optional filters. The filters are applied in a logical 'AND' operation. If a data source provider does not support a filter-type, then no results from that provider are returned. +Filters may be populated and provided to a query in the following way for the Python SDK: ```python from datetime import datetime, timedelta from redactive.search_client import SearchClient -from redactive.grpc.v1 import Filters +from redactive.grpc.v2 import Filters client = SearchClient() @@ -111,13 +150,26 @@ filters = Filters().from_dict({ "userEmails": ["myEmail@example.com"], "includeContentInTrash": True, }) -client.query_chunks( +client.search_chunks( access_token="REDACTIVE-USER-ACCESS-TOKEN", semantic_query="Tell me about AI", filters=filters ) ``` + +#### Document Fetch + +Obtain all the chunks from a specific document by specifying a unique reference (i.e. a URL). + +```python +# URL-based Search: retrieve all chunks of the document at that URL +client.get_document( + access_token="REDACTIVE-USER-ACCESS-TOKEN", + ref="https://example.com/document" +) +``` + ### Multi-User Client The `MultiUserClient` class helps manage multiple users' authentication and access to the Redactive search service. 
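For multi-user applications, the document-fetch rename carries over to `MultiUserClient.get_document`. The snippet below is a minimal sketch only, not part of this change: it assumes an already-configured `multi_user_client` and a `user_id` with a valid Redactive session (constructed as in the README example that follows), and the reference value is a placeholder (a URL or document name).

```python
from redactive.grpc.v2 import Chunk
from redactive.multi_user_client import MultiUserClient


async def fetch_document_for_user(multi_user_client: MultiUserClient, user_id: str) -> list[Chunk]:
    # Assumes `multi_user_client` is configured as in the MultiUserClient README example
    # and that `user_id` has completed the connection flow.
    # "https://example.com/document" is an illustrative placeholder reference.
    return await multi_user_client.get_document(user_id=user_id, ref="https://example.com/document")
```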
@@ -145,8 +197,8 @@ is_connection_successful = await multi_user_client.handle_connection_callback( ) # User can now use Redactive search service via `MultiUserClient`'s other methods: -semantic_query = "Tell me about the missing research vessel, the Borealis" -chunks = await multi_user_client.query_chunks(user_id=user_id, semantic_query=semantic_query) +query = "Tell me about the missing research vessel, the Borealis" +chunks = await multi_user_client.search_chunks(user_id=user_id, query=query) ``` ## Development diff --git a/sdks/python/src/redactive/grpc/v1/__init__.py b/sdks/python/src/redactive/grpc/v2/__init__.py similarity index 61% rename from sdks/python/src/redactive/grpc/v1/__init__.py rename to sdks/python/src/redactive/grpc/v2/__init__.py index a5a75fb..d3adf77 100644 --- a/sdks/python/src/redactive/grpc/v1/__init__.py +++ b/sdks/python/src/redactive/grpc/v2/__init__.py @@ -37,33 +37,35 @@ class ChunkMetadata(betterproto.Message): @dataclass(eq=False, repr=False) class SourceReference(betterproto.Message): system: str = betterproto.string_field(1) - """Source system of the document e.g. confluence, slack, google-drive""" + """Source system of the document e.g. confluence, sharepoint""" system_version: str = betterproto.string_field(2) """Version of the source system e.g. 1.0.0""" connection_id: str = betterproto.string_field(3) """ - Connection id to the source system e.g. confluence space id, slack channel - id, google-drive drive id + Connection id to the source system e.g. confluence space id, sharepoint + drive id """ document_id: str = betterproto.string_field(4) """ - Document id in the source system e.g. confluence page id, slack message id, - google-drive document id + Document id in the source system e.g. confluence page id, sharepoint file + id """ document_version: str = betterproto.string_field(5) """ - Document version in the source system e.g. confluence page version, slack - message version, google-drive document version + Document version in the source system e.g. confluence page version, + sharepoint file hash """ document_path: Optional[str] = betterproto.string_field(6, optional=True, group="_document_path") """ - Document path in the source system e.g. "My Drive/document.txt", "slack- - channel-name" + Document path in the source system e.g. + "redactiveai.atlassian.net/Engineering/Onboarding Guide" or + "redactiveai.sharepoint.com/Shared Documents/Engineering/Onboarding + Guide.pdf" """ document_name: Optional[str] = betterproto.string_field(7, optional=True, group="_document_name") @@ -127,14 +129,11 @@ class Chunk(betterproto.Message): @dataclass(eq=False, repr=False) class Query(betterproto.Message): - semantic_query: str = betterproto.string_field(1) - """Semantic query to execute""" + semantic_query: Optional[str] = betterproto.string_field(1, optional=True, group="_semantic_query") + """Search query for semantic content""" - -@dataclass(eq=False, repr=False) -class DocumentNameQuery(betterproto.Message): - document_name: str = betterproto.string_field(1) - """Document name to search for""" + keyword_query: Optional[str] = betterproto.string_field(2, optional=True, group="_keyword_query") + """Specific keywords to search for in source document""" @dataclass(eq=False, repr=False) @@ -147,8 +146,12 @@ class TimeSpan(betterproto.Message): class Filters(betterproto.Message): scope: List[str] = betterproto.string_field(1) """ - Scope e.g. "confluence", "slack://channel-name", "google- - drive://CompanyDrive/document.docx" + Scope of the query. 
This may either be the name of a fetcher, or a subspace + of documents. Subspaces take the form of :/// e.g. + for Confluence: + 'confluence://redactiveai.atlassian.net/Engineering/Engineering Onboarding + Guide' for Sharepoint: 'sharepoint://redactiveai.sharepoint.com/Shared + Documents/Engineering/Onboarding Guide.pdf' """ created: Optional["TimeSpan"] = betterproto.message_field(2, optional=True, group="_created") @@ -167,7 +170,7 @@ class Filters(betterproto.Message): @dataclass(eq=False, repr=False) -class QueryRequest(betterproto.Message): +class SearchChunksRequest(betterproto.Message): count: Optional[int] = betterproto.uint32_field(1, optional=True, group="_count") """How many results to try to return (maximum number of results)""" @@ -179,50 +182,30 @@ class QueryRequest(betterproto.Message): @dataclass(eq=False, repr=False) -class QueryResponse(betterproto.Message): - success: bool = betterproto.bool_field(1) - """Query was successful""" - - error: Optional["betterproto_lib_google_protobuf.Struct"] = betterproto.message_field( - 2, optional=True, group="_error" - ) - """Error message if query failed""" +class GetDocumentRequest(betterproto.Message): + ref: str = betterproto.string_field(1) + """A reference to the document to retrieve""" - relevant_chunks: List["RelevantChunk"] = betterproto.message_field(3) - """List of relevant chunks""" + filters: Optional["Filters"] = betterproto.message_field(2, optional=True, group="_filters") + """Query filters (only really for GetDocByTitle)""" @dataclass(eq=False, repr=False) -class GetChunksByUrlRequest(betterproto.Message): - url: str = betterproto.string_field(1) - """URL to document""" - - -@dataclass(eq=False, repr=False) -class GetChunksByUrlResponse(betterproto.Message): +class SearchChunksResponse(betterproto.Message): success: bool = betterproto.bool_field(1) - """Fetch was successful""" + """Query was successful""" error: Optional["betterproto_lib_google_protobuf.Struct"] = betterproto.message_field( 2, optional=True, group="_error" ) - """Error message if fetch failed""" - - chunks: List["Chunk"] = betterproto.message_field(3) - """List of chunks""" - - -@dataclass(eq=False, repr=False) -class QueryByDocumentNameRequest(betterproto.Message): - query: "DocumentNameQuery" = betterproto.message_field(2) - """The query to execute""" + """Error message if query failed""" - filters: Optional["Filters"] = betterproto.message_field(3, optional=True, group="_filters") - """Filters to apply to query""" + relevant_chunks: List["RelevantChunk"] = betterproto.message_field(3) + """List of relevant chunks""" @dataclass(eq=False, repr=False) -class QueryByDocumentNameResponse(betterproto.Message): +class GetDocumentResponse(betterproto.Message): success: bool = betterproto.bool_field(1) """Query was successful""" @@ -236,52 +219,35 @@ class QueryByDocumentNameResponse(betterproto.Message): class SearchStub(betterproto.ServiceStub): - async def query_chunks( + async def search_chunks( self, - query_request: "QueryRequest", + search_chunks_request: "SearchChunksRequest", *, timeout: Optional[float] = None, deadline: Optional["Deadline"] = None, metadata: Optional["MetadataLike"] = None, - ) -> "QueryResponse": + ) -> "SearchChunksResponse": return await self._unary_unary( - "/redactive.grpc.v1.Search/QueryChunks", - query_request, - QueryResponse, + "/redactive.grpc.v2.Search/SearchChunks", + search_chunks_request, + SearchChunksResponse, timeout=timeout, deadline=deadline, metadata=metadata, ) - async def query_chunks_by_document_name( + 
async def get_document( self, - query_by_document_name_request: "QueryByDocumentNameRequest", + get_document_request: "GetDocumentRequest", *, timeout: Optional[float] = None, deadline: Optional["Deadline"] = None, metadata: Optional["MetadataLike"] = None, - ) -> "QueryByDocumentNameResponse": + ) -> "GetDocumentResponse": return await self._unary_unary( - "/redactive.grpc.v1.Search/QueryChunksByDocumentName", - query_by_document_name_request, - QueryByDocumentNameResponse, - timeout=timeout, - deadline=deadline, - metadata=metadata, - ) - - async def get_chunks_by_url( - self, - get_chunks_by_url_request: "GetChunksByUrlRequest", - *, - timeout: Optional[float] = None, - deadline: Optional["Deadline"] = None, - metadata: Optional["MetadataLike"] = None, - ) -> "GetChunksByUrlResponse": - return await self._unary_unary( - "/redactive.grpc.v1.Search/GetChunksByUrl", - get_chunks_by_url_request, - GetChunksByUrlResponse, + "/redactive.grpc.v2.Search/GetDocument", + get_document_request, + GetDocumentResponse, timeout=timeout, deadline=deadline, metadata=metadata, @@ -289,56 +255,38 @@ async def get_chunks_by_url( class SearchBase(ServiceBase): - async def query_chunks(self, query_request: "QueryRequest") -> "QueryResponse": - raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) - - async def query_chunks_by_document_name( - self, query_by_document_name_request: "QueryByDocumentNameRequest" - ) -> "QueryByDocumentNameResponse": + async def search_chunks(self, search_chunks_request: "SearchChunksRequest") -> "SearchChunksResponse": raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) - async def get_chunks_by_url(self, get_chunks_by_url_request: "GetChunksByUrlRequest") -> "GetChunksByUrlResponse": + async def get_document(self, get_document_request: "GetDocumentRequest") -> "GetDocumentResponse": raise grpclib.GRPCError(grpclib.const.Status.UNIMPLEMENTED) - async def __rpc_query_chunks(self, stream: "grpclib.server.Stream[QueryRequest, QueryResponse]") -> None: - request = await stream.recv_message() - response = await self.query_chunks(request) - await stream.send_message(response) - - async def __rpc_query_chunks_by_document_name( - self, - stream: "grpclib.server.Stream[QueryByDocumentNameRequest, QueryByDocumentNameResponse]", + async def __rpc_search_chunks( + self, stream: "grpclib.server.Stream[SearchChunksRequest, SearchChunksResponse]" ) -> None: request = await stream.recv_message() - response = await self.query_chunks_by_document_name(request) + response = await self.search_chunks(request) await stream.send_message(response) - async def __rpc_get_chunks_by_url( - self, - stream: "grpclib.server.Stream[GetChunksByUrlRequest, GetChunksByUrlResponse]", + async def __rpc_get_document( + self, stream: "grpclib.server.Stream[GetDocumentRequest, GetDocumentResponse]" ) -> None: request = await stream.recv_message() - response = await self.get_chunks_by_url(request) + response = await self.get_document(request) await stream.send_message(response) def __mapping__(self) -> Dict[str, grpclib.const.Handler]: return { - "/redactive.grpc.v1.Search/QueryChunks": grpclib.const.Handler( - self.__rpc_query_chunks, - grpclib.const.Cardinality.UNARY_UNARY, - QueryRequest, - QueryResponse, - ), - "/redactive.grpc.v1.Search/QueryChunksByDocumentName": grpclib.const.Handler( - self.__rpc_query_chunks_by_document_name, + "/redactive.grpc.v2.Search/SearchChunks": grpclib.const.Handler( + self.__rpc_search_chunks, grpclib.const.Cardinality.UNARY_UNARY, - QueryByDocumentNameRequest, - 
QueryByDocumentNameResponse, + SearchChunksRequest, + SearchChunksResponse, ), - "/redactive.grpc.v1.Search/GetChunksByUrl": grpclib.const.Handler( - self.__rpc_get_chunks_by_url, + "/redactive.grpc.v2.Search/GetDocument": grpclib.const.Handler( + self.__rpc_get_document, grpclib.const.Cardinality.UNARY_UNARY, - GetChunksByUrlRequest, - GetChunksByUrlResponse, + GetDocumentRequest, + GetDocumentResponse, ), } diff --git a/sdks/python/src/redactive/multi_user_client.py b/sdks/python/src/redactive/multi_user_client.py index f50d8bd..09ab679 100644 --- a/sdks/python/src/redactive/multi_user_client.py +++ b/sdks/python/src/redactive/multi_user_client.py @@ -7,7 +7,7 @@ import jwt from redactive.auth_client import AuthClient -from redactive.grpc.v1 import Chunk, Filters, RelevantChunk +from redactive.grpc.v2 import Chunk, Filters, RelevantChunk from redactive.search_client import SearchClient @@ -149,16 +149,16 @@ async def _get_id_token(self, user_id: str) -> str: raise InvalidRedactiveSessionError(user_id) return user_data.id_token - async def query_chunks( - self, user_id: str, semantic_query: str, count: int = 10, filters: Filters | dict[str, Any] | None = None + async def search_chunks( + self, user_id: str, query: str, count: int = 10, filters: Filters | dict[str, Any] | None = None ) -> list[RelevantChunk]: """ Query for relevant chunks based on a semantic query. :param user_id: The ID of the user. :type user_id: str - :param semantic_query: The query string used to find relevant chunks. - :type semantic_query: str + :param query: The query string used to find relevant chunks. + :type query: str :param count: The number of relevant chunks to retrieve. Defaults to 10. :type count: int, optional :param filters: The filters for relevant chunks. See `Filters` type. @@ -167,27 +167,9 @@ async def query_chunks( :rtype: list[RelevantChunk] """ id_token = await self._get_id_token(user_id) - return await self.search_client.query_chunks(id_token, semantic_query, count, filters=filters) + return await self.search_client.search_chunks(id_token, query, count, filters=filters) - async def query_chunks_by_document_name( - self, user_id: str, document_name: str, filters: Filters | dict[str, Any] | None = None - ) -> list[Chunk]: - """ - Query for chunks by document name. - - :param user_id: The ID of the user. - :type user_id: str - :param document_name: The name of the document to retrieve chunks. - :type document_name: str - :param filters: The filters for querying documents. See `Filters` type. - :type filters: Filters | dict[str, Any], optional - :return: The complete list of chunks for the matching document. - :rtype: list[Chunk] - """ - id_token = await self._get_id_token(user_id) - return await self.search_client.query_chunks_by_document_name(id_token, document_name, filters) - - async def get_chunks_by_url(self, user_id: str, url: str) -> list[Chunk]: + async def get_document(self, user_id: str, ref: str) -> list[Chunk]: """ Get chunks from a document by its URL. 
@@ -199,4 +181,4 @@ async def get_chunks_by_url(self, user_id: str, url: str) -> list[Chunk]: :rtype: list[Chunk] """ id_token = await self._get_id_token(user_id) - return await self.search_client.get_chunks_by_url(id_token, url) + return await self.search_client.get_document(id_token, ref) diff --git a/sdks/python/src/redactive/reranking/reranker.py b/sdks/python/src/redactive/reranking/reranker.py index 7bc7aa5..c277b6e 100644 --- a/sdks/python/src/redactive/reranking/reranker.py +++ b/sdks/python/src/redactive/reranking/reranker.py @@ -4,7 +4,7 @@ from rerankers import Reranker from redactive import search_client -from redactive.grpc.v1 import Filters, RelevantChunk +from redactive.grpc.v2 import Filters, RelevantChunk @dataclass @@ -32,9 +32,8 @@ def __init__(self, host: str = "grpc.redactive.ai", port: int = 443) -> None: async def query_chunks( self, access_token: str, - semantic_query: str, + query: str, count: int = 3, - query_filter: dict[str, Any] | None = None, filters: Filters | dict[str, Any] | None = None, ) -> list[RelevantChunk]: # Get many more results than the user is asking for, then @@ -43,11 +42,9 @@ async def query_chunks( if big_fetch_count > self.conf.max_fetch_results: big_fetch_count = self.conf.max_fetch_results - fetched_chunks = await super().query_chunks( - access_token, semantic_query, big_fetch_count, query_filter, filters - ) + fetched_chunks = await super().search_chunks(access_token, query, big_fetch_count, filters) ranker = Reranker(self.conf.reranking_algorithm) - return self.rerank(semantic_query, fetched_chunks, ranker, count) + return self.rerank(query, fetched_chunks, ranker, count) def rerank(self, query_string: str, fetched_chunks: list[RelevantChunk], ranker, top_k): """ diff --git a/sdks/python/src/redactive/search_client.py b/sdks/python/src/redactive/search_client.py index 9a02916..cdd4663 100644 --- a/sdks/python/src/redactive/search_client.py +++ b/sdks/python/src/redactive/search_client.py @@ -1,19 +1,15 @@ -import warnings from typing import Any -from urllib.parse import urlparse from grpclib.client import Channel from redactive._connection_mode import get_default_grpc_host_and_port as _get_default_grpc_host_and_port -from redactive.grpc.v1 import ( +from redactive.grpc.v2 import ( Chunk, - DocumentNameQuery, Filters, - GetChunksByUrlRequest, + GetDocumentRequest, Query, - QueryByDocumentNameRequest, - QueryRequest, RelevantChunk, + SearchChunksRequest, SearchStub, ) @@ -40,12 +36,11 @@ def __init__(self, host: str | None = None, port: int | None = None) -> None: self.host = host self.port = port - async def query_chunks( + async def search_chunks( self, access_token: str, - semantic_query: str, + query: str, count: int = 10, - query_filter: dict[str, Any] | None = None, filters: Filters | dict[str, Any] | None = None, ) -> list[RelevantChunk]: """ @@ -53,20 +48,15 @@ async def query_chunks( :param access_token: The user's Redactive access token. :type access_token: str - :param semantic_query: The query string used to find relevant chunks. - :type semantic_query: str + :param query: The query string used to find relevant chunks. + :type query: str :param count: The number of relevant chunks to retrieve. Defaults to 10. :type count: int, optional - :param query_filter: deprecated, use `filters`. - :type query_filter: dict[str, Any], optional :param filters: The filters for relevant chunks. See `Filters` type. 
:type filters: Filters | dict[str, Any], optional :return: A list of relevant chunks that match the query :rtype: list[RelevantChunk] """ - if query_filter is not None: - warnings.warn("`query_filter` has been renamed `filters``", DeprecationWarning, stacklevel=2) - async with Channel(self.host, self.port, ssl=True) as channel: stub = SearchStub(channel, metadata=({"authorization": f"Bearer {access_token}"})) @@ -75,67 +65,29 @@ async def query_chunks( _filters = filters elif isinstance(filters, dict): _filters = Filters(**filters) - elif query_filter is not None: - _filters = Filters(**query_filter) - request = QueryRequest(count=count, query=Query(semantic_query=semantic_query), filters=_filters) - response = await stub.query_chunks(request) + request = SearchChunksRequest(count=count, query=Query(semantic_query=query), filters=_filters) + response = await stub.search_chunks(request) return response.relevant_chunks - async def query_chunks_by_document_name( + async def get_document( self, access_token: str, - document_name: str, - filters: Filters | dict[str, Any] | None = None, + ref: str, ) -> list[Chunk]: """ Query for chunks by document name. :param access_token: The user's Redactive access token. :type access_token: str - :param document_name: The name of the document to retrieve chunks. - :type document_name: str - :param filters: The filters for querying documents. See `Filters` type. - :type filters: Filters | dict[str, Any], optional + :param ref: A reference to the document we are retrieving. + :type ref: str :return: The complete list of chunks for the matching document. :rtype: list[Chunk] """ async with Channel(self.host, self.port, ssl=True) as channel: stub = SearchStub(channel, metadata=({"authorization": f"Bearer {access_token}"})) - _filters: Filters | None = None - if isinstance(filters, Filters): - _filters = filters - elif isinstance(filters, dict): - _filters = Filters(**filters) - - request = QueryByDocumentNameRequest(query=DocumentNameQuery(document_name=document_name), filters=_filters) - response = await stub.query_chunks_by_document_name(request) - return response.chunks - - async def get_chunks_by_url( - self, - access_token: str, - url: str, - ) -> list[Chunk]: - """ - Get chunks from a document by its URL. - - :param access_token: The user access token - :type access_token: str - :param url: The URL to the document for retrieving chunks. - :type url: str - :return: The complete list of chunks for the document. 
- :rtype: list[Chunk] - """ - async with Channel(self.host, self.port, ssl=True) as channel: - stub = SearchStub(channel, metadata=({"authorization": f"Bearer {access_token}"})) - - parsed_url = urlparse(url) - if not all([parsed_url.scheme, parsed_url.netloc]): - msg = "Url is not valid" - raise ValueError(msg) - - request = GetChunksByUrlRequest(url=url) - response = await stub.get_chunks_by_url(request) + request = GetDocumentRequest(ref=ref) + response = await stub.get_document(request) return response.chunks diff --git a/sdks/python/tests/unit_tests/multi_user_client_tests.py b/sdks/python/tests/unit_tests/multi_user_client_tests.py index 99ead79..65164c1 100644 --- a/sdks/python/tests/unit_tests/multi_user_client_tests.py +++ b/sdks/python/tests/unit_tests/multi_user_client_tests.py @@ -5,7 +5,7 @@ import pytest from redactive.auth_client import AuthClient -from redactive.grpc.v1 import Chunk, RelevantChunk +from redactive.grpc.v2 import RelevantChunk from redactive.multi_user_client import MultiUserClient, UserData from redactive.search_client import SearchClient @@ -44,8 +44,8 @@ def test_multi_user_client_initialization() -> None: callback_uri = "http://callback.uri" read_user_data = mock.Mock() write_user_data = mock.Mock() - auth_base_url = ("http://auth.base.url",) - grpc_host = ("grpc.host",) + auth_base_url = "http://auth.base.url" + grpc_host = "grpc.host" grpc_port = 443 multi_user_client = MultiUserClient( @@ -85,60 +85,37 @@ def test_multi_user_client_initialization_with_no_options() -> None: @pytest.mark.asyncio -async def test_query_chunks(multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock) -> None: +async def test_search_chunks(multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock) -> None: user_id = "user123" - semantic_query = "example query" + query = "example query" count = 5 filters = {"key": "value"} relevant_chunks = [mock.Mock(spec=RelevantChunk) for _ in range(count)] multi_user_client.search_client = mock_search_client - multi_user_client.search_client.query_chunks.return_value = relevant_chunks + multi_user_client.search_client.search_chunks.return_value = relevant_chunks multi_user_client.read_user_data.side_effect = mock_read_user_data - result = await multi_user_client.query_chunks(user_id, semantic_query, count, filters=filters) + result = await multi_user_client.search_chunks(user_id, query, count, filters=filters) assert result == relevant_chunks - multi_user_client.search_client.query_chunks.assert_called_with( - "idToken123", semantic_query, count, filters=filters - ) - - -@pytest.mark.asyncio -async def test_query_chunks_by_document_name( - multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock -) -> None: - user_id = "user123" - document_name = "example_document" - filters = {"key": "value"} - chunks = [mock.Mock(spec=Chunk) for _ in range(3)] - - multi_user_client.search_client = mock_search_client - multi_user_client.search_client.query_chunks_by_document_name.return_value = chunks - multi_user_client.read_user_data.side_effect = mock_read_user_data - - result = await multi_user_client.query_chunks_by_document_name(user_id, document_name, filters) - - assert result == chunks - multi_user_client.search_client.query_chunks_by_document_name.assert_called_with( - "idToken123", document_name, filters - ) + multi_user_client.search_client.search_chunks.assert_called_with("idToken123", query, count, filters=filters) @pytest.mark.asyncio -async def test_get_chunks_by_url(multi_user_client: 
MultiUserClient, mock_search_client: mock.AsyncMock) -> None: +async def test_get_document_by_url(multi_user_client: MultiUserClient, mock_search_client: mock.AsyncMock) -> None: user_id = "user123" url = "http://example.com" chunks = [mock.Mock() for _ in range(3)] multi_user_client.search_client = mock_search_client - multi_user_client.search_client.get_chunks_by_url.return_value = chunks + multi_user_client.search_client.get_document.return_value = chunks multi_user_client.read_user_data.side_effect = mock_read_user_data - result = await multi_user_client.get_chunks_by_url(user_id, url) + result = await multi_user_client.get_document(user_id, url) assert result == chunks - multi_user_client.search_client.get_chunks_by_url.assert_called_with("idToken123", url) + multi_user_client.search_client.get_document.assert_called_with("idToken123", url) async def test_get_begin_connection_url(multi_user_client: MultiUserClient, mock_auth_client: mock.AsyncMock) -> None: diff --git a/sdks/python/tests/unit_tests/search_client_tests.py b/sdks/python/tests/unit_tests/search_client_tests.py index 35be4c8..483a922 100644 --- a/sdks/python/tests/unit_tests/search_client_tests.py +++ b/sdks/python/tests/unit_tests/search_client_tests.py @@ -2,7 +2,6 @@ import pytest -from redactive.grpc.v1 import DocumentNameQuery, Filters, QueryByDocumentNameRequest from redactive.search_client import SearchClient @@ -16,20 +15,20 @@ def test_init_client(): @mock.patch("grpclib.client.Channel") @pytest.mark.asyncio async def test_query_chunks(mock_channel_context): - from redactive.grpc.v1 import Query, QueryRequest + from redactive.grpc.v2 import Query, SearchChunksRequest access_token = "test-access_token" - semantic_query = "Tell me about somethings" + query = "Tell me about somethings" count = 1 mock_channel_context.return_value.__aenter__.side_effect = mock.AsyncMock() - with mock.patch("redactive.grpc.v1.SearchStub.query_chunks", side_effect=mock.AsyncMock()) as mock_query_chunks: + with mock.patch("redactive.grpc.v2.SearchStub.search_chunks", side_effect=mock.AsyncMock()) as mock_query_chunks: client = SearchClient() - await client.query_chunks(access_token, semantic_query, count) + await client.search_chunks(access_token, query, count) mock_query_chunks.assert_called_once_with( - QueryRequest( + SearchChunksRequest( count=count, - query=Query(semantic_query), + query=Query(query), ) ) @@ -37,52 +36,34 @@ async def test_query_chunks(mock_channel_context): @mock.patch("grpclib.client.Channel") @pytest.mark.asyncio async def test_query_chunks_with_filter(mock_channel_context): - from redactive.grpc.v1 import Query, QueryRequest + from redactive.grpc.v2 import Filters, Query, SearchChunksRequest access_token = "test-access_token" - semantic_query = "Tell me about somethings" + query = "Tell me about somethings" count = 1 filters = {"scope": "mock.scope"} mock_channel_context.return_value.__aenter__.side_effect = mock.AsyncMock() - with mock.patch("redactive.grpc.v1.SearchStub.query_chunks", side_effect=mock.AsyncMock()) as mock_query_chunks: + with mock.patch("redactive.grpc.v2.SearchStub.search_chunks", side_effect=mock.AsyncMock()) as mock_query_chunks: client = SearchClient() - await client.query_chunks(access_token, semantic_query, count, filters) + await client.search_chunks(access_token, query, count, filters) mock_query_chunks.assert_called_once_with( - QueryRequest(count=count, query=Query(semantic_query), filters=Filters(**filters)) - ) - - -@mock.patch("grpclib.client.Channel") -@pytest.mark.asyncio -async 
def test_query_chunks_by_document_name_with_filter(mock_channel_context): - access_token = "test-access_token" - document_name = "document_name" - filters = {"scope": "mock.scope"} - mock_channel_context.return_value.__aenter__.side_effect = mock.AsyncMock() - - with mock.patch( - "redactive.grpc.v1.SearchStub.query_chunks_by_document_name", side_effect=mock.AsyncMock() - ) as mock_query_chunks_by_document_name: - client = SearchClient() - await client.query_chunks_by_document_name(access_token, document_name, filters) - mock_query_chunks_by_document_name.assert_called_once_with( - QueryByDocumentNameRequest(query=DocumentNameQuery(document_name), filters=Filters(**filters)) + SearchChunksRequest(count=count, query=Query(query), filters=Filters(**filters)) ) @mock.patch("grpclib.client.Channel") @pytest.mark.asyncio async def test_get_chunks_by_url(mock_channel_context): - from redactive.grpc.v1 import GetChunksByUrlRequest + from redactive.grpc.v2 import GetDocumentRequest access_token = "test-access_token" url = "https://example.com" mock_channel_context.return_value.__aenter__.side_effect = mock.AsyncMock() with mock.patch( - "redactive.grpc.v1.SearchStub.get_chunks_by_url", side_effect=mock.AsyncMock() + "redactive.grpc.v2.SearchStub.get_document", side_effect=mock.AsyncMock() ) as mock_get_chunks_by_url: client = SearchClient() - await client.get_chunks_by_url(access_token, url) - mock_get_chunks_by_url.assert_called_once_with(GetChunksByUrlRequest(url=url)) + await client.get_document(access_token, url) + mock_get_chunks_by_url.assert_called_once_with(GetDocumentRequest(ref=url))
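Taken together, the renamed v2 surface can be exercised end to end with just the two methods touched above, `SearchClient.search_chunks` and `SearchClient.get_document`. The sketch below is illustrative only: the access token is a placeholder for a token obtained via the `AuthClient` flow, and the scope and document reference are example values.

```python
import asyncio

from redactive.grpc.v2 import Filters
from redactive.search_client import SearchClient

# Placeholder: obtain a real token via AuthClient.exchange_tokens as shown in the README.
ACCESS_TOKEN = "REDACTIVE-USER-ACCESS-TOKEN"


async def main() -> None:
    client = SearchClient()

    # Query-based search, restricted to the Confluence provider via the scope filter.
    filters = Filters().from_dict({"scope": ["confluence"]})
    relevant_chunks = await client.search_chunks(
        access_token=ACCESS_TOKEN,
        query="Tell me about AI",
        count=5,
        filters=filters,
    )
    print(f"search_chunks returned {len(relevant_chunks)} relevant chunks")

    # Document fetch by reference (a URL in this example).
    chunks = await client.get_document(
        access_token=ACCESS_TOKEN,
        ref="https://example.com/document",
    )
    print(f"get_document returned {len(chunks)} chunks")


if __name__ == "__main__":
    asyncio.run(main())
```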